blob: 724ce956a7ffa95010218717db15342f0d0d4dd5 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
[email protected]e7bba5f82013-04-10 20:10:524
[email protected]318076b2013-04-18 21:19:455#ifndef URL_URL_UTIL_H_
6#define URL_URL_UTIL_H_
[email protected]e7bba5f82013-04-10 20:10:527
8#include <string>
9
[email protected]516f0182013-06-11 22:51:5610#include "base/strings/string16.h"
pkalinnikov054f4032016-08-31 10:54:1711#include "base/strings/string_piece.h"
tfarina018de6e2015-05-26 17:41:2012#include "url/third_party/mozilla/url_parse.h"
[email protected]318076b2013-04-18 21:19:4513#include "url/url_canon.h"
[email protected]cca6f392014-05-28 21:32:2614#include "url/url_constants.h"
[email protected]760ea502013-05-31 03:39:5115#include "url/url_export.h"
[email protected]e7bba5f82013-04-10 20:10:5216
[email protected]0318f922014-04-22 00:09:2317namespace url {
[email protected]e7bba5f82013-04-10 20:10:5218
// Init ------------------------------------------------------------------------
20
21// Initialization is NOT required, it will be implicitly initialized when first
22// used. However, this implicit initialization is NOT threadsafe. If you are
23// using this library in a threaded environment and don't have a consistent
lizeb5120f6dc2016-02-19 09:29:4424// "first call" (an example might be calling Add*Scheme with your special
palmer29ae5482015-05-19 08:43:3725// application-specific schemes) then you will want to call initialize before
26// spawning any threads.
[email protected]e7bba5f82013-04-10 20:10:5227//
palmer29ae5482015-05-19 08:43:3728// It is OK to call this function more than once, subsequent calls will be
29// no-ops, unless Shutdown was called in the mean time. This will also be a
30// no-op if other calls to the library have forced an initialization beforehand.
[email protected]760ea502013-05-31 03:39:5131URL_EXPORT void Initialize();
[email protected]e7bba5f82013-04-10 20:10:5232
33// Cleanup is not required, except some strings may leak. For most user
34// applications, this is fine. If you're using it in a library that may get
35// loaded and unloaded, you'll want to unload to properly clean up your
36// library.
[email protected]760ea502013-05-31 03:39:5137URL_EXPORT void Shutdown();
[email protected]e7bba5f82013-04-10 20:10:5238
// Schemes ---------------------------------------------------------------------
[email protected]e7bba5f82013-04-10 20:10:5240
// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme.
//
// Enumerator order is preserved from the original declaration, so the
// implicit numeric values are unchanged.
enum SchemeType {
  // The authority component of a URL with the scheme, if any, has the port
  // (the default values may be omitted in a serialization).
  SCHEME_WITH_PORT,
  // The authority component of a URL with the scheme, if any, doesn't have a
  // port.
  SCHEME_WITHOUT_PORT,
  // A URL with the scheme doesn't have the authority component.
  SCHEME_WITHOUT_AUTHORITY,
};
53
54// A pair for representing a standard scheme name and the SchemeType for it.
55struct URL_EXPORT SchemeWithType {
56 const char* scheme;
57 SchemeType type;
58};
59
palmer29ae5482015-05-19 08:43:3760// Adds an application-defined scheme to the internal list of "standard-format"
61// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
62// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
63//
64// This function is not threadsafe and can not be called concurrently with any
lizeb5120f6dc2016-02-19 09:29:4465// other url_util function. It will assert if the lists of schemes have
66// been locked (see LockSchemeRegistries).
tyoshino11a7c9fe2015-08-19 08:51:4667URL_EXPORT void AddStandardScheme(const char* new_scheme,
68 SchemeType scheme_type);
[email protected]e7bba5f82013-04-10 20:10:5269
lizeb5120f6dc2016-02-19 09:29:4470// Adds an application-defined scheme to the internal list of schemes allowed
71// for referrers.
72//
73// This function is not threadsafe and can not be called concurrently with any
74// other url_util function. It will assert if the lists of schemes have
75// been locked (see LockSchemeRegistries).
76URL_EXPORT void AddReferrerScheme(const char* new_scheme,
77 SchemeType scheme_type);
78
79// Sets a flag to prevent future calls to Add*Scheme from succeeding.
[email protected]e7bba5f82013-04-10 20:10:5280//
81// This is designed to help prevent errors for multithreaded applications.
lizeb5120f6dc2016-02-19 09:29:4482// Normal usage would be to call Add*Scheme for your custom schemes at
83// the beginning of program initialization, and then LockSchemeRegistries. This
84// prevents future callers from mistakenly calling Add*Scheme when the
[email protected]e7bba5f82013-04-10 20:10:5285// program is running with multiple threads, where such usage would be
86// dangerous.
87//
lizeb5120f6dc2016-02-19 09:29:4488// We could have had Add*Scheme use a lock instead, but that would add
[email protected]e7bba5f82013-04-10 20:10:5289// some platform-specific dependencies we don't otherwise have now, and is
90// overkill considering the normal usage is so simple.
lizeb5120f6dc2016-02-19 09:29:4491URL_EXPORT void LockSchemeRegistries();
[email protected]e7bba5f82013-04-10 20:10:5292
93// Locates the scheme in the given string and places it into |found_scheme|,
94// which may be NULL to indicate the caller does not care about the range.
95//
96// Returns whether the given |compare| scheme matches the scheme found in the
97// input (if any). The |compare| scheme must be a valid canonical scheme or
98// the result of the comparison is undefined.
[email protected]760ea502013-05-31 03:39:5199URL_EXPORT bool FindAndCompareScheme(const char* str,
100 int str_len,
101 const char* compare,
[email protected]0318f922014-04-22 00:09:23102 Component* found_scheme);
[email protected]3774f832013-06-11 21:21:57103URL_EXPORT bool FindAndCompareScheme(const base::char16* str,
[email protected]760ea502013-05-31 03:39:51104 int str_len,
105 const char* compare,
[email protected]0318f922014-04-22 00:09:23106 Component* found_scheme);
[email protected]e7bba5f82013-04-10 20:10:52107inline bool FindAndCompareScheme(const std::string& str,
108 const char* compare,
[email protected]0318f922014-04-22 00:09:23109 Component* found_scheme) {
[email protected]e7bba5f82013-04-10 20:10:52110 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
111 compare, found_scheme);
112}
[email protected]3774f832013-06-11 21:21:57113inline bool FindAndCompareScheme(const base::string16& str,
[email protected]e7bba5f82013-04-10 20:10:52114 const char* compare,
[email protected]0318f922014-04-22 00:09:23115 Component* found_scheme) {
[email protected]e7bba5f82013-04-10 20:10:52116 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
117 compare, found_scheme);
118}
119
tyoshino11a7c9fe2015-08-19 08:51:46120// Returns true if the given scheme identified by |scheme| within |spec| is in
121// the list of known standard-format schemes (see AddStandardScheme).
[email protected]0318f922014-04-22 00:09:23122URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
123URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
[email protected]e7bba5f82013-04-10 20:10:52124
lizeb5120f6dc2016-02-19 09:29:44125// Returns true if the given scheme identified by |scheme| within |spec| is in
126// the list of allowed schemes for referrers (see AddReferrerScheme).
127URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);
128
tyoshino11a7c9fe2015-08-19 08:51:46129// Returns true and sets |type| to the SchemeType of the given scheme
130// identified by |scheme| within |spec| if the scheme is in the list of known
131// standard-format schemes (see AddStandardScheme).
132URL_EXPORT bool GetStandardSchemeType(const char* spec,
133 const Component& scheme,
134 SchemeType* type);
135
// Domains ---------------------------------------------------------------------
137
138// Returns true if the |canonicalized_host| matches or is in the same domain as
139// the given |lower_ascii_domain| string. For example, if the canonicalized
140// hostname is "www.google.com", this will return true for "com", "google.com",
141// and "www.google.com" domains.
142//
143// If either of the input StringPieces is empty, the return value is false. The
144// input domain should be a lower-case ASCII string in order to match the
145// canonicalized host.
146URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
147 base::StringPiece lower_ascii_domain);
148
// URL library wrappers --------------------------------------------------------
[email protected]e7bba5f82013-04-10 20:10:52150
151// Parses the given spec according to the extracted scheme type. Normal users
152// should use the URL object, although this may be useful if performance is
153// critical and you don't want to do the heap allocation for the std::string.
154//
[email protected]0318f922014-04-22 00:09:23155// As with the Canonicalize* functions, the charset converter can
[email protected]e7bba5f82013-04-10 20:10:52156// be NULL to use UTF-8 (it will be faster in this case).
157//
158// Returns true if a valid URL was produced, false if not. On failure, the
159// output and parsed structures will still be filled and will be consistent,
160// but they will not represent a loadable URL.
[email protected]760ea502013-05-31 03:39:51161URL_EXPORT bool Canonicalize(const char* spec,
162 int spec_len,
[email protected]369e84f72013-11-23 01:53:52163 bool trim_path_end,
[email protected]0318f922014-04-22 00:09:23164 CharsetConverter* charset_converter,
165 CanonOutput* output,
166 Parsed* output_parsed);
[email protected]3774f832013-06-11 21:21:57167URL_EXPORT bool Canonicalize(const base::char16* spec,
[email protected]760ea502013-05-31 03:39:51168 int spec_len,
[email protected]369e84f72013-11-23 01:53:52169 bool trim_path_end,
[email protected]0318f922014-04-22 00:09:23170 CharsetConverter* charset_converter,
171 CanonOutput* output,
172 Parsed* output_parsed);
[email protected]e7bba5f82013-04-10 20:10:52173
174// Resolves a potentially relative URL relative to the given parsed base URL.
175// The base MUST be valid. The resulting canonical URL and parsed information
176// will be placed in to the given out variables.
177//
178// The relative need not be relative. If we discover that it's absolute, this
179// will produce a canonical version of that URL. See Canonicalize() for more
180// about the charset_converter.
181//
182// Returns true if the output is valid, false if the input could not produce
183// a valid URL.
[email protected]760ea502013-05-31 03:39:51184URL_EXPORT bool ResolveRelative(const char* base_spec,
185 int base_spec_len,
[email protected]0318f922014-04-22 00:09:23186 const Parsed& base_parsed,
[email protected]760ea502013-05-31 03:39:51187 const char* relative,
188 int relative_length,
[email protected]0318f922014-04-22 00:09:23189 CharsetConverter* charset_converter,
190 CanonOutput* output,
191 Parsed* output_parsed);
[email protected]760ea502013-05-31 03:39:51192URL_EXPORT bool ResolveRelative(const char* base_spec,
193 int base_spec_len,
[email protected]0318f922014-04-22 00:09:23194 const Parsed& base_parsed,
[email protected]3774f832013-06-11 21:21:57195 const base::char16* relative,
[email protected]760ea502013-05-31 03:39:51196 int relative_length,
[email protected]0318f922014-04-22 00:09:23197 CharsetConverter* charset_converter,
198 CanonOutput* output,
199 Parsed* output_parsed);
[email protected]e7bba5f82013-04-10 20:10:52200
qyearsley2bc727d2015-08-14 20:17:15201// Replaces components in the given VALID input URL. The new canonical URL info
[email protected]e7bba5f82013-04-10 20:10:52202// is written to output and out_parsed.
203//
204// Returns true if the resulting URL is valid.
[email protected]0318f922014-04-22 00:09:23205URL_EXPORT bool ReplaceComponents(const char* spec,
206 int spec_len,
207 const Parsed& parsed,
208 const Replacements<char>& replacements,
209 CharsetConverter* charset_converter,
210 CanonOutput* output,
211 Parsed* out_parsed);
[email protected]760ea502013-05-31 03:39:51212URL_EXPORT bool ReplaceComponents(
[email protected]e7bba5f82013-04-10 20:10:52213 const char* spec,
214 int spec_len,
[email protected]0318f922014-04-22 00:09:23215 const Parsed& parsed,
216 const Replacements<base::char16>& replacements,
217 CharsetConverter* charset_converter,
218 CanonOutput* output,
219 Parsed* out_parsed);
[email protected]e7bba5f82013-04-10 20:10:52220
// String helper functions -----------------------------------------------------
[email protected]e7bba5f82013-04-10 20:10:52222
[email protected]e7bba5f82013-04-10 20:10:52223// Unescapes the given string using URL escaping rules.
[email protected]0318f922014-04-22 00:09:23224URL_EXPORT void DecodeURLEscapeSequences(const char* input,
225 int length,
226 CanonOutputW* output);
[email protected]e7bba5f82013-04-10 20:10:52227
qyearsley2bc727d2015-08-14 20:17:15228// Escapes the given string as defined by the JS method encodeURIComponent. See
[email protected]e7bba5f82013-04-10 20:10:52229// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
[email protected]0318f922014-04-22 00:09:23230URL_EXPORT void EncodeURIComponent(const char* input,
231 int length,
232 CanonOutput* output);
[email protected]e7bba5f82013-04-10 20:10:52233
[email protected]0318f922014-04-22 00:09:23234} // namespace url
[email protected]e7bba5f82013-04-10 20:10:52235
[email protected]318076b2013-04-18 21:19:45236#endif // URL_URL_UTIL_H_