Blame - url/url_util.h - chromium/src

[email protected]

51bcc5d

2013-04-24 01:41:37

[diff] [blame]

1

2

// Use of this source code is governed by a BSD-style license that can be

3

// found in the LICENSE file.

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

4

[email protected]

318076b

2013-04-18 21:19:45

[diff] [blame]

5

#ifndef URL_URL_UTIL_H_

6

#define URL_URL_UTIL_H_

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

#include <string>

2013-06-11 22:51:56

[diff] [blame]

10

#include "base/strings/string16.h"

pkalinnikov

054f403

2016-08-31 10:54:17

[diff] [blame]

11

#include "base/strings/string_piece.h"

tfarina

018de6e

2015-05-26 17:41:20

[diff] [blame]

12

#include "url/third_party/mozilla/url_parse.h"

[email protected]

318076b

2013-04-18 21:19:45

[diff] [blame]

13

#include "url/url_canon.h"

[email protected]

cca6f39

2014-05-28 21:32:26

[diff] [blame]

14

#include "url/url_constants.h"

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

15

#include "url/url_export.h"

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

16

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

17

namespace url {

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

18

19

// Init ------------------------------------------------------------------------

20

21

// Initialization is NOT required, it will be implicitly initialized when first

22

// used. However, this implicit initialization is NOT threadsafe. If you are

23

// using this library in a threaded environment and don't have a consistent

lizeb

5120f6dc

2016-02-19 09:29:44

[diff] [blame]

24

// "first call" (an example might be calling Add*Scheme with your special

palmer

29ae548

2015-05-19 08:43:37

[diff] [blame]

25

// application-specific schemes) then you will want to call Initialize() before

26

// spawning any threads.

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

27

//

palmer

29ae548

2015-05-19 08:43:37

[diff] [blame]

28

// It is OK to call this function more than once, subsequent calls will be

29

// no-ops, unless Shutdown was called in the meantime. This will also be a

30

// no-op if other calls to the library have forced an initialization beforehand.

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

31

URL_EXPORT void Initialize();

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

32

33

// Cleanup is not required, except some strings may leak. For most user

34

// applications, this is fine. If you're using it in a library that may get

35

// loaded and unloaded, you'll want to unload to properly clean up your

36

// library.

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

37

URL_EXPORT void Shutdown();

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

38

pkalinnikov

054f403

2016-08-31 10:54:17

[diff] [blame]

39

// Schemes ---------------------------------------------------------------------

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

40

tyoshino

11a7c9fe

2015-08-19 08:51:46

[diff] [blame]

41

// Types of a scheme representing the requirements on the data represented by

42

// the authority component of a URL with the scheme.

Nico Weber

204f0a7

2015-08-19 15:56:23

[diff] [blame]

43

// Unscoped enum: the enumerators are visible directly in the enclosing scope
// and take the implicit values 0, 1, 2 in declaration order.
enum SchemeType {
  // The authority component of a URL with the scheme, if any, has the port
  // (the default values may be omitted in a serialization).
  SCHEME_WITH_PORT,
  // The authority component of a URL with the scheme, if any, doesn't have a
  // port.
  SCHEME_WITHOUT_PORT,
  // A URL with the scheme doesn't have the authority component.
  SCHEME_WITHOUT_AUTHORITY,
};

53

54

// A pair for representing a standard scheme name and the SchemeType for it.

55

struct URL_EXPORT SchemeWithType {

// Name of the scheme.
// NOTE(review): ownership/lifetime of the pointed-to string is not visible
// here -- confirm against the code that populates this struct.
const char* scheme;

// The SchemeType paired with |scheme|.
SchemeType type;

};

palmer

2015-05-19 08:43:37

[diff] [blame]

60

// Adds an application-defined scheme to the internal list of "standard-format"

61

// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic

62

// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).

63

//

64

// This function is not threadsafe and cannot be called concurrently with any

lizeb

5120f6dc

2016-02-19 09:29:44

[diff] [blame]

65

// other url_util function. It will assert if the lists of schemes have

66

// been locked (see LockSchemeRegistries).

tyoshino

11a7c9fe

2015-08-19 08:51:46

[diff] [blame]

67

URL_EXPORT void AddStandardScheme(const char* new_scheme,

68

SchemeType scheme_type);

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

69

lizeb

5120f6dc

2016-02-19 09:29:44

[diff] [blame]

70

// Adds an application-defined scheme to the internal list of schemes allowed

71

// for referrers.

72

//

73

// This function is not threadsafe and cannot be called concurrently with any

74

// other url_util function. It will assert if the lists of schemes have

75

// been locked (see LockSchemeRegistries).

76

URL_EXPORT void AddReferrerScheme(const char* new_scheme,

77

SchemeType scheme_type);

78

79

// Sets a flag to prevent future calls to Add*Scheme from succeeding.

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

80

//

81

// This is designed to help prevent errors for multithreaded applications.

lizeb

5120f6dc

2016-02-19 09:29:44

[diff] [blame]

82

// Normal usage would be to call Add*Scheme for your custom schemes at

83

// the beginning of program initialization, and then LockSchemeRegistries. This

84

// prevents future callers from mistakenly calling Add*Scheme when the

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

85

// program is running with multiple threads, where such usage would be

86

// dangerous.

87

//

lizeb

5120f6dc

2016-02-19 09:29:44

[diff] [blame]

88

// We could have had Add*Scheme use a lock instead, but that would add

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

89

// some platform-specific dependencies we don't otherwise have now, and is

90

// overkill considering the normal usage is so simple.

lizeb

5120f6dc

2016-02-19 09:29:44

[diff] [blame]

91

URL_EXPORT void LockSchemeRegistries();

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

92

93

// Locates the scheme in the given string and places it into |found_scheme|,

94

// which may be NULL to indicate the caller does not care about the range.

95

//

96

// Returns whether the given |compare| scheme matches the scheme found in the

97

// input (if any). The |compare| scheme must be a valid canonical scheme or

98

// the result of the comparison is undefined.

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

99

URL_EXPORT bool FindAndCompareScheme(const char* str,

int str_len,

const char* compare,

2014-04-22 00:09:23

[diff] [blame]

102

Component* found_scheme);

[email protected]

3774f83

2013-06-11 21:21:57

[diff] [blame]

103

URL_EXPORT bool FindAndCompareScheme(const base::char16* str,

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

int str_len,

const char* compare,

2014-04-22 00:09:23

[diff] [blame]

106

Component* found_scheme);

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

107

inline bool FindAndCompareScheme(const std::string& str,

108

const char* compare,

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

109

Component* found_scheme) {

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

110

return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),

111

compare, found_scheme);

112

}

[email protected]

3774f83

2013-06-11 21:21:57

[diff] [blame]

113

// base::string16 convenience overload. Forwards the string's buffer and its
// length to the overload taking (str, str_len, compare, found_scheme) and
// returns that call's result unchanged.
//
// Note: the length is narrowed to int by the static_cast below, because the
// forwarded-to overload takes an int length.
inline bool FindAndCompareScheme(const base::string16& str,
                                 const char* compare,
                                 Component* found_scheme) {
  return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
                              compare, found_scheme);
}

119

tyoshino

11a7c9fe

2015-08-19 08:51:46

[diff] [blame]

120

// Returns true if the given scheme identified by |scheme| within |spec| is in

121

// the list of known standard-format schemes (see AddStandardScheme).

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

122

URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);

123

URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

124

lizeb

5120f6dc

2016-02-19 09:29:44

[diff] [blame]

125

// Returns true if the given scheme identified by |scheme| within |spec| is in

126

// the list of allowed schemes for referrers (see AddReferrerScheme).

127

URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);

128

tyoshino

11a7c9fe

2015-08-19 08:51:46

[diff] [blame]

129

// Returns true and sets |type| to the SchemeType of the given scheme

130

// identified by |scheme| within |spec| if the scheme is in the list of known

131

// standard-format schemes (see AddStandardScheme).

132

URL_EXPORT bool GetStandardSchemeType(const char* spec,

133

const Component& scheme,

134

SchemeType* type);

135

pkalinnikov

054f403

2016-08-31 10:54:17

[diff] [blame]

136

// Domains ---------------------------------------------------------------------

137

138

// Returns true if the |canonicalized_host| matches or is in the same domain as

139

// the given |lower_ascii_domain| string. For example, if the canonicalized

140

// hostname is "www.google.com", this will return true for "com", "google.com",

141

// and "www.google.com" domains.

142

//

143

// If either of the input StringPieces is empty, the return value is false. The

144

// input domain should be a lower-case ASCII string in order to match the

145

// canonicalized host.

146

URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,

147

base::StringPiece lower_ascii_domain);

148

149

// URL library wrappers --------------------------------------------------------

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

150

151

// Parses the given spec according to the extracted scheme type. Normal users

152

// should use the URL object, although this may be useful if performance is

153

// critical and you don't want to do the heap allocation for the std::string.

154

//

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

155

// As with the Canonicalize* functions, the charset converter can

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

156

// be NULL to use UTF-8 (it will be faster in this case).

157

//

158

// Returns true if a valid URL was produced, false if not. On failure, the

159

// output and parsed structures will still be filled and will be consistent,

160

// but they will not represent a loadable URL.

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

161

URL_EXPORT bool Canonicalize(const char* spec,

162

int spec_len,

[email protected]

369e84f7

2013-11-23 01:53:52

[diff] [blame]

163

bool trim_path_end,

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

164

CharsetConverter* charset_converter,

165

CanonOutput* output,

166

Parsed* output_parsed);

[email protected]

3774f83

2013-06-11 21:21:57

[diff] [blame]

167

URL_EXPORT bool Canonicalize(const base::char16* spec,

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

168

int spec_len,

[email protected]

369e84f7

2013-11-23 01:53:52

[diff] [blame]

169

bool trim_path_end,

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

170

CharsetConverter* charset_converter,

171

CanonOutput* output,

172

Parsed* output_parsed);

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

173

174

// Resolves a potentially relative URL relative to the given parsed base URL.

175

// The base MUST be valid. The resulting canonical URL and parsed information

176

// will be placed into the given out variables.

177

//

178

// The relative need not be relative. If we discover that it's absolute, this

179

// will produce a canonical version of that URL. See Canonicalize() for more

180

// about the charset_converter.

181

//

182

// Returns true if the output is valid, false if the input could not produce

183

// a valid URL.

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

184

URL_EXPORT bool ResolveRelative(const char* base_spec,

185

int base_spec_len,

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

186

const Parsed& base_parsed,

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

187

const char* relative,

188

int relative_length,

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

189

CharsetConverter* charset_converter,

190

CanonOutput* output,

191

Parsed* output_parsed);

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

192

URL_EXPORT bool ResolveRelative(const char* base_spec,

193

int base_spec_len,

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

194

const Parsed& base_parsed,

[email protected]

3774f83

2013-06-11 21:21:57

[diff] [blame]

195

const base::char16* relative,

[email protected]

760ea50

2013-05-31 03:39:51

[diff] [blame]

196

int relative_length,

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

197

CharsetConverter* charset_converter,

198

CanonOutput* output,

199

Parsed* output_parsed);

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

200

qyearsley

2bc727d

2015-08-14 20:17:15

[diff] [blame]

201

// Replaces components in the given VALID input URL. The new canonical URL info

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

202

// is written to output and out_parsed.

203

//

204

// Returns true if the resulting URL is valid.

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

205

URL_EXPORT bool ReplaceComponents(const char* spec,

206

int spec_len,

207

const Parsed& parsed,

208

const Replacements<char>& replacements,

209

CharsetConverter* charset_converter,

CanonOutput* output,

Parsed* out_parsed);

2013-05-31 03:39:51

[diff] [blame]

212

URL_EXPORT bool ReplaceComponents(

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

const char* spec,

int spec_len,

2014-04-22 00:09:23

[diff] [blame]

215

const Parsed& parsed,

216

const Replacements<base::char16>& replacements,

217

CharsetConverter* charset_converter,

CanonOutput* output,

Parsed* out_parsed);

2013-04-10 20:10:52

[diff] [blame]

220

pkalinnikov

054f403

2016-08-31 10:54:17

[diff] [blame]

221

// String helper functions -----------------------------------------------------

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

222

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

223

// Unescapes the given string using URL escaping rules.

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

224

URL_EXPORT void DecodeURLEscapeSequences(const char* input,

225

int length,

226

CanonOutputW* output);

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

227

qyearsley

2bc727d

2015-08-14 20:17:15

[diff] [blame]

228

// Escapes the given string as defined by the JS method encodeURIComponent. See

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

229

// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

230

URL_EXPORT void EncodeURIComponent(const char* input,

231

int length,

232

CanonOutput* output);

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

233

[email protected]

0318f92

2014-04-22 00:09:23

[diff] [blame]

234

} // namespace url

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame]

235

[email protected]

318076b

2013-04-18 21:19:45

[diff] [blame]

236

#endif // URL_URL_UTIL_H_