blob: 7b73696e0cac61e590873694801ee3b3ae40cc04 [file] [log] [blame]
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/utf_string_conversions.h"
6
[email protected]b9f93832009-11-13 19:27:487#include "base/string_piece.h"
[email protected]f1d81922010-07-31 17:47:098#include "base/string_util.h"
[email protected]b9f93832009-11-13 19:27:489#include "base/utf_string_conversion_utils.h"
[email protected]047a03f2009-10-07 02:10:2010
[email protected]b9f93832009-11-13 19:27:4811using base::PrepareForUTF8Output;
12using base::PrepareForUTF16Or32Output;
13using base::ReadUnicodeCharacter;
14using base::WriteUnicodeCharacter;
[email protected]047a03f2009-10-07 02:10:2015
16namespace {
17
// Generalized Unicode converter -----------------------------------------------
19
20// Converts the given source Unicode character type to the given destination
21// Unicode character type as a STL string. The given input buffer and size
22// determine the source, and the given output STL string will be replaced by
23// the result.
24template<typename SRC_CHAR, typename DEST_STRING>
[email protected]ce85f602009-11-07 01:34:5325bool ConvertUnicode(const SRC_CHAR* src,
26 size_t src_len,
[email protected]b9f93832009-11-13 19:27:4827 DEST_STRING* output) {
[email protected]047a03f2009-10-07 02:10:2028 // ICU requires 32-bit numbers.
29 bool success = true;
30 int32 src_len32 = static_cast<int32>(src_len);
31 for (int32 i = 0; i < src_len32; i++) {
32 uint32 code_point;
33 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
[email protected]b9f93832009-11-13 19:27:4834 WriteUnicodeCharacter(code_point, output);
[email protected]047a03f2009-10-07 02:10:2035 } else {
[email protected]d7a3e8e2010-01-01 22:16:3836 WriteUnicodeCharacter(0xFFFD, output);
[email protected]047a03f2009-10-07 02:10:2037 success = false;
38 }
39 }
[email protected]ce85f602009-11-07 01:34:5340
[email protected]047a03f2009-10-07 02:10:2041 return success;
42}
43
[email protected]047a03f2009-10-07 02:10:2044} // namespace
45
// UTF-8 <-> Wide --------------------------------------------------------------
47
[email protected]2500a0f2009-11-10 01:43:1548bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
[email protected]ce85f602009-11-07 01:34:5349 PrepareForUTF8Output(src, src_len, output);
[email protected]b9f93832009-11-13 19:27:4850 return ConvertUnicode(src, src_len, output);
[email protected]ce85f602009-11-07 01:34:5351}
[email protected]047a03f2009-10-07 02:10:2052
[email protected]2500a0f2009-11-10 01:43:1553std::string WideToUTF8(const std::wstring& wide) {
[email protected]ce85f602009-11-07 01:34:5354 std::string ret;
[email protected]047a03f2009-10-07 02:10:2055 // Ignore the success flag of this call, it will do the best it can for
56 // invalid input, which is what we want here.
[email protected]2500a0f2009-11-10 01:43:1557 WideToUTF8(wide.data(), wide.length(), &ret);
[email protected]047a03f2009-10-07 02:10:2058 return ret;
59}
60
[email protected]b9f93832009-11-13 19:27:4861bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
[email protected]ce85f602009-11-07 01:34:5362 PrepareForUTF16Or32Output(src, src_len, output);
[email protected]b9f93832009-11-13 19:27:4863 return ConvertUnicode(src, src_len, output);
[email protected]047a03f2009-10-07 02:10:2064}
65
[email protected]b9f93832009-11-13 19:27:4866std::wstring UTF8ToWide(const base::StringPiece& utf8) {
[email protected]047a03f2009-10-07 02:10:2067 std::wstring ret;
[email protected]b9f93832009-11-13 19:27:4868 UTF8ToWide(utf8.data(), utf8.length(), &ret);
[email protected]047a03f2009-10-07 02:10:2069 return ret;
70}
71
// UTF-16 <-> Wide -------------------------------------------------------------
73
74#if defined(WCHAR_T_IS_UTF16)
75
76// When wide == UTF-16, then conversions are a NOP.
[email protected]2500a0f2009-11-10 01:43:1577bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
[email protected]ce85f602009-11-07 01:34:5378 output->assign(src, src_len);
[email protected]ce85f602009-11-07 01:34:5379 return true;
80}
81
[email protected]2500a0f2009-11-10 01:43:1582string16 WideToUTF16(const std::wstring& wide) {
[email protected]047a03f2009-10-07 02:10:2083 return wide;
84}
85
[email protected]b9f93832009-11-13 19:27:4886bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
[email protected]047a03f2009-10-07 02:10:2087 output->assign(src, src_len);
88 return true;
89}
90
[email protected]b9f93832009-11-13 19:27:4891std::wstring UTF16ToWide(const string16& utf16) {
[email protected]047a03f2009-10-07 02:10:2092 return utf16;
93}
94
[email protected]047a03f2009-10-07 02:10:2095#elif defined(WCHAR_T_IS_UTF32)
96
[email protected]2500a0f2009-11-10 01:43:1597bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
[email protected]ce85f602009-11-07 01:34:5398 output->clear();
99 // Assume that normally we won't have any non-BMP characters so the counts
100 // will be the same.
101 output->reserve(src_len);
[email protected]b9f93832009-11-13 19:27:48102 return ConvertUnicode(src, src_len, output);
[email protected]ce85f602009-11-07 01:34:53103}
104
[email protected]2500a0f2009-11-10 01:43:15105string16 WideToUTF16(const std::wstring& wide) {
[email protected]047a03f2009-10-07 02:10:20106 string16 ret;
[email protected]2500a0f2009-11-10 01:43:15107 WideToUTF16(wide.data(), wide.length(), &ret);
[email protected]047a03f2009-10-07 02:10:20108 return ret;
109}
110
[email protected]b9f93832009-11-13 19:27:48111bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
[email protected]ce85f602009-11-07 01:34:53112 output->clear();
[email protected]047a03f2009-10-07 02:10:20113 // Assume that normally we won't have any non-BMP characters so the counts
114 // will be the same.
115 output->reserve(src_len);
[email protected]b9f93832009-11-13 19:27:48116 return ConvertUnicode(src, src_len, output);
[email protected]047a03f2009-10-07 02:10:20117}
118
[email protected]b9f93832009-11-13 19:27:48119std::wstring UTF16ToWide(const string16& utf16) {
[email protected]047a03f2009-10-07 02:10:20120 std::wstring ret;
[email protected]b9f93832009-11-13 19:27:48121 UTF16ToWide(utf16.data(), utf16.length(), &ret);
[email protected]047a03f2009-10-07 02:10:20122 return ret;
123}
124
[email protected]047a03f2009-10-07 02:10:20125#endif // defined(WCHAR_T_IS_UTF32)
126
// UTF16 <-> UTF8 --------------------------------------------------------------
128
129#if defined(WCHAR_T_IS_UTF32)
130
131bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
[email protected]ce85f602009-11-07 01:34:53132 PrepareForUTF16Or32Output(src, src_len, output);
[email protected]b9f93832009-11-13 19:27:48133 return ConvertUnicode(src, src_len, output);
[email protected]047a03f2009-10-07 02:10:20134}
135
[email protected]39a749c2011-01-28 02:40:46136string16 UTF8ToUTF16(const base::StringPiece& utf8) {
[email protected]047a03f2009-10-07 02:10:20137 string16 ret;
[email protected]047a03f2009-10-07 02:10:20138 // Ignore the success flag of this call, it will do the best it can for
139 // invalid input, which is what we want here.
140 UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
141 return ret;
142}
143
144bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
[email protected]ce85f602009-11-07 01:34:53145 PrepareForUTF8Output(src, src_len, output);
[email protected]b9f93832009-11-13 19:27:48146 return ConvertUnicode(src, src_len, output);
[email protected]047a03f2009-10-07 02:10:20147}
148
149std::string UTF16ToUTF8(const string16& utf16) {
150 std::string ret;
[email protected]047a03f2009-10-07 02:10:20151 // Ignore the success flag of this call, it will do the best it can for
152 // invalid input, which is what we want here.
153 UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
154 return ret;
155}
156
157#elif defined(WCHAR_T_IS_UTF16)
158// Easy case since we can use the "wide" versions we already wrote above.
159
160bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
161 return UTF8ToWide(src, src_len, output);
162}
163
[email protected]39a749c2011-01-28 02:40:46164string16 UTF8ToUTF16(const base::StringPiece& utf8) {
[email protected]047a03f2009-10-07 02:10:20165 return UTF8ToWide(utf8);
166}
167
168bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
169 return WideToUTF8(src, src_len, output);
170}
171
172std::string UTF16ToUTF8(const string16& utf16) {
173 return WideToUTF8(utf16);
174}
175
176#endif
[email protected]f1d81922010-07-31 17:47:09177
[email protected]39a749c2011-01-28 02:40:46178std::wstring ASCIIToWide(const base::StringPiece& ascii) {
[email protected]f1d81922010-07-31 17:47:09179 DCHECK(IsStringASCII(ascii)) << ascii;
180 return std::wstring(ascii.begin(), ascii.end());
181}
182
[email protected]39a749c2011-01-28 02:40:46183string16 ASCIIToUTF16(const base::StringPiece& ascii) {
[email protected]f1d81922010-07-31 17:47:09184 DCHECK(IsStringASCII(ascii)) << ascii;
185 return string16(ascii.begin(), ascii.end());
186}