[email protected] | f1d8192 | 2010-07-31 17:47:09 | [diff] [blame] | 1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "base/utf_string_conversions.h" |
| 6 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 7 | #include "base/string_piece.h" |
[email protected] | f1d8192 | 2010-07-31 17:47:09 | [diff] [blame] | 8 | #include "base/string_util.h" |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 9 | #include "base/utf_string_conversion_utils.h" |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 10 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 11 | using base::PrepareForUTF8Output; |
| 12 | using base::PrepareForUTF16Or32Output; |
| 13 | using base::ReadUnicodeCharacter; |
| 14 | using base::WriteUnicodeCharacter; |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 15 | |
| 16 | namespace { |
| 17 | |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 18 | // Generalized Unicode converter ----------------------------------------------- |
| 19 | |
| 20 | // Converts the given source Unicode character type to the given destination |
| 21 | // Unicode character type as a STL string. The given input buffer and size |
| 22 | // determine the source, and the given output STL string will be replaced by |
| 23 | // the result. |
| 24 | template<typename SRC_CHAR, typename DEST_STRING> |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 25 | bool ConvertUnicode(const SRC_CHAR* src, |
| 26 | size_t src_len, |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 27 | DEST_STRING* output) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 28 | // ICU requires 32-bit numbers. |
| 29 | bool success = true; |
| 30 | int32 src_len32 = static_cast<int32>(src_len); |
| 31 | for (int32 i = 0; i < src_len32; i++) { |
| 32 | uint32 code_point; |
| 33 | if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 34 | WriteUnicodeCharacter(code_point, output); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 35 | } else { |
[email protected] | d7a3e8e | 2010-01-01 22:16:38 | [diff] [blame] | 36 | WriteUnicodeCharacter(0xFFFD, output); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 37 | success = false; |
| 38 | } |
| 39 | } |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 40 | |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 41 | return success; |
| 42 | } |
| 43 | |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 44 | } // namespace |
| 45 | |
| 46 | // UTF-8 <-> Wide -------------------------------------------------------------- |
| 47 | |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 48 | bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 49 | PrepareForUTF8Output(src, src_len, output); |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 50 | return ConvertUnicode(src, src_len, output); |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 51 | } |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 52 | |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 53 | std::string WideToUTF8(const std::wstring& wide) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 54 | std::string ret; |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 55 | // Ignore the success flag of this call, it will do the best it can for |
| 56 | // invalid input, which is what we want here. |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 57 | WideToUTF8(wide.data(), wide.length(), &ret); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 58 | return ret; |
| 59 | } |
| 60 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 61 | bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 62 | PrepareForUTF16Or32Output(src, src_len, output); |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 63 | return ConvertUnicode(src, src_len, output); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 64 | } |
| 65 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 66 | std::wstring UTF8ToWide(const base::StringPiece& utf8) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 67 | std::wstring ret; |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 68 | UTF8ToWide(utf8.data(), utf8.length(), &ret); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 69 | return ret; |
| 70 | } |
| 71 | |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 72 | // UTF-16 <-> Wide ------------------------------------------------------------- |
| 73 | |
| 74 | #if defined(WCHAR_T_IS_UTF16) |
| 75 | |
| 76 | // When wide == UTF-16, then conversions are a NOP. |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 77 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 78 | output->assign(src, src_len); |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 79 | return true; |
| 80 | } |
| 81 | |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 82 | string16 WideToUTF16(const std::wstring& wide) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 83 | return wide; |
| 84 | } |
| 85 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 86 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 87 | output->assign(src, src_len); |
| 88 | return true; |
| 89 | } |
| 90 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 91 | std::wstring UTF16ToWide(const string16& utf16) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 92 | return utf16; |
| 93 | } |
| 94 | |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 95 | #elif defined(WCHAR_T_IS_UTF32) |
| 96 | |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 97 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 98 | output->clear(); |
| 99 | // Assume that normally we won't have any non-BMP characters so the counts |
| 100 | // will be the same. |
| 101 | output->reserve(src_len); |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 102 | return ConvertUnicode(src, src_len, output); |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 103 | } |
| 104 | |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 105 | string16 WideToUTF16(const std::wstring& wide) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 106 | string16 ret; |
[email protected] | 2500a0f | 2009-11-10 01:43:15 | [diff] [blame] | 107 | WideToUTF16(wide.data(), wide.length(), &ret); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 108 | return ret; |
| 109 | } |
| 110 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 111 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 112 | output->clear(); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 113 | // Assume that normally we won't have any non-BMP characters so the counts |
| 114 | // will be the same. |
| 115 | output->reserve(src_len); |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 116 | return ConvertUnicode(src, src_len, output); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 117 | } |
| 118 | |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 119 | std::wstring UTF16ToWide(const string16& utf16) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 120 | std::wstring ret; |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 121 | UTF16ToWide(utf16.data(), utf16.length(), &ret); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 122 | return ret; |
| 123 | } |
| 124 | |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 125 | #endif // defined(WCHAR_T_IS_UTF32) |
| 126 | |
| 127 | // UTF16 <-> UTF8 -------------------------------------------------------------- |
| 128 | |
| 129 | #if defined(WCHAR_T_IS_UTF32) |
| 130 | |
| 131 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 132 | PrepareForUTF16Or32Output(src, src_len, output); |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 133 | return ConvertUnicode(src, src_len, output); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 134 | } |
| 135 | |
[email protected] | 39a749c | 2011-01-28 02:40:46 | [diff] [blame] | 136 | string16 UTF8ToUTF16(const base::StringPiece& utf8) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 137 | string16 ret; |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 138 | // Ignore the success flag of this call, it will do the best it can for |
| 139 | // invalid input, which is what we want here. |
| 140 | UTF8ToUTF16(utf8.data(), utf8.length(), &ret); |
| 141 | return ret; |
| 142 | } |
| 143 | |
| 144 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame] | 145 | PrepareForUTF8Output(src, src_len, output); |
[email protected] | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 146 | return ConvertUnicode(src, src_len, output); |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 147 | } |
| 148 | |
| 149 | std::string UTF16ToUTF8(const string16& utf16) { |
| 150 | std::string ret; |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 151 | // Ignore the success flag of this call, it will do the best it can for |
| 152 | // invalid input, which is what we want here. |
| 153 | UTF16ToUTF8(utf16.data(), utf16.length(), &ret); |
| 154 | return ret; |
| 155 | } |
| 156 | |
| 157 | #elif defined(WCHAR_T_IS_UTF16) |
| 158 | // Easy case since we can use the "wide" versions we already wrote above. |
| 159 | |
| 160 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { |
| 161 | return UTF8ToWide(src, src_len, output); |
| 162 | } |
| 163 | |
[email protected] | 39a749c | 2011-01-28 02:40:46 | [diff] [blame] | 164 | string16 UTF8ToUTF16(const base::StringPiece& utf8) { |
[email protected] | 047a03f | 2009-10-07 02:10:20 | [diff] [blame] | 165 | return UTF8ToWide(utf8); |
| 166 | } |
| 167 | |
| 168 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { |
| 169 | return WideToUTF8(src, src_len, output); |
| 170 | } |
| 171 | |
| 172 | std::string UTF16ToUTF8(const string16& utf16) { |
| 173 | return WideToUTF8(utf16); |
| 174 | } |
| 175 | |
| 176 | #endif |
[email protected] | f1d8192 | 2010-07-31 17:47:09 | [diff] [blame] | 177 | |
[email protected] | 39a749c | 2011-01-28 02:40:46 | [diff] [blame] | 178 | std::wstring ASCIIToWide(const base::StringPiece& ascii) { |
[email protected] | f1d8192 | 2010-07-31 17:47:09 | [diff] [blame] | 179 | DCHECK(IsStringASCII(ascii)) << ascii; |
| 180 | return std::wstring(ascii.begin(), ascii.end()); |
| 181 | } |
| 182 | |
[email protected] | 39a749c | 2011-01-28 02:40:46 | [diff] [blame] | 183 | string16 ASCIIToUTF16(const base::StringPiece& ascii) { |
[email protected] | f1d8192 | 2010-07-31 17:47:09 | [diff] [blame] | 184 | DCHECK(IsStringASCII(ascii)) << ascii; |
| 185 | return string16(ascii.begin(), ascii.end()); |
| 186 | } |