Blame - base/utf_string_conversions.cc - chromium/src

blob: 7b73696e0cac61e590873694801ee3b3ae40cc04 [file] [log] [blame]

[email protected]	f1d8192	2010-07-31 17:47:09	[diff] [blame]	1	// Copyright (c) 2010 The Chromium Authors. All rights reserved.
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "base/utf_string_conversions.h"
				6
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	7	#include "base/string_piece.h"
[email protected]	f1d8192	2010-07-31 17:47:09	[diff] [blame]	8	#include "base/string_util.h"
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	9	#include "base/utf_string_conversion_utils.h"
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	10
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	11	using base::PrepareForUTF8Output;
				12	using base::PrepareForUTF16Or32Output;
				13	using base::ReadUnicodeCharacter;
				14	using base::WriteUnicodeCharacter;
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	15
				16	namespace {
				17
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	18	// Generalized Unicode converter -----------------------------------------------
				19
				20	// Converts the given source Unicode character type to the given destination
				21	// Unicode character type as a STL string. The given input buffer and size
				22	// determine the source, and the given output STL string will be replaced by
				23	// the result.
				24	template<typename SRC_CHAR, typename DEST_STRING>
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	25	bool ConvertUnicode(const SRC_CHAR* src,
				26	size_t src_len,
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	27	DEST_STRING* output) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	28	// ICU requires 32-bit numbers.
				29	bool success = true;
				30	int32 src_len32 = static_cast<int32>(src_len);
				31	for (int32 i = 0; i < src_len32; i++) {
				32	uint32 code_point;
				33	if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	34	WriteUnicodeCharacter(code_point, output);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	35	} else {
[email protected]	d7a3e8e	2010-01-01 22:16:38	[diff] [blame]	36	WriteUnicodeCharacter(0xFFFD, output);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	37	success = false;
				38	}
				39	}
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	40
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	41	return success;
				42	}
				43
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	44	} // namespace
				45
				46	// UTF-8 <-> Wide --------------------------------------------------------------
				47
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	48	bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	49	PrepareForUTF8Output(src, src_len, output);
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	50	return ConvertUnicode(src, src_len, output);
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	51	}
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	52
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	53	std::string WideToUTF8(const std::wstring& wide) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	54	std::string ret;
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	55	// Ignore the success flag of this call, it will do the best it can for
				56	// invalid input, which is what we want here.
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	57	WideToUTF8(wide.data(), wide.length(), &ret);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	58	return ret;
				59	}
				60
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	61	bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	62	PrepareForUTF16Or32Output(src, src_len, output);
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	63	return ConvertUnicode(src, src_len, output);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	64	}
				65
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	66	std::wstring UTF8ToWide(const base::StringPiece& utf8) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	67	std::wstring ret;
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	68	UTF8ToWide(utf8.data(), utf8.length(), &ret);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	69	return ret;
				70	}
				71
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	72	// UTF-16 <-> Wide -------------------------------------------------------------
				73
				74	#if defined(WCHAR_T_IS_UTF16)
				75
				76	// When wide == UTF-16, then conversions are a NOP.
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	77	bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	78	output->assign(src, src_len);
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	79	return true;
				80	}
				81
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	82	string16 WideToUTF16(const std::wstring& wide) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	83	return wide;
				84	}
				85
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	86	bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	87	output->assign(src, src_len);
				88	return true;
				89	}
				90
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	91	std::wstring UTF16ToWide(const string16& utf16) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	92	return utf16;
				93	}
				94
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	95	#elif defined(WCHAR_T_IS_UTF32)
				96
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	97	bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	98	output->clear();
				99	// Assume that normally we won't have any non-BMP characters so the counts
				100	// will be the same.
				101	output->reserve(src_len);
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	102	return ConvertUnicode(src, src_len, output);
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	103	}
				104
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	105	string16 WideToUTF16(const std::wstring& wide) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	106	string16 ret;
[email protected]	2500a0f	2009-11-10 01:43:15	[diff] [blame]	107	WideToUTF16(wide.data(), wide.length(), &ret);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	108	return ret;
				109	}
				110
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	111	bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	112	output->clear();
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	113	// Assume that normally we won't have any non-BMP characters so the counts
				114	// will be the same.
				115	output->reserve(src_len);
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	116	return ConvertUnicode(src, src_len, output);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	117	}
				118
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	119	std::wstring UTF16ToWide(const string16& utf16) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	120	std::wstring ret;
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	121	UTF16ToWide(utf16.data(), utf16.length(), &ret);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	122	return ret;
				123	}
				124
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	125	#endif // defined(WCHAR_T_IS_UTF32)
				126
				127	// UTF16 <-> UTF8 --------------------------------------------------------------
				128
				129	#if defined(WCHAR_T_IS_UTF32)
				130
				131	bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	132	PrepareForUTF16Or32Output(src, src_len, output);
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	133	return ConvertUnicode(src, src_len, output);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	134	}
				135
[email protected]	39a749c	2011-01-28 02:40:46	[diff] [blame]	136	string16 UTF8ToUTF16(const base::StringPiece& utf8) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	137	string16 ret;
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	138	// Ignore the success flag of this call, it will do the best it can for
				139	// invalid input, which is what we want here.
				140	UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
				141	return ret;
				142	}
				143
				144	bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
[email protected]	ce85f60	2009-11-07 01:34:53	[diff] [blame]	145	PrepareForUTF8Output(src, src_len, output);
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	146	return ConvertUnicode(src, src_len, output);
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	147	}
				148
				149	std::string UTF16ToUTF8(const string16& utf16) {
				150	std::string ret;
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	151	// Ignore the success flag of this call, it will do the best it can for
				152	// invalid input, which is what we want here.
				153	UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
				154	return ret;
				155	}
				156
				157	#elif defined(WCHAR_T_IS_UTF16)
				158	// Easy case since we can use the "wide" versions we already wrote above.
				159
				160	bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
				161	return UTF8ToWide(src, src_len, output);
				162	}
				163
[email protected]	39a749c	2011-01-28 02:40:46	[diff] [blame]	164	string16 UTF8ToUTF16(const base::StringPiece& utf8) {
[email protected]	047a03f	2009-10-07 02:10:20	[diff] [blame]	165	return UTF8ToWide(utf8);
				166	}
				167
				168	bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
				169	return WideToUTF8(src, src_len, output);
				170	}
				171
				172	std::string UTF16ToUTF8(const string16& utf16) {
				173	return WideToUTF8(utf16);
				174	}
				175
				176	#endif
[email protected]	f1d8192	2010-07-31 17:47:09	[diff] [blame]	177
[email protected]	39a749c	2011-01-28 02:40:46	[diff] [blame]	178	std::wstring ASCIIToWide(const base::StringPiece& ascii) {
[email protected]	f1d8192	2010-07-31 17:47:09	[diff] [blame]	179	DCHECK(IsStringASCII(ascii)) << ascii;
				180	return std::wstring(ascii.begin(), ascii.end());
				181	}
				182
[email protected]	39a749c	2011-01-28 02:40:46	[diff] [blame]	183	string16 ASCIIToUTF16(const base::StringPiece& ascii) {
[email protected]	f1d8192	2010-07-31 17:47:09	[diff] [blame]	184	DCHECK(IsStringASCII(ascii)) << ascii;
				185	return string16(ascii.begin(), ascii.end());
				186	}