blob: cdf708ba5116dab6dfc0f6a29ad14de3c3513f5f [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/string_split.h"
6
[email protected]f1633932010-08-17 23:05:287#include "base/logging.h"
[email protected]0477554f2010-01-21 19:29:258#include "base/string_util.h"
[email protected]7594f6d2010-09-15 13:36:229#include "base/third_party/icu/icu_utf.h"
10#include "base/utf_string_conversions.h"
[email protected]0477554f2010-01-21 19:29:2511
[email protected]76eb0242010-10-14 00:35:3612namespace base {
13
[email protected]4e5ae20f2010-09-24 04:52:1114template<typename STR>
15static void SplitStringT(const STR& str,
16 const typename STR::value_type s,
17 bool trim_whitespace,
18 std::vector<STR>* r) {
19 size_t last = 0;
20 size_t i;
21 size_t c = str.size();
22 for (i = 0; i <= c; ++i) {
23 if (i == c || str[i] == s) {
24 size_t len = i - last;
25 STR tmp = str.substr(last, len);
26 if (trim_whitespace) {
27 STR t_tmp;
28 TrimWhitespace(tmp, TRIM_ALL, &t_tmp);
29 r->push_back(t_tmp);
30 } else {
31 r->push_back(tmp);
32 }
33 last = i + 1;
34 }
35 }
36}
37
[email protected]4e5ae20f2010-09-24 04:52:1138void SplitString(const string16& str,
39 char16 c,
40 std::vector<string16>* r) {
41 DCHECK(CBU16_IS_SINGLE(c));
42 SplitStringT(str, c, true, r);
43}
[email protected]4e5ae20f2010-09-24 04:52:1144
45void SplitString(const std::string& str,
46 char c,
47 std::vector<std::string>* r) {
[email protected]ae136432011-11-21 19:02:4848#if CHAR_MIN < 0
49 DCHECK(c >= 0);
50#endif
51 DCHECK(c < 0x7F);
[email protected]4e5ae20f2010-09-24 04:52:1152 SplitStringT(str, c, true, r);
53}
54
[email protected]0477554f2010-01-21 19:29:2555bool SplitStringIntoKeyValues(
56 const std::string& line,
57 char key_value_delimiter,
58 std::string* key, std::vector<std::string>* values) {
59 key->clear();
60 values->clear();
61
[email protected]650303702010-05-05 00:36:3462 // Find the key string.
[email protected]0477554f2010-01-21 19:29:2563 size_t end_key_pos = line.find_first_of(key_value_delimiter);
64 if (end_key_pos == std::string::npos) {
[email protected]b026e35d2010-10-19 02:31:0365 DVLOG(1) << "cannot parse key from line: " << line;
[email protected]0477554f2010-01-21 19:29:2566 return false; // no key
67 }
68 key->assign(line, 0, end_key_pos);
69
[email protected]650303702010-05-05 00:36:3470 // Find the values string.
[email protected]0477554f2010-01-21 19:29:2571 std::string remains(line, end_key_pos, line.size() - end_key_pos);
72 size_t begin_values_pos = remains.find_first_not_of(key_value_delimiter);
73 if (begin_values_pos == std::string::npos) {
[email protected]b026e35d2010-10-19 02:31:0374 DVLOG(1) << "cannot parse value from line: " << line;
[email protected]0477554f2010-01-21 19:29:2575 return false; // no value
76 }
77 std::string values_string(remains, begin_values_pos,
78 remains.size() - begin_values_pos);
79
[email protected]650303702010-05-05 00:36:3480 // Construct the values vector.
[email protected]0477554f2010-01-21 19:29:2581 values->push_back(values_string);
82 return true;
83}
84
85bool SplitStringIntoKeyValuePairs(
86 const std::string& line,
87 char key_value_delimiter,
88 char key_value_pair_delimiter,
89 std::vector<std::pair<std::string, std::string> >* kv_pairs) {
90 kv_pairs->clear();
91
92 std::vector<std::string> pairs;
93 SplitString(line, key_value_pair_delimiter, &pairs);
94
95 bool success = true;
96 for (size_t i = 0; i < pairs.size(); ++i) {
[email protected]650303702010-05-05 00:36:3497 // Empty pair. SplitStringIntoKeyValues is more strict about an empty pair
98 // line, so continue with the next pair.
99 if (pairs[i].empty())
100 continue;
101
[email protected]0477554f2010-01-21 19:29:25102 std::string key;
103 std::vector<std::string> value;
104 if (!SplitStringIntoKeyValues(pairs[i],
105 key_value_delimiter,
106 &key, &value)) {
107 // Don't return here, to allow for keys without associated
108 // values; just record that our split failed.
109 success = false;
110 }
111 DCHECK_LE(value.size(), 1U);
112 kv_pairs->push_back(make_pair(key, value.empty()? "" : value[0]));
113 }
114 return success;
115}
116
[email protected]e8478ae2010-09-02 02:01:48117template <typename STR>
118static void SplitStringUsingSubstrT(const STR& str,
119 const STR& s,
120 std::vector<STR>* r) {
121 typename STR::size_type begin_index = 0;
122 while (true) {
123 const typename STR::size_type end_index = str.find(s, begin_index);
124 if (end_index == STR::npos) {
125 const STR term = str.substr(begin_index);
126 STR tmp;
127 TrimWhitespace(term, TRIM_ALL, &tmp);
128 r->push_back(tmp);
129 return;
130 }
131 const STR term = str.substr(begin_index, end_index - begin_index);
132 STR tmp;
133 TrimWhitespace(term, TRIM_ALL, &tmp);
134 r->push_back(tmp);
135 begin_index = end_index + s.size();
136 }
137}
138
139void SplitStringUsingSubstr(const string16& str,
140 const string16& s,
141 std::vector<string16>* r) {
142 SplitStringUsingSubstrT(str, s, r);
143}
144
145void SplitStringUsingSubstr(const std::string& str,
146 const std::string& s,
147 std::vector<std::string>* r) {
148 SplitStringUsingSubstrT(str, s, r);
149}
150
[email protected]7594f6d2010-09-15 13:36:22151void SplitStringDontTrim(const string16& str,
152 char16 c,
153 std::vector<string16>* r) {
154 DCHECK(CBU16_IS_SINGLE(c));
155 SplitStringT(str, c, false, r);
156}
[email protected]7594f6d2010-09-15 13:36:22157
158void SplitStringDontTrim(const std::string& str,
159 char c,
160 std::vector<std::string>* r) {
161 DCHECK(IsStringUTF8(str));
[email protected]ae136432011-11-21 19:02:48162#if CHAR_MIN < 0
163 DCHECK(c >= 0);
164#endif
165 DCHECK(c < 0x7F);
[email protected]7594f6d2010-09-15 13:36:22166 SplitStringT(str, c, false, r);
167}
168
// Shared implementation behind the SplitStringAlongWhitespace() overloads.
//
// Appends every maximal run of non-whitespace characters in |str| to
// |result|, in order. |result| is not cleared first, and no empty strings are
// ever produced: leading, trailing and repeated whitespace is simply skipped.
// An empty or all-whitespace |str| therefore leaves |result| untouched.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  const size_t length = str.length();

  bool in_token = false;   // True while scanning inside a run of non-whitespace.
  size_t token_begin = 0;  // Start of that run; meaningful only if |in_token|.
  for (size_t pos = 0; pos < length; ++pos) {
    bool is_whitespace;
    switch (str[pos]) {
      // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
      case L' ':
      case L'\t':
      case L'\n':
      case L'\v':
      case L'\f':
      case L'\r':
        is_whitespace = true;
        break;
      default:
        is_whitespace = false;
        break;
    }

    if (is_whitespace) {
      // Whitespace closes the token in progress, if there is one.
      if (in_token) {
        result->push_back(str.substr(token_begin, pos - token_begin));
        in_token = false;
      }
    } else if (!in_token) {
      // First character of a new token.
      token_begin = pos;
      in_token = true;
    }
  }

  // Flush a token that extends to the very end of the input.
  if (in_token)
    result->push_back(str.substr(token_begin));
}
208
[email protected]b87c4a72010-11-15 22:03:42209void SplitStringAlongWhitespace(const string16& str,
210 std::vector<string16>* result) {
211 SplitStringAlongWhitespaceT(str, result);
212}
[email protected]b87c4a72010-11-15 22:03:42213
214void SplitStringAlongWhitespace(const std::string& str,
215 std::vector<std::string>* result) {
216 SplitStringAlongWhitespaceT(str, result);
217}
218
[email protected]0477554f2010-01-21 19:29:25219} // namespace base