[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 1 | // Copyright 2012 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "ui/gfx/text_utils.h" |
| 6 | |
avi | c89eb8d4 | 2015-12-23 08:08:18 | [diff] [blame] | 7 | #include <stdint.h> |
| 8 | |
[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 9 | #include "base/i18n/char_iterator.h" |
xdai | 149bee4 | 2015-06-10 23:36:15 | [diff] [blame] | 10 | #include "base/logging.h" |
| 11 | #include "base/numerics/safe_conversions.h" |
| 12 | #include "third_party/icu/source/common/unicode/uchar.h" |
| 13 | #include "third_party/icu/source/common/unicode/utf16.h" |
[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 14 | |
| 15 | namespace gfx { |
| 16 | |
xdai | 149bee4 | 2015-06-10 23:36:15 | [diff] [blame] | 17 | namespace { |
| 18 | |
| 19 | // Returns true if the code point |c| is a combining mark character in Unicode. |
| 20 | bool CharIsMark(UChar32 c) { |
| 21 | int8_t char_type = u_charType(c); |
| 22 | return char_type == U_NON_SPACING_MARK || char_type == U_ENCLOSING_MARK || |
| 23 | char_type == U_COMBINING_SPACING_MARK; |
| 24 | } |
| 25 | |
| 26 | // Gets the code point of |str| at the given code unit position |index|. If |
| 27 | // |index| is a surrogate code unit, returns the whole code point (unless the |
| 28 | // code unit is unpaired, in which case it just returns the surrogate value). |
| 29 | UChar32 GetCodePointAt(const base::string16& str, size_t index) { |
| 30 | UChar32 c; |
| 31 | U16_GET(str.data(), 0, index, str.size(), c); |
| 32 | return c; |
| 33 | } |
| 34 | |
| 35 | } // namespace |
| 36 | |
[email protected] | 031ffed | 2013-06-09 03:32:36 | [diff] [blame] | 37 | base::string16 RemoveAcceleratorChar(const base::string16& s, |
| 38 | base::char16 accelerator_char, |
| 39 | int* accelerated_char_pos, |
| 40 | int* accelerated_char_span) { |
[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 41 | bool escaped = false; |
[email protected] | 1a7f427 | 2013-01-15 08:52:31 | [diff] [blame] | 42 | ptrdiff_t last_char_pos = -1; |
[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 43 | int last_char_span = 0; |
| 44 | base::i18n::UTF16CharIterator chars(&s); |
[email protected] | 031ffed | 2013-06-09 03:32:36 | [diff] [blame] | 45 | base::string16 accelerator_removed; |
[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 46 | |
| 47 | accelerator_removed.reserve(s.size()); |
| 48 | while (!chars.end()) { |
avi | c89eb8d4 | 2015-12-23 08:08:18 | [diff] [blame] | 49 | int32_t c = chars.get(); |
[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 50 | int array_pos = chars.array_pos(); |
| 51 | chars.Advance(); |
| 52 | |
| 53 | if (c != accelerator_char || escaped) { |
| 54 | int span = chars.array_pos() - array_pos; |
| 55 | if (escaped && c != accelerator_char) { |
| 56 | last_char_pos = accelerator_removed.size(); |
| 57 | last_char_span = span; |
| 58 | } |
| 59 | for (int i = 0; i < span; i++) |
| 60 | accelerator_removed.push_back(s[array_pos + i]); |
| 61 | escaped = false; |
| 62 | } else { |
| 63 | escaped = true; |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | if (accelerated_char_pos) |
| 68 | *accelerated_char_pos = last_char_pos; |
| 69 | if (accelerated_char_span) |
| 70 | *accelerated_char_span = last_char_span; |
| 71 | |
| 72 | return accelerator_removed; |
| 73 | } |
| 74 | |
xdai | 149bee4 | 2015-06-10 23:36:15 | [diff] [blame] | 75 | size_t FindValidBoundaryBefore(const base::string16& text, size_t index) { |
| 76 | size_t length = text.length(); |
| 77 | DCHECK_LE(index, length); |
| 78 | if (index == length) |
| 79 | return index; |
| 80 | |
| 81 | // If |index| straddles a combining character sequence, go back until we find |
| 82 | // a base character. |
| 83 | while (index > 0 && CharIsMark(GetCodePointAt(text, index))) |
| 84 | --index; |
| 85 | |
| 86 | // If |index| straddles a UTF-16 surrogate pair, go back. |
| 87 | U16_SET_CP_START(text.data(), 0, index); |
| 88 | return index; |
| 89 | } |
| 90 | |
| 91 | size_t FindValidBoundaryAfter(const base::string16& text, size_t index) { |
| 92 | DCHECK_LE(index, text.length()); |
| 93 | if (index == text.length()) |
| 94 | return index; |
| 95 | |
| 96 | int32_t text_index = base::checked_cast<int32_t>(index); |
| 97 | int32_t text_length = base::checked_cast<int32_t>(text.length()); |
| 98 | |
| 99 | // If |index| straddles a combining character sequence, go forward until we |
| 100 | // find a base character. |
| 101 | while (text_index < text_length && |
| 102 | CharIsMark(GetCodePointAt(text, text_index))) { |
| 103 | ++text_index; |
| 104 | } |
| 105 | |
| 106 | // If |index| straddles a UTF-16 surrogate pair, go forward. |
| 107 | U16_SET_CP_LIMIT(text.data(), 0, text_index, text_length); |
| 108 | return static_cast<size_t>(text_index); |
| 109 | } |
| 110 | |
[email protected] | ed24f7a | 2012-11-14 14:55:02 | [diff] [blame] | 111 | } // namespace gfx |