[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 1 | // Copyright 2014 The Chromium Authors. All rights reserved. |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 4 | |
avi | f57136c1 | 2015-12-25 23:27:45 | [diff] [blame] | 5 | #include <stddef.h> |
| 6 | |
| 7 | #include "base/macros.h" |
[email protected] | 3b63f8f4 | 2011-03-28 01:54:15 | [diff] [blame] | 8 | #include "base/memory/scoped_vector.h" |
[email protected] | 112158af | 2013-06-07 23:46:18 | [diff] [blame] | 9 | #include "base/strings/utf_string_conversions.h" |
[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 10 | #include "components/query_parser/query_parser.h" |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 11 | #include "testing/gtest/include/gtest/gtest.h" |
| 12 | |
[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 13 | namespace query_parser { |
| 14 | |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 15 | class QueryParserTest : public testing::Test { |
| 16 | public: |
| 17 | struct TestData { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 18 | const char* input; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 19 | const int expected_word_count; |
| 20 | }; |
| 21 | |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 22 | std::string QueryToString(const std::string& query); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 23 | |
| 24 | protected: |
| 25 | QueryParser query_parser_; |
| 26 | }; |
| 27 | |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 28 | // Test helper: Convert a user query string in 8-bit (for hardcoding |
| 29 | // convenience) to a SQLite query string. |
| 30 | std::string QueryParserTest::QueryToString(const std::string& query) { |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 31 | base::string16 sqlite_query; |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame] | 32 | query_parser_.ParseQuery(base::UTF8ToUTF16(query), |
| 33 | MatchingAlgorithm::DEFAULT, |
| 34 | &sqlite_query); |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 35 | return base::UTF16ToUTF8(sqlite_query); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 36 | } |
| 37 | |
| 38 | // Basic multi-word queries, including prefix matching. |
| 39 | TEST_F(QueryParserTest, SimpleQueries) { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 40 | EXPECT_EQ("", QueryToString(" ")); |
| 41 | EXPECT_EQ("singleword*", QueryToString("singleword")); |
| 42 | EXPECT_EQ("spacedout*", QueryToString(" spacedout ")); |
| 43 | EXPECT_EQ("foo* bar*", QueryToString("foo bar")); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 44 | // Short words aren't prefix matches. For Korean Hangul |
| 45 | // the minimum is 2 while for other scripts, it's 3. |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 46 | EXPECT_EQ("f b", QueryToString(" f b")); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 47 | // KA JANG |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 48 | EXPECT_EQ(base::WideToUTF8(L"\xAC00 \xC7A5"), |
| 49 | QueryToString(base::WideToUTF8(L" \xAC00 \xC7A5"))); |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 50 | EXPECT_EQ("foo* bar*", QueryToString(" foo bar ")); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 51 | // KA-JANG BICH-GO |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 52 | EXPECT_EQ(base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"), |
| 53 | QueryToString(base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0"))); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 54 | } |
| 55 | |
| 56 | // Quoted substring parsing. |
| 57 | TEST_F(QueryParserTest, Quoted) { |
[email protected] | d321644 | 2009-03-05 21:07:27 | [diff] [blame] | 58 | // ASCII quotes |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 59 | EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\"")); |
[email protected] | d321644 | 2009-03-05 21:07:27 | [diff] [blame] | 60 | // Missing end quotes |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 61 | EXPECT_EQ("\"miss end\"", QueryToString("\"miss end")); |
[email protected] | d321644 | 2009-03-05 21:07:27 | [diff] [blame] | 62 | // Missing begin quotes |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 63 | EXPECT_EQ("miss* beg*", QueryToString("miss beg\"")); |
[email protected] | d321644 | 2009-03-05 21:07:27 | [diff] [blame] | 64 | // Weird formatting |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 65 | EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes")); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 66 | } |
| 67 | |
| 68 | // Apostrophes within words should be preserved, but otherwise stripped. |
| 69 | TEST_F(QueryParserTest, Apostrophes) { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 70 | EXPECT_EQ("foo* bar's*", QueryToString("foo bar's")); |
| 71 | EXPECT_EQ("l'foo*", QueryToString("l'foo")); |
| 72 | EXPECT_EQ("foo*", QueryToString("'foo")); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 73 | } |
| 74 | |
| 75 | // Special characters. |
| 76 | TEST_F(QueryParserTest, SpecialChars) { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 77 | EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar")); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 78 | } |
| 79 | |
| 80 | TEST_F(QueryParserTest, NumWords) { |
| 81 | TestData data[] = { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 82 | { "blah", 1 }, |
| 83 | { "foo \"bar baz\"", 3 }, |
| 84 | { "foo \"baz\"", 2 }, |
| 85 | { "foo \"bar baz\" blah", 4 }, |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 86 | }; |
| 87 | |
[email protected] | a6185508 | 2008-11-14 18:54:02 | [diff] [blame] | 88 | for (size_t i = 0; i < arraysize(data); ++i) { |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 89 | base::string16 query_string; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 90 | EXPECT_EQ(data[i].expected_word_count, |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 91 | query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input), |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame] | 92 | MatchingAlgorithm::DEFAULT, |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 93 | &query_string)); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 94 | } |
| 95 | } |
| 96 | |
| 97 | TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { |
| 98 | struct TestData2 { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 99 | const std::string query; |
| 100 | const std::string text; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 101 | const bool matches; |
[email protected] | 5b4eb8f | 2009-02-02 12:18:46 | [diff] [blame] | 102 | const size_t m1_start; |
| 103 | const size_t m1_end; |
| 104 | const size_t m2_start; |
| 105 | const size_t m2_end; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 106 | } data[] = { |
[email protected] | 2532060 | 2012-10-18 22:05:56 | [diff] [blame] | 107 | { "foo", "fooey foo", true, 0, 3, 6, 9 }, |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 108 | { "foo foo", "foo", true, 0, 3, 0, 0 }, |
| 109 | { "foo fooey", "fooey", true, 0, 5, 0, 0 }, |
[email protected] | 2532060 | 2012-10-18 22:05:56 | [diff] [blame] | 110 | { "fooey foo", "fooey", true, 0, 5, 0, 0 }, |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 111 | { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 }, |
| 112 | { "blah", "blah", true, 0, 4, 0, 0 }, |
| 113 | { "blah", "foo", false, 0, 0, 0, 0 }, |
| 114 | { "blah", "blahblah", true, 0, 4, 0, 0 }, |
| 115 | { "blah", "foo blah", true, 4, 8, 0, 0 }, |
| 116 | { "foo blah", "blah", false, 0, 0, 0, 0 }, |
| 117 | { "foo blah", "blahx foobar", true, 0, 4, 6, 9 }, |
| 118 | { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, |
| 119 | { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 }, |
| 120 | { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, |
| 121 | { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 }, |
| 122 | { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 }, |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 123 | }; |
viettrungluu | 37a447b | 2014-10-16 18:23:27 | [diff] [blame] | 124 | for (size_t i = 0; i < arraysize(data); ++i) { |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 125 | QueryParser parser; |
| 126 | ScopedVector<QueryNode> query_nodes; |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 127 | parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query), |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame] | 128 | MatchingAlgorithm::DEFAULT, |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 129 | &query_nodes.get()); |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 130 | Snippet::MatchPositions match_positions; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 131 | ASSERT_EQ(data[i].matches, |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 132 | parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text), |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 133 | query_nodes.get(), |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 134 | &match_positions)); |
| 135 | size_t offset = 0; |
| 136 | if (data[i].m1_start != 0 || data[i].m1_end != 0) { |
| 137 | ASSERT_TRUE(match_positions.size() >= 1); |
| 138 | EXPECT_EQ(data[i].m1_start, match_positions[0].first); |
| 139 | EXPECT_EQ(data[i].m1_end, match_positions[0].second); |
| 140 | offset++; |
| 141 | } |
| 142 | if (data[i].m2_start != 0 || data[i].m2_end != 0) { |
| 143 | ASSERT_TRUE(match_positions.size() == 1 + offset); |
| 144 | EXPECT_EQ(data[i].m2_start, match_positions[offset].first); |
| 145 | EXPECT_EQ(data[i].m2_end, match_positions[offset].second); |
| 146 | } |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 147 | } |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 148 | } |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 149 | |
[email protected] | 8c793c8 | 2011-05-19 00:41:33 | [diff] [blame] | 150 | TEST_F(QueryParserTest, ParseQueryWords) { |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 151 | struct TestData2 { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 152 | const std::string text; |
| 153 | const std::string w1; |
| 154 | const std::string w2; |
| 155 | const std::string w3; |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 156 | const size_t word_count; |
| 157 | } data[] = { |
[email protected] | e5366896 | 2010-06-23 15:35:25 | [diff] [blame] | 158 | { "foo", "foo", "", "", 1 }, |
| 159 | { "foo bar", "foo", "bar", "", 2 }, |
| 160 | { "\"foo bar\"", "foo", "bar", "", 2 }, |
| 161 | { "\"foo bar\" a", "foo", "bar", "a", 3 }, |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 162 | }; |
viettrungluu | 37a447b | 2014-10-16 18:23:27 | [diff] [blame] | 163 | for (size_t i = 0; i < arraysize(data); ++i) { |
[email protected] | d2065e06 | 2013-12-12 23:49:52 | [diff] [blame] | 164 | std::vector<base::string16> results; |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 165 | QueryParser parser; |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame] | 166 | parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text), |
| 167 | MatchingAlgorithm::DEFAULT, |
| 168 | &results); |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 169 | ASSERT_EQ(data[i].word_count, results.size()); |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 170 | EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0])); |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 171 | if (results.size() == 2) |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 172 | EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1])); |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 173 | if (results.size() == 3) |
[email protected] | 0433872 | 2013-12-24 23:18:05 | [diff] [blame] | 174 | EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2])); |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 175 | } |
| 176 | } |
[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 177 | |
| 178 | } // namespace query_parser |