blob: c99c639b96447493d73b915cd04240f6836f2af0 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commit09911bf2008-07-26 23:55:294
avif57136c12015-12-25 23:27:455#include <stddef.h>
6
7#include "base/macros.h"
[email protected]3b63f8f42011-03-28 01:54:158#include "base/memory/scoped_vector.h"
[email protected]112158af2013-06-07 23:46:189#include "base/strings/utf_string_conversions.h"
[email protected]acf9f272014-04-15 23:04:0010#include "components/query_parser/query_parser.h"
initial.commit09911bf2008-07-26 23:55:2911#include "testing/gtest/include/gtest/gtest.h"
12
[email protected]acf9f272014-04-15 23:04:0013namespace query_parser {
14
initial.commit09911bf2008-07-26 23:55:2915class QueryParserTest : public testing::Test {
16 public:
17 struct TestData {
[email protected]e53668962010-06-23 15:35:2518 const char* input;
initial.commit09911bf2008-07-26 23:55:2919 const int expected_word_count;
20 };
21
[email protected]e53668962010-06-23 15:35:2522 std::string QueryToString(const std::string& query);
initial.commit09911bf2008-07-26 23:55:2923
24 protected:
25 QueryParser query_parser_;
26};
27
[email protected]e53668962010-06-23 15:35:2528// Test helper: Convert a user query string in 8-bit (for hardcoding
29// convenience) to a SQLite query string.
30std::string QueryParserTest::QueryToString(const std::string& query) {
[email protected]439f1e32013-12-09 20:09:0931 base::string16 sqlite_query;
kkimlabsf1a7a3732014-11-04 10:30:4632 query_parser_.ParseQuery(base::UTF8ToUTF16(query),
33 MatchingAlgorithm::DEFAULT,
34 &sqlite_query);
[email protected]04338722013-12-24 23:18:0535 return base::UTF16ToUTF8(sqlite_query);
initial.commit09911bf2008-07-26 23:55:2936}
37
38// Basic multi-word queries, including prefix matching.
39TEST_F(QueryParserTest, SimpleQueries) {
[email protected]e53668962010-06-23 15:35:2540 EXPECT_EQ("", QueryToString(" "));
41 EXPECT_EQ("singleword*", QueryToString("singleword"));
42 EXPECT_EQ("spacedout*", QueryToString(" spacedout "));
43 EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
initial.commit09911bf2008-07-26 23:55:2944 // Short words aren't prefix matches. For Korean Hangul
45 // the minimum is 2 while for other scripts, it's 3.
[email protected]e53668962010-06-23 15:35:2546 EXPECT_EQ("f b", QueryToString(" f b"));
initial.commit09911bf2008-07-26 23:55:2947 // KA JANG
[email protected]04338722013-12-24 23:18:0548 EXPECT_EQ(base::WideToUTF8(L"\xAC00 \xC7A5"),
49 QueryToString(base::WideToUTF8(L" \xAC00 \xC7A5")));
[email protected]e53668962010-06-23 15:35:2550 EXPECT_EQ("foo* bar*", QueryToString(" foo bar "));
initial.commit09911bf2008-07-26 23:55:2951 // KA-JANG BICH-GO
[email protected]04338722013-12-24 23:18:0552 EXPECT_EQ(base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"),
53 QueryToString(base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0")));
initial.commit09911bf2008-07-26 23:55:2954}
55
56// Quoted substring parsing.
57TEST_F(QueryParserTest, Quoted) {
[email protected]d3216442009-03-05 21:07:2758 // ASCII quotes
[email protected]e53668962010-06-23 15:35:2559 EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\""));
[email protected]d3216442009-03-05 21:07:2760 // Missing end quotes
[email protected]e53668962010-06-23 15:35:2561 EXPECT_EQ("\"miss end\"", QueryToString("\"miss end"));
[email protected]d3216442009-03-05 21:07:2762 // Missing begin quotes
[email protected]e53668962010-06-23 15:35:2563 EXPECT_EQ("miss* beg*", QueryToString("miss beg\""));
[email protected]d3216442009-03-05 21:07:2764 // Weird formatting
[email protected]e53668962010-06-23 15:35:2565 EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes"));
initial.commit09911bf2008-07-26 23:55:2966}
67
68// Apostrophes within words should be preserved, but otherwise stripped.
69TEST_F(QueryParserTest, Apostrophes) {
[email protected]e53668962010-06-23 15:35:2570 EXPECT_EQ("foo* bar's*", QueryToString("foo bar's"));
71 EXPECT_EQ("l'foo*", QueryToString("l'foo"));
72 EXPECT_EQ("foo*", QueryToString("'foo"));
initial.commit09911bf2008-07-26 23:55:2973}
74
75// Special characters.
76TEST_F(QueryParserTest, SpecialChars) {
[email protected]e53668962010-06-23 15:35:2577 EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar"));
initial.commit09911bf2008-07-26 23:55:2978}
79
80TEST_F(QueryParserTest, NumWords) {
81 TestData data[] = {
[email protected]e53668962010-06-23 15:35:2582 { "blah", 1 },
83 { "foo \"bar baz\"", 3 },
84 { "foo \"baz\"", 2 },
85 { "foo \"bar baz\" blah", 4 },
initial.commit09911bf2008-07-26 23:55:2986 };
87
[email protected]a61855082008-11-14 18:54:0288 for (size_t i = 0; i < arraysize(data); ++i) {
[email protected]439f1e32013-12-09 20:09:0989 base::string16 query_string;
initial.commit09911bf2008-07-26 23:55:2990 EXPECT_EQ(data[i].expected_word_count,
[email protected]04338722013-12-24 23:18:0591 query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input),
kkimlabsf1a7a3732014-11-04 10:30:4692 MatchingAlgorithm::DEFAULT,
[email protected]e53668962010-06-23 15:35:2593 &query_string));
initial.commit09911bf2008-07-26 23:55:2994 }
95}
96
97TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
98 struct TestData2 {
[email protected]e53668962010-06-23 15:35:2599 const std::string query;
100 const std::string text;
initial.commit09911bf2008-07-26 23:55:29101 const bool matches;
[email protected]5b4eb8f2009-02-02 12:18:46102 const size_t m1_start;
103 const size_t m1_end;
104 const size_t m2_start;
105 const size_t m2_end;
initial.commit09911bf2008-07-26 23:55:29106 } data[] = {
[email protected]25320602012-10-18 22:05:56107 { "foo", "fooey foo", true, 0, 3, 6, 9 },
[email protected]e53668962010-06-23 15:35:25108 { "foo foo", "foo", true, 0, 3, 0, 0 },
109 { "foo fooey", "fooey", true, 0, 5, 0, 0 },
[email protected]25320602012-10-18 22:05:56110 { "fooey foo", "fooey", true, 0, 5, 0, 0 },
[email protected]e53668962010-06-23 15:35:25111 { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 },
112 { "blah", "blah", true, 0, 4, 0, 0 },
113 { "blah", "foo", false, 0, 0, 0, 0 },
114 { "blah", "blahblah", true, 0, 4, 0, 0 },
115 { "blah", "foo blah", true, 4, 8, 0, 0 },
116 { "foo blah", "blah", false, 0, 0, 0, 0 },
117 { "foo blah", "blahx foobar", true, 0, 4, 6, 9 },
118 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
119 { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 },
120 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
121 { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 },
122 { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 },
initial.commit09911bf2008-07-26 23:55:29123 };
viettrungluu37a447b2014-10-16 18:23:27124 for (size_t i = 0; i < arraysize(data); ++i) {
initial.commit09911bf2008-07-26 23:55:29125 QueryParser parser;
126 ScopedVector<QueryNode> query_nodes;
[email protected]04338722013-12-24 23:18:05127 parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query),
kkimlabsf1a7a3732014-11-04 10:30:46128 MatchingAlgorithm::DEFAULT,
[email protected]04338722013-12-24 23:18:05129 &query_nodes.get());
[email protected]6956cd62008-08-29 19:48:58130 Snippet::MatchPositions match_positions;
initial.commit09911bf2008-07-26 23:55:29131 ASSERT_EQ(data[i].matches,
[email protected]04338722013-12-24 23:18:05132 parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text),
[email protected]e53668962010-06-23 15:35:25133 query_nodes.get(),
[email protected]6956cd62008-08-29 19:48:58134 &match_positions));
135 size_t offset = 0;
136 if (data[i].m1_start != 0 || data[i].m1_end != 0) {
137 ASSERT_TRUE(match_positions.size() >= 1);
138 EXPECT_EQ(data[i].m1_start, match_positions[0].first);
139 EXPECT_EQ(data[i].m1_end, match_positions[0].second);
140 offset++;
141 }
142 if (data[i].m2_start != 0 || data[i].m2_end != 0) {
143 ASSERT_TRUE(match_positions.size() == 1 + offset);
144 EXPECT_EQ(data[i].m2_start, match_positions[offset].first);
145 EXPECT_EQ(data[i].m2_end, match_positions[offset].second);
146 }
initial.commit09911bf2008-07-26 23:55:29147 }
license.botbf09a502008-08-24 00:55:55148}
[email protected]7de99592008-12-09 19:16:02149
[email protected]8c793c82011-05-19 00:41:33150TEST_F(QueryParserTest, ParseQueryWords) {
[email protected]7de99592008-12-09 19:16:02151 struct TestData2 {
[email protected]e53668962010-06-23 15:35:25152 const std::string text;
153 const std::string w1;
154 const std::string w2;
155 const std::string w3;
[email protected]7de99592008-12-09 19:16:02156 const size_t word_count;
157 } data[] = {
[email protected]e53668962010-06-23 15:35:25158 { "foo", "foo", "", "", 1 },
159 { "foo bar", "foo", "bar", "", 2 },
160 { "\"foo bar\"", "foo", "bar", "", 2 },
161 { "\"foo bar\" a", "foo", "bar", "a", 3 },
[email protected]7de99592008-12-09 19:16:02162 };
viettrungluu37a447b2014-10-16 18:23:27163 for (size_t i = 0; i < arraysize(data); ++i) {
[email protected]d2065e062013-12-12 23:49:52164 std::vector<base::string16> results;
[email protected]7de99592008-12-09 19:16:02165 QueryParser parser;
kkimlabsf1a7a3732014-11-04 10:30:46166 parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text),
167 MatchingAlgorithm::DEFAULT,
168 &results);
[email protected]7de99592008-12-09 19:16:02169 ASSERT_EQ(data[i].word_count, results.size());
[email protected]04338722013-12-24 23:18:05170 EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0]));
[email protected]7de99592008-12-09 19:16:02171 if (results.size() == 2)
[email protected]04338722013-12-24 23:18:05172 EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1]));
[email protected]7de99592008-12-09 19:16:02173 if (results.size() == 3)
[email protected]04338722013-12-24 23:18:05174 EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2]));
[email protected]7de99592008-12-09 19:16:02175 }
176}
[email protected]acf9f272014-04-15 23:04:00177
178} // namespace query_parser