1.作者
caicongyang: http://blog.csdn.net/caicongyang
2.代码(注释清晰)
package com.ccy.lucene.app;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
/**
 * Demonstrates several Lucene {@link Query} types (term, phrase, wildcard and
 * boolean) against an existing on-disk index, printing the first page of hits
 * with the matched keywords highlighted.
 *
 * <p>
 * Title: QueryTest.java
 * Package com.ccy.lucene.app
 * </p>
 *
 * <p>
 * Every test assumes an index already exists at {@link #indexPath} and that its
 * documents carry the fields {@code name}, {@code content}, {@code size} and
 * {@code path}; nothing in this class builds the index.
 * </p>
 *
 * @author Tom.Cai
 * @created 2015-11-10 8:17:50 PM
 * @version V1.0
 */
public class QueryTest {

    /** System property that may override the index location. */
    private static final String INDEX_PATH_PROPERTY = "lucene.index.path";

    /** Index location used when {@link #INDEX_PATH_PROPERTY} is not set. */
    private static final String DEFAULT_INDEX_PATH = "D:\\newWork\\lucene5.3\\luceneIndex";

    /** Name of the field that is searched and highlighted. */
    private static final String CONTENT_FIELD = "content";

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 100;

    /** Number of hits printed (the first "page"). */
    private static final int PAGE_SIZE = 10;

    /** Fragment size for the highlighter and length of the fallback excerpt. */
    private static final int FRAGMENT_SIZE = 50;

    // Directory holding the index files.
    String indexPath = System.getProperty(INDEX_PATH_PROPERTY, DEFAULT_INDEX_PATH);

    // Dictionary-based Chinese analyzer, used here to re-tokenize stored text for highlighting.
    // NOTE(review): the analyzer is never closed; acceptable for a short-lived test instance.
    Analyzer analyzer = new SmartChineseAnalyzer();

    /**
     * Term (keyword) query.
     *
     * <p>The term is written in lower case; per the original author's note, English
     * text is lower-cased by the analyzer at index time.</p>
     *
     * @throws Exception if the index cannot be opened or searched
     */
    @Test
    public void testTermQuery() throws Exception {
        Term term = new Term(CONTENT_FIELD, "room");
        TermQuery query = new TermQuery(term);
        search(query);
    }

    /**
     * Phrase query: both terms must occur within the configured slop.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    @Test
    public void testPhraseQuery() throws Exception {
        search(buildRoomArrangementPhrase());
    }

    /**
     * Wildcard query. {@code '?'} stands for exactly one character and
     * {@code '*'} for zero or more characters.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    @Test
    public void testWildcardQuery() throws Exception {
        Term term = new Term(CONTENT_FIELD, "roo?");
        Query query = new WildcardQuery(term);
        search(query);
    }

    /**
     * Boolean query combining a phrase query and a term query; both clauses are
     * required ({@link Occur#MUST}).
     *
     * @throws Exception if the index cannot be opened or searched
     */
    @Test
    public void testBooleanQuery() throws Exception {
        // Clause 1: the phrase query.
        PhraseQuery query1 = buildRoomArrangementPhrase();
        // Clause 2: a single term.
        TermQuery query2 = new TermQuery(new Term(CONTENT_FIELD, "总统"));
        // Combine: a document must satisfy both clauses.
        BooleanQuery.Builder bbuilder = new BooleanQuery.Builder();
        bbuilder.add(query1, Occur.MUST);
        bbuilder.add(query2, Occur.MUST);
        BooleanQuery boolQuery = bbuilder.build();
        search(boolQuery);
    }

    /**
     * Runs {@code query} against the index at {@link #indexPath}, prints the total
     * hit count and then the first {@value #PAGE_SIZE} hits, with the
     * {@code content} field replaced by a highlighted excerpt.
     *
     * @param query the query to execute
     * @throws Exception if the index cannot be opened, searched or highlighted
     */
    public void search(Query query) throws Exception {
        List<Document> recordList = new ArrayList<Document>();

        // try-with-resources closes the reader and the directory even when searching or
        // highlighting throws; the reader does not close a directory it was handed.
        try (FSDirectory directory = FSDirectory.open(Paths.get(indexPath));
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // Run the search.
            TopDocs topDocs = searcher.search(query, MAX_HITS);
            int recordCount = topDocs.totalHits;
            System.out.println("总共有【" + recordCount + "】条匹配结果");

            // Prepare the highlighter: matched terms are wrapped in a red <font> tag.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            // Excerpt (fragment) size.
            Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
            highlighter.setTextFragmenter(fragmenter);

            // Collect the first page; bound by the array that is actually indexed.
            int end = Math.min(PAGE_SIZE, topDocs.scoreDocs.length);
            for (int i = 0; i < end; i++) {
                ScoreDoc scoreDoc = topDocs.scoreDocs[i];
                int docSn = scoreDoc.doc; // internal document number
                Document doc = searcher.doc(docSn); // load the stored document
                highlightContent(highlighter, doc);
                recordList.add(doc);
            }
        }

        // The documents are fully loaded in memory, so printing needs no open reader.
        for (Document document : recordList) {
            printDocument(document);
        }
    }

    /** Builds the phrase query shared by the phrase and boolean tests. */
    private PhraseQuery buildRoomArrangementPhrase() {
        PhraseQuery.Builder builder = new PhraseQuery.Builder();
        builder.add(new Term(CONTENT_FIELD, "房间"));
        builder.add(new Term(CONTENT_FIELD, "安排"));
        builder.setSlop(5); // how far apart the two terms may be
        return builder.build();
    }

    /** Replaces the stored {@code content} of {@code doc} with a highlighted (or leading) excerpt. */
    private void highlightContent(Highlighter highlighter, Document doc) throws Exception {
        String content = doc.get(CONTENT_FIELD);
        if (content == null) {
            // No stored content on this hit: nothing to highlight or truncate.
            return;
        }
        // getBestFragment returns null when the text contains none of the query terms.
        String hc = highlighter.getBestFragment(analyzer, CONTENT_FIELD, content);
        if (hc == null) {
            // Fall back to the leading characters of the content.
            int endIndex = Math.min(FRAGMENT_SIZE, content.length());
            hc = content.substring(0, endIndex);
        }
        doc.removeField(CONTENT_FIELD);
        doc.add(new Field(CONTENT_FIELD, hc, TextField.TYPE_STORED));
    }

    /** Prints the displayed fields of one hit to standard output. */
    private void printDocument(Document document) {
        System.out.println("------------------------------");
        System.out.println("name = " + document.get("name"));
        System.out.println("content = " + document.get("content"));
        System.out.println("size = " + document.get("size"));
        System.out.println("path = " + document.get("path"));
    }
}
3.我的博客