ElasticSearch学习笔记(四)
案例-京东搜索
1.项目准备
创建spring boot项目,引入相关依赖
<!-- Dependencies of the demo project. No <version> elements are given here; presumably they are managed by the Spring Boot parent POM (TODO confirm against the full pom.xml). -->
<dependencies>
<!-- Spring Boot starter for Elasticsearch access. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<!-- Spring Boot starter for the web layer (REST controllers). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Lombok; marked optional so it is not passed on to projects that depend on this one. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Test starter, test scope only; the JUnit vintage engine is excluded. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Thymeleaf starter for server-side page templates. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
</dependencies>
测试页面
2.爬取数据&封装数据
引入新的依赖
<!-- Add the jsoup library (HTML parsing); the version is pinned explicitly to 1.10.2. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
分析网页
封装成一个工具类HtmlParseUtil
分析:每个商品是一个li,取出其中相应的属性,封装成对象后返回
package com.jiang.esjd.service;
import com.alibaba.fastjson.JSON;
import com.jiang.esjd.entity.Content;
import com.jiang.esjd.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Indexes scraped JD product data into Elasticsearch and runs paged,
 * highlighted keyword searches against that index.
 */
@Service
public class CotentService {

    /** Name of the index the products are written to and searched in. */
    private static final String INDEX_NAME = "jd_goods";

    /**
     * Document field that is both queried and highlighted.
     * NOTE(review): the original inline comment said the query targets the
     * field "name", but the code has always used "img" - confirm against the
     * Content entity which field is intended.
     */
    private static final String SEARCH_FIELD = "img";

    /** Shared client injected by Spring; it must stay open for the lifetime of the bean. */
    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Ad-hoc manual entry point.
     * NOTE(review): the instance is created with {@code new}, so the client is
     * not injected here; this only works when run inside a Spring context.
     */
    public static void main(String[] args) throws IOException {
        boolean indexed = new CotentService().parseContent("java");
        System.out.println(indexed);
    }

    /**
     * Scrapes the products for {@code keyword} and bulk-indexes them.
     *
     * <p>Documents get the ids "1".."n" in list order, so documents written by
     * an earlier call under the same ids are replaced.
     *
     * @param keyword search keyword handed to {@code HtmlParseUtil.parseJD}
     * @return {@code true} if the bulk request reported no failures;
     *         {@code false} if there were failures or nothing was scraped
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public Boolean parseContent(String keyword) throws IOException {
        List<Content> contents = HtmlParseUtil.parseJD(keyword);
        // A bulk request without any action is rejected by the client, so bail out early.
        if (contents == null || contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // adjust to the actual workload if needed
        int nextId = 1;
        for (Content content : contents) {
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME)
                            .id(String.valueOf(nextId++))
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }

        // The client is a shared bean: do NOT close it here, otherwise every
        // later request on this service fails.
        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulkResponse.hasFailures();
    }

    /**
     * Runs a term query for {@code keyword} and returns one page of hits, with
     * the searched field replaced by its highlighted version where available.
     *
     * @param keyword   term to search for
     * @param pageIndex zero-based page number; negative values are treated as 0
     * @param pageSize  number of hits per page
     * @return the source maps of the hits on the requested page
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> search(String keyword, Integer pageIndex, Integer pageSize) throws IOException {
        int page = pageIndex < 0 ? 0 : pageIndex;
        int size = pageSize;

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // Exact (term-level) match on the search field.
        sourceBuilder.query(QueryBuilders.termQuery(SEARCH_FIELD, keyword));
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // "from" is an offset counted in hits, not a page number.
        sourceBuilder.from(Math.multiplyExact(page, size));
        sourceBuilder.size(size);

        // Wrap matched terms in a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(SEARCH_FIELD);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(sourceBuilder);

        // The shared client stays open; see parseContent.
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, Object> source = hit.getSourceAsMap();
            applyHighlight(hit, source);
            results.add(source);
        }
        return results;
    }

    /** Overwrites the searched field in {@code source} with the hit's joined highlight fragments, if any. */
    private static void applyHighlight(SearchHit hit, Map<String, Object> source) {
        HighlightField highlight = hit.getHighlightFields().get(SEARCH_FIELD);
        if (highlight == null) {
            return;
        }
        StringBuilder highlighted = new StringBuilder();
        for (Text fragment : highlight.fragments()) {
            highlighted.append(fragment);
        }
        source.put(SEARCH_FIELD, highlighted.toString());
    }
}
CotentController
package com.jiang.esjd.controller;
import com.jiang.esjd.service.CotentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * REST endpoints that delegate to {@link CotentService}: one to scrape and
 * index products for a keyword, one to search the index page by page.
 */
@RestController
public class CotentController {

    @Autowired
    private CotentService cotentService;

    /**
     * Scrapes the products for the given keyword and indexes them.
     *
     * @param keyWords keyword taken from the request path
     * @return the result reported by {@code CotentService.parseContent}
     * @throws IOException propagated from the service
     */
    @GetMapping(value = "/parse/{keyWords}")
    public Boolean parse(@PathVariable(value = "keyWords") String keyWords) throws IOException {
        final Boolean indexed = cotentService.parseContent(keyWords);
        return indexed;
    }

    /**
     * Searches the index for the given keyword and returns one page of hits.
     *
     * @param keywords  keyword taken from the request path
     * @param pageIndex page index taken from the request path
     * @param pageSize  page size taken from the request path
     * @return the hits returned by {@code CotentService.search}
     * @throws IOException propagated from the service
     */
    @GetMapping(value = "/search/{keywords}/{pageIndex}/{pageSize}")
    public List<Map<String, Object>> searchList(
            @PathVariable(value = "keywords") String keywords,
            @PathVariable(value = "pageIndex") int pageIndex,
            @PathVariable(value = "pageSize") int pageSize) throws IOException {
        final List<Map<String, Object>> hits = cotentService.search(keywords, pageIndex, pageSize);
        return hits;
    }
}
3.测试
测试运行,数据成功入索引里
访问页面
搜索java
遍历展示搜索到的书本,并高亮显示关键字