// html_strip char filter removes HTML tags before tokenization;
// the keyword tokenizer then emits the whole text as one token: "hello world"
POST _analyze
{
"tokenizer": "keyword",
"char_filter": ["html_strip"],
"text": "<b>hello world</b>"
}
// Use a mapping char filter to replace characters ("-" becomes "_")
// The mapping char filter rewrites "-" to "_" before the standard tokenizer runs,
// so "123-456" stays together as the token "123_456" instead of splitting on "-"
POST _analyze
{
"tokenizer": "standard",
"char_filter": [{
"type":"mapping",
"mappings":["- => _"]
}],
"text": "123-456,I-test! test-990 650-555-1234"
}
// Use a mapping char filter to replace emoticons with words
// Maps the emoticons ":)" and ":(" to the words "happy" and "sad" before tokenization.
// Fix: the sample text misspelled "feeling" as "felling"/"Felling".
POST _analyze
{
"tokenizer": "standard",
"char_filter": [
{
"type":"mapping",
"mappings":[":) => happy",":( => sad"]
}
],
"text": ["I am feeling :)","Feeling :( today"]
}
// Use a pattern_replace char filter (regular expression) to strip the "http://" prefix
// pattern_replace rewrites "http://www.elastic.co" to "www.elastic.co" ($1 is the
// first capture group) before the standard tokenizer splits the remaining text
GET _analyze
{
"tokenizer": "standard",
"char_filter": [
{
"type":"pattern_replace",
"pattern":"http://(.*)",
"replacement":"$1"
}],
"text": "http://www.elastic.co"
}
// path_hierarchy emits one token per path prefix:
// "/user", "/user/ymruan", "/user/ymruan/a", ... down to the full path
POST _analyze
{
"tokenizer": "path_hierarchy",
"text": "/user/ymruan/a/b/c/d/e"
}
// whitespace tokenizer combined with the stop token filter
// The stop filter drops lowercase stopwords ("in", "on", "the"), but the
// capitalized "The" survives because stopword matching is case-sensitive here
GET _analyze
{
"tokenizer":"whitespace",
"filter":["stop"],
"text":["The rain in Spain falls mainly on the plain."]
}
// After adding the lowercase filter, "The" becomes "the" and is removed as a stopword
// Filters run in order: lowercase first, then stop — so "The" -> "the" -> removed
GET _analyze
{
"tokenizer": "whitespace",
"filter": ["lowercase","stop"],
"text": ["The girls in China are playing this game!"]
}
// Remove any existing my_index so the PUT below recreates it from scratch
DELETE my_index
// Create an index with a custom analyzer composed of three custom components:
//   char_filter "emoticons"  — maps ":)"/":(" to "_happy_"/"_sad_" before tokenizing
//   tokenizer  "punctuation" — pattern tokenizer splitting on space . , ! ?
//   filters lowercase + "english_stop" — lowercases, then drops English stopwords
PUT my_index
{
"settings": {
"analysis": {
"analyzer": {
"my_custom_analyzer":{
"type":"custom",
"char_filter":[
"emoticons"],
"tokenizer":"punctuation",
"filter":[
"lowercase","english_stop"
]
}
},
"tokenizer": {
"punctuation":{
"type":"pattern",
"pattern":"[ .,!?]"
}
},
"char_filter": {
"emoticons":{
"type":"mapping",
"mappings":[
":) => _happy_",
":( => _sad_"
]
}
},
"filter": {
"english_stop":{
"type":"stop",
"stopwords":"_english_"
}
}
}
}
}
// Exercise the custom analyzer: ":)" maps to "_happy_", punctuation splits the
// tokens, lowercase + english_stop remove stopwords like "a" and "and"
POST my_index/_analyze
{
"analyzer": "my_custom_analyzer",
"text": "I'm a :) person,and you?"
}