一文吃透Elasticsearch基础操作：索引+文档+聚合，全都有例子

原创于 2025-06-09 10:09:55 发布 · 1.1k 阅读

27 ·

CC 4.0 BY-SA版权

文章标签：

#elasticsearch #搜索引擎

ELK 专栏收录该内容

8 篇文章

订阅专栏

Elasticsearch入门必备：最常用的索引与文档操作命令大全

Elasticsearch常用的索引、文档基础操作语句，例如创建索引、删除索引、修改索引设置、修改索引mapping字段结构、新增文档、删除文档、查询文档、聚合统计等常用操作。

1. 索引管理命令

1.1. 创建索引

1.1.1. 基本创建

PUT /your_index_name
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "title": {"type": "text"},
      "createTime": {"type": "date"}
    }
  }
}

1.1.2. 带别名创建

PUT /your_index_name_2025
{
  "aliases": {
    "your_index_name": {}
  },
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "title": {"type": "text"},
      "createTime": {"type": "date"}
    }
  }
}

1.2. 删除索引

1.2.1. 删除单个索引

DELETE /your_index_name

1.2.2. 批量删除索引

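DELETE /your_index_name_*

注意：Elasticsearch 8.x 默认开启 action.destructive_requires_name（值为 true），会拒绝使用通配符删除索引；需要先将该集群设置改为 false，或者逐个写明要删除的索引名称。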

1.3. 查看索引信息

1.3.1. 查看索引设置

GET /your_index_name/_settings

1.3.2. 查看索引映射

GET /your_index_name/_mapping

1.3.3. 查看所有索引

GET /_cat/indices?v

1.4. 修改索引信息

ES不允许直接修改已存在字段的映射类型，可以用如下方法进行处理

1.4.1. 创建新索引定义新映射

PUT /new_index_name
{
  "mappings": {
    "properties": {
      "balance": {
        "type": "double"  // 修改为新的类型
      }
      // 其他字段保持不变，继续添加时字段之间用逗号分隔
    }
  }
}

1.4.2. 迁移数据

POST /_reindex
{
  "source": {
    "index": "old_index_name"	// 旧索引名称
  },
  "dest": {
    "index": "new_index_name"	// 新索引名称
  }
}

1.4.3. 切换别名

如果旧索引没有使用别名，则用如下操作

// 需要先删除旧索引
DELETE /old_index_name

// 给新索引加上别名，别名为旧索引的名称
POST /_aliases
{
  "actions": [
    {
      "add": {
        "index": "new_index_name",
        "alias": "old_index_name"
      }
    }
  ]
}

如果旧索引有使用别名，则用如下操作

POST /_aliases
{
  "actions": [
    {
      "remove": {	// 移除旧索引的别名index_alias_name
        "index": "old_index_name",
        "alias": "index_alias_name"
      }
    },
    {
      "add": { // 给新索引加上别名index_alias_name
        "index": "new_index_name",
        "alias": "index_alias_name"
      }
    }
  ]
}

1.5. 已有索引添加新映射

PUT /your_index_name/_mapping
{
  "properties": {
    "newField": {	// 新字段映射
      "type": "text"
    }
  }
}

2. 别名管理命令

2.1. 索引添加别名

2.1.1. 在创建索引时直接设置别名

PUT /your_index_name
{
  "aliases": {
    "your_index_alias_name": {}  // 索引别名
  },
  "settings": {
    // 您现有的settings配置
    ...
  },
  "mappings": {
    // 您现有的mappings配置
    ...
  }
}

2.1.2. 为已存在的索引添加别名

POST /_aliases
{
  "actions": [
    {
      "add": {
        "index": "your_index_name",
        "alias": "your_index_alias_name"
      }
    }
  ]
}

2.2. 索引删除别名

DELETE /index_name/_alias/alias_name

2.3. 查看别名对应的索引

GET /_alias/your_alias_name

3. 文档操作命令

3.1. 新增文档

POST /your_index_name/_doc/1000003
{
  "id": "1000003",
  "className": "16级XXX二班",
  "submitTime": "2024-05-05 12:04:11",
  "state": 1,
  "unitProvince": "420000000000",
  "unitCity": "429000000000",
  "unitArea": "429004000000"
}

3.2. 修改文档数据

POST /your_index_name/_update/1000003
{
  "doc": {
    "submitTime": "2025-04-01 21:15:05"
  }
}

3.3. 删除文档数据

DELETE /your_index_name/_doc/1000003

3.4. 数据查询

3.4.1. 范围查询

# 范围查询
POST data_index_name/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "createTime": {
              "gte": "2025-01-01 00:00:00",
              "lte": "2025-02-10 23:59:59",
              "format": "yyyy-MM-dd HH:mm:ss"  // 显式指定日期格式，避免字段使用默认date格式时解析失败
            }
          }
        }
      ]
    }
  }
}

3.4.2. 前缀查询

# 前缀查询(带字母)
POST data_index_name/_search
{
  "query": {
    "prefix": {
      "fieldName.keyword": "CODE250506"
    }
  }
}

类似于SQL里的右模糊查询

select * from data_index_name where fieldName like 'CODE250506%';

3.4.3. 模糊查询

# 模糊查询
POST data_index_name/_search
{
  "query": {
    "wildcard": {
      "fieldName.keyword": "*CODE250506*"
    }
  }
}

类似于SQL里的全模糊查询

select * from data_index_name where fieldName like '%CODE250506%';

3.4.4. 混合模糊查询

POST data_index_name/_search
{
  "query": {
    "query_string": {
      "default_field": "fieldName",
      "query": "CODE*0506* OR CODE0506*"
    }
  }
}

类似于SQL里的组合模糊查询

select * from data_index_name where fieldName like 'CODE%0506%' or fieldName like 'CODE0506%';

3.5. 聚合查询

3.5.1. 统计总数

类比

select count(*) from table;

查询示例

POST /your_index_name/_search
{
  "size": 0, 
  "aggs": {
    "total_count": {
      "value_count": {
        "field": "id"
      }
    }
  }
}

结果示例

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 113,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "total_count" : {
      "value" : 113	// 统计结果
    }
  }
}

3.5.2. 分组统计总数

类比

select unit_id, count(*) from table group by unit_id;

查询示例

POST /your_index_name/_search
{
  "size": 0, 
  "aggs": {
    "by_unitId": {
      "terms": {
        "field": "unitId",
        "size": 1
      }
    }
  }
}

结果示例

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 113,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "by_unitId" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 60,
      "buckets" : [
        {
          "key" : "96d428ff45644c9dae5dc08313467d86",
          "doc_count" : 53	// 单位Id为96d428ff45644c9dae5dc08313467d86的文档数据量结果
        }
      ]
    }
  }
}

3.5.3. 统计总平均数

类比

select avg(planVolume) from table;

查询示例

POST /your_index_name/_search
{
  "size": 0, 
  "aggs": {
    "avg_planVolume": {
      "avg": {
        "field": "planVolume"
      }
    }
  }
}

结果示例

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 113,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "avg_planVolume" : {
      "value" : 1318.9097345132743	// 平均数统计结果
    }
  }
}

3.5.4. 统计最大数

类比

select max(planVolume) from table;

查询示例

POST /your_index_name/_search
{
  "size": 0, 
  "aggs": {
    "max_planVolume": {
      "max": {
        "field": "planVolume"
      }
    }
  }
}

结果示例

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 113,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "max_planVolume" : {
      "value" : 50000.0	// 最大数统计结果
    }
  }
}

3.5.5. 统计最小数

类比

select min(planVolume) from table;

查询示例

POST /your_index_name/_search
{
  "size": 0, 
  "aggs": {
    "min_planVolume": {
      "min": {
        "field": "planVolume"
      }
    }
  }
}

结果示例

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 113,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "min_planVolume" : {
      "value" : 1.0	// 最小数统计结果
    }
  }
}

3.5.6. 综合统计信息

类比

select count(*), max(planVolume), min(planVolume), avg(planVolume), sum(planVolume) from table

查询示例

POST /your_index_name/_search
{
  "size": 0, 
  "aggs": {
    "stats_planVolume": {
      "stats": {
        "field": "planVolume"
      }
    }
  }
}

结果示例

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 113,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "stats_planVolume" : {	// 综合统计结果
      "count" : 113,	// 总文档数
      "min" : 1.0,		// 最小数
      "max" : 50000.0,// 最大数
      "avg" : 1318.91,// 平均数
      "sum" : 149036.8// 总和
    }
  }
}

3.5.7. 分组综合统计

查询示例

GET /your_index_name/_search
{
  "size": 0,  // 不返回原始文档，只返回聚合结果
  "query": {
    "bool": {
      "filter": [
        {
          "term": {  // state的精确匹配过滤
            "state": 1  // 过滤条件，替换为需要的state值
          }
        }
        // 可以添加其他过滤条件
      ]
    }
  },
  "aggs": {
    "group_by_unit": {
      "terms": {
        "field": "unitId",  // 按指定的字段进行分组
        "size": 100,  // 返回的分组数量，根据需要调整
        "order": {
          "total_volume": "desc"  // 按照每个unitId计算planVolume总和进行降序排序
        }
      },
      "aggs": {
        "total_volume": {
          "sum": {  // 计算planVolume总和
            "field": "planVolume"
          }
        },
        "data_doc_count": {
          "value_count": {  // 统计根据unitId进行分组后，每组unitId的文档数据量
            "field": "id"
          }
        },
        "volume_stats": {
          "stats": {  // 附加统计信息（可选）,会返回unitId组里的文档数量、planVolume的最大值、最小值、平均值、总和
            "field": "planVolume"
          }
        }
      }
    },
    "global_volume": {  // 全局总和（可选），会统计所有unitId组的planVolume之和
      "sum": {
        "field": "planVolume"
      }
    }
  }
}

结果示例

{
  "aggregations": {
    "group_by_unit": {
      "buckets": [
        {
          "key": "100001",  // unitId值
          "doc_count": 123,     // 该unitId下的文档数量
          "total_volume": {
            "value": 4567.89    // planVolume总和
          },
          "data_doc_count": {
            "value": 123  // 该unitId下的文档数量
          },
          "volume_stats": {     // 附加统计信息
            "count": 123,
            "min": 10.5,
            "max": 50.2,
            "avg": 37.13,
            "sum": 4567.89
          }
        },
        // 其他unitId分组...
      ]
    },
    "global_volume": {
      "value": 12345.67  // 所有匹配文档的总和
    }
  }
}

3.6. 分页查询

3.6.1. From/Size 分页（基础分页）

GET /your_index/_search
{
  "query": { "match_all": {} },
  "from": 0,   	// 起始位置
  "size": 10    // 每页大小
}

特点

优点：简单易用，适合小数据量分页
缺点：深度分页性能差（如 from=10000）
限制：from + size 不能超过 index.max_result_window（默认 10000）

适用场景

前几页的简单分页
数据量小于 10,000 条的场景

3.6.2. Search After 游标分页（深度分页推荐方案）

# 第一次查询（需要指定排序）
GET /your_index/_search
{
  "query": { "match_all": {} },
  "size": 10,
  "sort": [
    {"timestamp": "desc"},  
    {"_id": "asc"}          // 必须包含唯一性字段,确保排序唯一；官方不建议直接对_id排序（新版本默认限制），建议改用开启doc_values的业务唯一字段（例如冗余一份id的keyword字段）
  ]
}

# 后续查询（使用上次最后一条的sort值）
GET /your_index/_search
{
  "query": { "match_all": {} },
  "size": 10,
  "search_after": [         // 使用上次结果的sort值
    "2023-05-20 12:00:00",
    "doc123"
  ],
  "sort": [
    {"timestamp": "desc"},
    {"_id": "asc"}
  ]
}

特点

优点：适合深度分页，性能稳定
缺点：必须指定稳定排序，不能随机跳页
无限制：不受 max_result_window 限制

适用场景

大数据量的深度分页（如导出所有数据）
无限滚动（Infinite Scroll）类应用

3.6.3. 聚合查询分页

不分页的聚合查询为

POST /your_index_name/_search
{
  "size": 0,
  "aggs": {
    "by_unitId": {
      "terms": {
        "field": "unitId",
        "size": 10
      }
    }
  }
}

分页的聚合查询为

第一页查询

POST /your_index_name/_search
{
  "size": 0,
  "aggs": {
    "by_unitId": {
      "terms": {
        "field": "unitId",
        "size": 10,
        "order": {
          "_key": "asc"  // 或 "_count": "desc" 按数量排序
        }
      }
    }
  }
}

注意：如果你要分页，建议显式设置 "order"，否则默认是按 _count 降序。为了稳定分页，推荐使用 _key 排序。

第二页查询

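如果第一页最后一条bucket数据的key是“1000001”（也就是第一页最后一条数据的unitId），则第二页请求语句如下。注意：terms 聚合的 include 参数只支持正则、精确值数组和 partition 分区，并不支持 after；按 key 向后翻页需要使用 composite 聚合，通过 after 传入上一页最后一个 bucket 的 key（去掉 after 即为第一页，后续翻页可直接使用响应中返回的 after_key）

POST /your_index_name/_search
{
  "size": 0,
  "aggs": {
    "by_unitId": {
      "composite": {
        "size": 10,
        "sources": [
          {
            "unitId": {
              "terms": {
                "field": "unitId",
                "order": "asc"
              }
            }
          }
        ],
        "after": {
          "unitId": "1000001"
        }
      }
    }
  }
}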