Es 之旅 第一站


ElasticSearch

ES 之 增删改查

GET _search
{
  "query": {
    "match_all": {}
  }
}

# 添加doc文本数据
PUT ttt/doc/1
{
  "name":"ttt"
}

# 获取数据  格式: 索引(index, 相当于数据库)/类型(type)/文档id
GET ttt/doc/2

PUT dengshuai/doc/2
{
  "name":"zz",
  "age":19
}

# 获取所有数据doc 文本数据
GET dengshuai/doc/_search


# 指定id删除
DELETE dengshuai/doc/2
DELETE dengshuai


#---------增删改查CRUD---------------#

### 新增 格式: 索引(index, 相当于数据库)/类型(type)/文档id
PUT s23/doc/1
{
  "name":"帅",
  "age":24,
  "sex":"man",
  "desc":"笨",
  "tag":["睡觉"],
  "both":"1997-11-16"
}

PUT s23/doc/2
{
  "name":"亚伟",
  "age":26,
  "sex":"woman",
  "desc":"吃",
  "tag":["打游戏"],
  "both":"1995-11-16"
}

PUT s23/doc/3
{
  "name":"崽崽",
  "age":9,
  "sex":"woman",
  "desc":"白",
  "tag":["代码"],
  "both":"1992-11-16"
}

PUT s23/doc/4
{
  "name":"崽1",
  "age":25,
  "sex":"woman",
  "desc":"白",
  "tag":["代码"],
  "both":"1993-11-16"
}
PUT s23/doc/5
{
  "name":"崽2",
  "age":27,
  "sex":"woman",
  "desc":"白",
  "tag":["代码"],
  "both":"1993-11-16"
}


#### 查 
# 查所有   格式: 索引(index)/类型(type)/_search/
GET s23/doc/_search/

#  查一个  格式: 索引(index)/类型(type)/文档id
GET s23/doc/1




### 修改  格式: 索引(index)/类型(type)/文档id/_update
POST s23/doc/1/_update
{
 "doc":{
   "tag":["王者","手机"]
}
  
}



### 删除
# 删一个  格式: 索引(index)/类型(type)/文档id
DELETE s23/doc/3
# 删所有 ,删库
DELETE s23




#### Query-string (URI) search with more than one condition
# All conditions belong inside the single q= parameter and are combined with
# the query-string operators AND / OR. A bare "&" ends the q parameter and
# starts a new URL parameter, so "&sex:man" was never applied as a condition.
# %20 is a URL-encoded space.
GET s23/doc/_search?q=age:24%20AND%20sex:man


ES 之 DSL 查询

# Query DSL 灵活查询
### DSL 查询, Query DSL 灵活查询

### DSL 之 match 系列查询 ,match 返回所有匹配的分词.

GET s23/doc/_search
{
  "query": {
    "match": {
      "age": "24"
    }
  }
}


### DSL 之 match_all,查询全部 
GET s23/doc/_search
{
  "query": {
    "match_all": {
      
    }
  }
}


### DSL 之 sort排序查询 ,  注意:默认无法按照字符串(text 类型)字段排序
GET s23/doc/_search
{
  "query": {
    "match_all": {
      
    }
  },
  "sort": [
    {
      "age": {
        "order": "desc"
      }
    }
  ]
}



### from/size 可以做简单分页
# from 从第几条开始获取, 超出返回空列表
# size 获取多少条
GET s23/doc/_search
{
  "query": {
    "match_all": {}
  },
  "from":0,
  "size": 2 ,
  "sort": [
    {
      "age": {
        "order": "desc"
      }
    }
  ]
  
  
}

### source , 过滤字段
GET s23/doc/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "age": {
        "order": "desc"
      }
    }
  ],
  "_source": ["name","age"]
}



### HEAD 操作 , 查看状态
HEAD s23   ===》 200

ES 访问地址:

http://127.0.0.1:9200/

Kibana 访问地址

http://127.0.0.1:5601/app/kibana

ES 之 进阶操作

# 1. 短语查询  match_phrase
# 2. 分词   match
# 3. 前缀查询   match_phrase_prefix
# 4. 多字段查询  multi_match
# 5. 高亮查询   highlight
# 6. 聚合函数  sum avg
# 7. mapping 映射
# 8. settings 配置

# -------- match_phrase 短语查询

GET s23/doc/_search


GET s23/doc/_search
{
  "query": {"match": {
    "desc"  : "白"
  }}
}


PUT a1/doc/3
{
  "title":"美国和中国是好邻居"
}


### match 会将词语分词,只要符合条件就返回
GET a1/doc/_search
{
  "query": {
    "match": {
    "title": "中国的首都"
  }
    
  }
}

### match_phrase  短语查询,
# slop 控制短语中各个词之间允许的间隔(可移动的位置数),默认是0
GET a1/doc/_search
{
  "query": {
    "match_phrase": {
      "title": {
        "query": "中国的首都",
        "slop": 1 
      }
    }
  }
}


### match_phrase_prefix  词组最左前缀查询,根据短语最后一个词查询

PUT a2/doc/2
{
  "title":"you love you"
}
GET a2/doc/_search
{
  "query": {
    "match_phrase_prefix": {
      "title": "lo"
    }
  }
}


### multi_match , 多字段查询
# query: 放置查询的内容,
# fields: 字段
GET a2/doc/_search
{
  "query": {
    "multi_match": {
      "query": "i",
      "fields": ["title"]
    }
  }
}





### ------高亮查询--------


PUT t3/doc/2
{
  "title":"我的同学是李四"
}

GET t3/doc/_search
{
  "query": {
    "match_all": {}
  }
}

# 1. 高亮 highlight , 默认是em标签
GET t3/doc/_search
{
  "query": {
    "match": {
      "title": "同学"
    }
  },
  "highlight": {
    "fields": {"title": {}}
  }
}
# 2. 自定义高亮 ,pre_tags和post_tags
GET t3/doc/_search
{
  "query": {
    "match": {
      "title": "同学"
    }
  },
  "highlight": {
    "pre_tags": "<b style='color:red'>",
    "post_tags": "</b>",
    "fields": {"title": {}}
  }
}

# --------聚合函数----------
# aggs
# name是自定义标题
# 类型 , 字段

# avg 平均
GET s23/doc/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "my_avg_age": {
      "avg": {
        "field": "age"
      }
    }
  }
}

# sum 求和
GET s23/doc/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "my_sum_age": {
      "sum": {
        "field": "age"
      }
    }
  }
}

# range 范围
# from 闭合
# to 不闭合
GET s23/doc/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "my_range": {
      "range": {
        "field": "age",
        "ranges": [
          {"from": 30, "to": 35},
          {"from": 0, "to": 10},
          {"from": 10, "to": 30}
        ]
      }
    }
  }
}

# ------mapping映射(如何存储和索引的过程) :自定义表结构-----
GET s23

# 映射爆炸: 字段索引太多,导致内存溢出

# 1. 自定义 表结构
PUT t1
{
  "mappings": {
    "doc": {
      "properties": {
        "name": {"type": "text"},
        "age": {"type": "long"}
      }
    }
  }
}

GET t1

# dynamic:true 动态映射,没有的字段,自动补充类型
DELETE t2

PUT t2
{
  "mappings": {
    "doc": {
      "dynamic": true,
      "properties": {
        "t1": {"type": "text"},
        "t2": {"type": "text"}
      }
    }
  }
}

PUT t2/doc/1
{
  "t1": "大撒旦撒旦所",
  "t2": "都是"
}

PUT t2/doc/2
{
  "t1": "大撒旦撒旦所",
  "t2": "都是",
  "t3": "alex"
}

GET t2/doc/_search
{
  "query": {
    "match": {
      "t3": "alex"
    }
  }
}

# dynamic:false 静态映射 : 忽略未定义的字段,但是还是会存储数据
PUT t4
{
  "mappings": {
    "doc": {
      "dynamic": false,
      "properties": {
        "t1": {"type": "text"},
        "t2": {"type": "text"}
      }
    }
  }
}

PUT t4/doc/1
{
  "t1": "大撒旦撒旦所",
  "t2": "都是"
}

PUT t4/doc/2
{
  "t1": "大撒旦撒旦所",
  "t2": "都是",
  "t3": "alex"
}

PUT t4/doc/3
{
  "t3": "ale"
}

GET t4/doc/3

GET t4/doc/_search
{
  "query": {
    "match": {
      "t3": "alex"
    }
  }
}

# dynamic : strict 严格的映射类型
DELETE t5

PUT t5
{
  "mappings": {
    "doc": {
      "dynamic": "strict",
      "properties": {
        "t1": {"type": "text"},
        "t2": {"type": "text"}
      }
    }
  }
}

PUT t5/doc/1
{
  "t1": "大撒旦撒旦所",
  "t2": "都是"
}

PUT t5/doc/2
{
  "t1": "大撒旦撒旦所",
  "t2": "都是",
  "t3": "alex"
}

PUT t5/doc/3
{
  "t3": "ale"
}

GET t5/doc/1

GET t5/doc/_search
{
  "query": {
    "match": {
      "t3": "alex"
    }
  }
}

#
PUT s23/doc/6
{
  "name": "alex",
  "age": 33,
  "desc": "dsb",
  "tag": ["鸡汤"]
}

PUT s23/doc/7
{
  "name": "wusir",
  "age": 33,
  "desc": "wem",
  "tag": ["出风机"]
}

GET s231/doc/_search
{
  "query": {
    "match_all": {}
  }
}

#### ----------Bool 查询--------
# 1. must  and
# 2. should  or
# 3. must_not  ! 非
# 4. filter  过滤筛选

# 1. 年龄是33,姓名是wusir
GET s23/doc/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {"age": "33"}},
        {"match": {"name": "wusir"}}
      ]
    }
  }
}

# 2. 年龄是33 ,或者姓名是wusir
GET s23/doc/_search
{
  "query": {
    "bool": {
      "should": [
        {"match": {"age": "33"}},
        {"match": {"name": "wusir"}}
      ]
    }
  }
}

# 3. 年龄既不是33,姓名也不是wusir
GET s23/doc/_search
{
  "query": {
    "bool": {
      "must_not": [
        {"match": {"age": "33"}},
        {"match": {"name": "wusir"}}
      ]
    }
  }
}

# 4. 查询年龄 小于等于33的
# 伪代码 :bool -> filter -> range -> 字段
# gt 大于   gte 大于等于
# lt 小于   lte 小于等于
GET s23/doc/_search
{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "age": {
            "lte": 33
          }
        }
      }
    }
  }
}

# 5. should 和 filter 不推荐一起使用,查询内容会出现匹配失败的问题

## ----- index ----
# index 为 true 时字段会建立索引(可被搜索); 为 false 时不建立索引,无法按该字段查询
PUT a7
{
  "mappings": {
    "doc": {
      "properties": {
        "t1": {"type": "text", "index": true},
        "t2": {"type": "text", "index": false}
      }
    }
  }
}

# put index .eg.1
PUT a7/doc/1
{
  "t1": "a",
  "t2": "b"
}

# get index
# t2 字段查询不到
GET a7/doc/_search
{
  "query": {
    "match": {
      "t2": "b"
    }
  }
}

# ------copy_to------
# 将字段 t1、t2 的值 拷贝到f1中
DELETE a8

GET a8

PUT a8
{
  "mappings": {
    "doc": {
      "properties": {
        "t1": {"type": "text", "copy_to": "f1"},
        "t2": {"type": "text", "copy_to": "f1"},
        "f1": {"type": "text"}
      }
    }
  }
}

PUT a8/doc/1
{
  "t1": "里斯",
  "t2": "张三"
}

GET a8/doc/_search
{
  "query": {
    "match": {
      "t2": "张三"
    }
  }
}

GET a8/doc/_search
{
  "query": {
    "match": {
      "f1": "张三"
    }
  }
}

#-----嵌套设计表结构mappings------
PUT a9
{
  "mappings": {
    "doc": {
      "properties": {
        "name": {"type": "text"},
        "age": {"type": "long"},
        "info": {
          "properties": {
            "addr": {"type": "text"},
            "tel": {"type": "long"}
          }
        }
      }
    }
  }
}

PUT a10/doc/1
{
  "name": "ttt",
  "age": "23",
  "info": {
    "addr": "Dsahjkd",
    "tel": "12321312 "
  }
}

GET a10

## ----settings-----
# number_of_shards : 主分片
# number_of_replicas :复制分片
PUT a11
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  }
}

GET a11

PUT m5
{
  "mappings": {
    "doc": {
      "dynamic": false,
      "properties": {
        "first_name": {"type": "text", "copy_to": "full_name"},
        "last_name": {"type": "text", "copy_to": "full_name"},
        "full_name": {"type": "text"}
      }
    }
  }
}

PUT m5/doc/1
{
  "first_name": "tom",
  "last_name": "ben"
}

PUT m5/doc/2
{
  "first_name": "john",
  "last_name": "smith"
}

GET m5/doc/_search
{
  "query": {
    "match": {
      "first_name": "tom"
    }
  }
}

GET m5/doc/_search
{
  "query": {
    "match": {
      "full_name": "john"
    }
  }
}

#--------ignore_above----------
# 长度超过 ignore_above 设置的字符串长度. 将不会被索引和存储
PUT w1
{
  "mappings": {
    "doc": {
      "properties": {
        "t1": {"type": "keyword", "ignore_above": 5},
        "t2": {"type": "keyword", "ignore_above": 10}
      }
    }
  }
}

PUT w1/doc/1
{
  "t1": "deng",
  "t2": "dengshuainb"
}

GET w1/doc/_search
{
  "query": {
    "match_all": {}
  }
}

# 1. 超过 ignore_above 定义长度的字段,值将不会被创建索引
GET w1/doc/_search
{
  "query": {
    "match": {
      "t1": "dengshuainb"
    }
  }
}

ES 之 分词

#### ik 分词器  
# ik_max_word 最细粒度
# ik_smart 最粗粒度

POST _analyze
{
  "analyzer": "ik_max_word",
  "text": "To be or not to be, ---莎士比亚"
}

POST _analyze
{
  "analyzer": "ik_smart",
  "text": "上海自来水来自海上"
}
POST _analyze
{
  "analyzer": "ik_max_word",
  "text": "上海自来水来自海上"
}

Python操作 ES

# 模块安装
pip3 install elasticsearch


# 实例化 es 对象
from elasticsearch import Elasticsearch

# Client object used by the snippets that follow. It is constructed with no
# arguments (presumably targeting the default local node — confirm).
es = Elasticsearch()

# The bare string below is never executed. It sketches a constructor call for
# connecting to a cluster: a list of node addresses, sniffing on start and on
# connection failure, a sniffer timeout, and a status code to ignore.
"""
### es 集群
Elasticsearch(
["192.168.0.1","192.168.0.2","192.168.0.3"], # 集群列表
sniff_on_start=True,  # 连接前测试
sniff_on_connection_fail=True, # 节点无响应时,刷新节点
sniffer_timeout=60, # s设置超时时间
ignore=400 # 设置忽略状态码

)
"""

基础操作

### 1. search: run a query body against an index

# The filter_path parameter filters the returned result.
#   'hits.hits' addresses the second level of the result.
#   NOTE(review): the original note attributes this to jsonpath — confirm.

# Match query on age == 19, reused by all three calls below.
body = {
    "query":{
        "match":{
            "age":19
        }
    }
}
# Full, unfiltered response.
print(
    es.search(index='dengshuai',body=body)
)
# Only the 'hits.hits' and 'hits.total' paths.
print(
    es.search(index='dengshuai',body=body,filter_path=['hits.hits','hits.total'])
)

# Wildcard path: everything under 'hits'.
print(
    es.search(index='dengshuai',body=body,filter_path=['hits.*'])
)


ES 对象操作

# get(): fetch one document by id. Per the original note it raises an error
# when the document cannot be found (rarely used).
print(es.get(index='dengshuai',doc_type='doc',id=2))
# Example of a get() that errors (left disabled):
# print(es.get(index='dengshuai',doc_type='doc',id=3))

# index(): write a document; per the original note it updates the document if
# it already exists and creates it otherwise.
print(es.index(index='b2',doc_type='doc',id=1,body={
    "name":"zhangsan",
    "desc":"hh"
}))
print(es.index(index='b2',doc_type='doc',id=2,body={
    "name":"lisi",
    "desc":"jj"
}))

# get_source(): per the original note, returns the document data directly as
# a dict.
print(es.get_source(index='b2',doc_type='doc',id=1))

# count(): count documents.
print(es.count(index='b2',doc_type='doc'))


# create(): per the original note, create-only — it does not update.
print(es.create(index='b3',doc_type='doc',id=1,body={
    'name':"dsads",
    "desc":"dddd"
}))

# delete(): delete a document by id.
print(es.delete(index='b3',doc_type='doc',id=1))

# delete_by_query(): delete the documents matching a query.
    # Note: body must be a valid Elasticsearch query.
body = {
    "query":{
        "match":{
            "name": "zhangsan",
        }
    }
}
print(es.delete_by_query(index='b2', body=body))

# exists(): whether the document exists.
print(es.exists(index='b2',doc_type='doc',id=2))

# info(): return information about the cluster.
print(es.info())

# ping(): whether the cluster can be reached.
print(es.ping())



ES Indices 索引操作

######### Indices (index-level) operations
# indices.get_mapping(): return the mapping information of an index.
print(es.indices.get_mapping(index="b2"))

# indices.get_settings(): return the settings of an index.
print(es.indices.get_settings(index='b2'))

# indices.get(): return both mapping and settings information.
print(es.indices.get(index="b2"))

# indices.exists(): whether the index exists.
print(es.indices.exists(index='b4'))

# indices.create(): create an index; the body can carry mappings and settings.
# (The original note marks this one as must-know.)
# First call: empty body.
body={}
print(es.indices.create(index='b4',body=body))
# Second call: body declares a 'doc' type with a text field 'name'.
print(es.indices.create(index='b5',body={
    "mappings":{
        "doc":{
            "properties":{
                "name":{
                    "type":"text",
                }
            }
        }
    }
}))



# indices.delete(): delete an index.
print(es.indices.delete(index='b5'))

# `time` is used below for the pause after reopening the index; the snippet
# previously called time.sleep() without importing the module.
import time

# close(): close an index (the original note suggests this for late-night
# maintenance).
# Create one document in 'b6' first so there is something to read back.
print(es.create(index='b6', doc_type='doc', id=1, body={
    "name": "123",
}))

print(es.get(index='b6', doc_type="doc", id=1))
print(es.indices.close(index='b6'))


# open(): reopen the closed index. Passed by keyword like every other call
# in these snippets.
print(es.indices.open(index="b6"))
# Short pause before reading — presumably to let the reopened index become
# available again; TODO confirm whether waiting on cluster health is preferable.
time.sleep(1)
print(es.get(index='b6', doc_type="doc", id=1))


# indices.analyze(): run an analyzer over a piece of text; here the
# 'ik_smart' analyzer is named in the body.
print(es.indices.analyze(body={
    "analyzer":"ik_smart",
    "text":"上海自来水来自海上"
}))



# cat.health(): check whether the cluster is healthy; called once with
# format="json" and once with no arguments.
print(es.cat.health(format="json"))
print(es.cat.health())