elasticsearch之python操作

　　总结使用python对于elasticsearch的常用操作

　　1. 安装

pip install elasticsearch

　　2. 连接

from elasticsearch import Elasticsearch

# Connect without authentication.
es = Elasticsearch([{'host': '49.232.6.227', 'port': 9200}], timeout=3600)

# Connect with HTTP basic authentication.
# http_auth must be defined before it is passed to the client; it is a
# (username, password) tuple.
http_auth = ('xiao', '123456')

es = Elasticsearch([{'host': '49.232.6.227', 'port': 9200}], http_auth=http_auth, timeout=3600)

　　3. 查询

1）全部查询

# Query every document in the index with match_all, then print each hit.
query = {'query': {'match_all': {}}}

result = es.search(index=account_index, body=query)

hits = result['hits']['hits']
for row in hits:
    print(row)

2）term 过滤--term主要用于精确匹配哪些值，比如数字，日期，布尔值或 not_analyzed 的字符串(未经切词的文本数据类型)

# Run two term queries against the "megacorp" index and print each response:
# first on the numeric field age, then on the string field first_name
# (first_name may have been analyzed/tokenized).
for term_clause in ({'age': 32}, {'first_name': 'Jane'}):
    query = {"query": {"term": term_clause}}
    result = es.search(index="megacorp", body=query)
    print(result)

3）terms 过滤--terms 跟 term 有点类似，但 terms 允许指定多个匹配条件。如果某个字段指定了多个值，那么文档只要匹配其中任意一个值即可

# terms query on the name field with two candidate values.
query = {'query': {'terms': {'name': ['111111', '22222']}}}

4）查询文档中是否存在某个字段

# exists query on the age field.
query = dict(query=dict(exists=dict(field='age')))

5）布尔（bool）查询

bool 过滤--合并多个过滤条件查询结果的布尔逻辑

must :: 多个查询条件的完全匹配,相当于 and。
must_not :: 多个查询条件的相反匹配，相当于 not。
should :: 至少有一个查询条件匹配, 相当于 or。

# bool query with a `must` clause list.
# Each condition is its own dict inside a list: repeating the 'term' key inside
# a single dict literal would make Python silently keep only the last entry.
# Append further clauses to the list to AND more conditions together.
query = {
    'query': {
        'bool': {
            'must': [
                {'term': {'name': 'lanlang'}},
            ]
        }
    }
}

# Match records whose name is lanlang and which have no age field.
must_clause = {'term': {'name': 'lanlang'}}
must_not_clause = {'exists': {'field': 'age'}}
query = {'query': {'bool': {'must': must_clause, 'must_not': must_not_clause}}}

6）范围查找

gt : 大于
gte : 大于等于
lt : 小于
lte : 小于等于

# range query: age less than 10.
query = dict(query=dict(range=dict(age=dict(lt=10))))

7）match标准查询

# For exact-match searches, prefer filter clauses, because filters can cache data.
# A match query searches one specific field for one specific value, so make
# sure the field name you give it is correct.
query = {"query": {"match": {"about": "rock"}}}

8）multi_match 查询--match查询的基础上同时搜索多个字段，在多个字段中同时查一个

# multi_match query: look for 'lanlang' in both the name and wife fields.
searched_fields = ['name', 'wife']
query = {'query': {'multi_match': {'query': 'lanlang', 'fields': searched_fields}}}

9）wildcard 查询--使用标准的shell通配符查询

# wildcard query on the name field with the pattern 'lan*'.
query = {'query': {'wildcard': {'name': 'lan*'}}}

10 ）regexp查询

# regexp query on the about field with the pattern ".a.*".
query = {"query": {"regexp": {"about": ".a.*"}}}

11）prefix 以什么开头

# prefix query: name values starting with 'lan'.
query = dict(query=dict(prefix=dict(name='lan')))

12）短语匹配(Phrase Matching) -- 寻找邻近的几个单词

# match_phrase query: the phrase "I love" in the about field.
phrase = {"about": "I love"}
query = {"query": {"match_phrase": phrase}}

13）统计查询

# Count the documents in "megacorp" that match the phrase query on about.
query = {"query": {"match_phrase": {"about": "I love"}}}
result = es.count(index="megacorp", body=query)

# 返回结果示例：
# {'count': 4, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}

　　4. 插入数据

1）不指定ID

# body = {
#     'name': 'xing',
#     'age': 9,
#     'sex': 0,
#     'wife': 'maomao'
# }

# result = es.index(index=account_index, body=body)

2）指定ID

# Index a document into "megacorp" under the explicit ID 4.
employee = {
    "first_name": "xiao",
    "last_name": "wu",
    'age': 66,
    'about': 'I love to go rock climbing',
    'interests': ['sleep', 'eat'],
}
es.index(index="megacorp", id=4, body=employee)

　　5. 删除数据

1）指定ID删除

# Delete a single document by its ID.
# The variable is named doc_id so the built-in id() is not shadowed.
doc_id = '5DhJUHEBChSA6Z-1wbVW'

ret = es.delete(index=account_index, id=doc_id)

2）根据查询条件删除

# Delete every document in "megacorp" whose first_name matches "xiao".
match_clause = {"first_name": "xiao"}
query = {"query": {"match": match_clause}}
result = es.delete_by_query(index="megacorp", body=query)

　　6. 更新

1）指定ID更新

# Update a single document by its ID.
# The variable is named doc_id so the built-in id() is not shadowed.
doc_id = '5ThEVXEBChSA6Z-1OrVA'

# Remove a field with a script.
doc_body = {
    'script': 'ctx._source.remove("wife")'
}
ret = es.update(index=account_index, id=doc_id, body=doc_body)
print(ret)

# Add a field with a script.
doc_body = {
    'script': "ctx._source.address = '合肥'"
}
# Each body has to be sent with its own update call; only assigning
# doc_body changes nothing in the index.
ret = es.update(index=account_index, id=doc_id, body=doc_body)
print(ret)

# Modify some fields by sending only the changed fields under 'doc'.
doc_body = {
    'doc': {'name': 'xing111'}
}
ret = es.update(index=account_index, id=doc_id, body=doc_body)
print(ret)

2）满足条件进行更新

# Update every document in "megacorp" whose last_name matches "xiao":
# the painless script sets last_name and age from the script params.
update_script = {
    "source": "ctx._source.last_name = params.name;ctx._source.age = params.age",
    "lang": "painless",
    "params": {"name": "wang", "age": 100},
}
query = {
    "query": {"match": {"last_name": "xiao"}},
    "script": update_script,
}
result = es.update_by_query(index="megacorp", body=query)