ES数据导入导出

ES数据导入导出

 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
'''
    Export and Import ElasticSearch Data.
    Simple Example At __main__
    @author: wgzh159@163.com
    @note:  uncheck consistency of data, please do it by self
'''
 
import json
import os
import sys
import time
import urllib2
 
reload(sys)
sys.setdefaultencoding('utf-8'# @UndefinedVariable
 
class exportEsData():
    size = 10000
    def __init__(self, url,index,type):
        self.url = url+"/"+index+"/"+type+"/_search"
        self.index = index
        self.type = type
    def exportData(self):
        print("export data begin...")
        begin = time.time()
        try:
            os.remove(self.index+"_"+self.type+".json")
        except:
            os.mknod(self.index+"_"+self.type+".json")
        msg = urllib2.urlopen(self.url).read()
        print(msg)
        obj = json.loads(msg)
        num = obj["hits"]["total"]
        start = 0
        end =  num/self.size+1
        while(start<end):
            msg = urllib2.urlopen(self.url+"?from="+str(start*self.size)+"&size="+str(self.size)).read()
            self.writeFile(msg)
            start=start+1
        print("export data end!!! total consuming time:"+str(time.time()-begin)+"s")
    def writeFile(self,msg):
        obj = json.loads(msg)
        vals = obj["hits"]["hits"]
        try:
            f = open(self.index+"_"+self.type+".json","a")
            for val in vals:
                a = json.dumps(val["_source"],ensure_ascii=False)
                f.write(a+" ")
        finally:
            f.flush()
            f.close()
 
class importEsData():
    def __init__(self,url,index,type):
        self.url = url+"/"+index+"/"+type
        self.index = index
        self.type = type
         
    def importData(self):
        print("import data begin...")
        begin = time.time()
        try:
            f = open(self.index+"_"+self.type+".json","r")
            for line in f:
                self.post(line)
        finally:
            f.close()
        print("import data end!!! total consuming time:"+str(time.time()-begin)+"s")
    def post(self,data):
        req = urllib2.Request(self.url,data,{"Content-Type":"application/json; charset=UTF-8"})
        urllib2.urlopen(req)
 
if __name__ == '__main__':
    '''
        Export Data
        e.g.
                            URL                    index        type
        exportEsData("http://10.100.142.60:9200","watchdog","mexception").exportData()
         
        export file name: watchdog_mexception.json
    '''
    #exportEsData("http://10.100.142.60:9200","watchdog","mexception").exportData()
    exportEsData("http://10.100.142.60:9200","watchdog","mexception").exportData()
     
     
    '''
        Import Data
         
        *import file name:watchdog_test.json    (important)
                    "_" front part represents the elasticsearch index
                    "_" after part represents the  elasticsearch type
        e.g.
                            URL                    index        type
        mportEsData("http://10.100.142.60:9200","watchdog","test").importData()
    '''
    #importEsData("http://10.100.142.60:9200","watchdog","test").importData()
    importEsData("http://10.100.142.60:9200","watchdog","test").importData()
原文地址:https://www.cnblogs.com/timssd/p/7407465.html