之前用 R 语言时,一直觉得 .RData 格式的文件很好用:可以把每次执行产生的中间结果保存下来,便于下次直接调用。刚熟悉 Python,还没接触过这方面的知识,所以做项目时有时不太顺手,索性上网搜了一下,整理如下:
模型存档
#############
# joblib
from sklearn.linear_model import LogisticRegression

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; prefer the standalone joblib package and fall back for old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Save the model
lr_model = LogisticRegression()
joblib.dump(lr_model, 'xx.model')

# Load the model
lr_model = joblib.load('xx.model')

#############
# pickle
import pickle

# Save the model (pickle data is binary, hence the 'wb' mode)
with open('lr_model.pickle', 'wb') as fp:
    pickle.dump(lr_model, fp)

# Load the model
# NOTE: only unpickle files from a trusted source; loading can execute
# arbitrary code.
with open('lr_model.pickle', 'rb') as fp:
    lr_model = pickle.load(fp)
命令行参数解析
import argparse


def build_arg_parser():
    """Build the command-line parser for the classifier training script.

    Returns:
        argparse.ArgumentParser: a parser accepting ``--feature-map-file``
        (required) and ``--model-file`` (optional; ``None`` when omitted).
    """
    parser = argparse.ArgumentParser(description='Trains the classifier')
    parser.add_argument("--feature-map-file", dest="feature_map_file",
                        required=True,
                        help="Input pickle file containing the feature map")
    parser.add_argument("--model-file", dest="model_file",
                        required=False,
                        help="Output file where the trained model will be stored")
    return parser


# Parse the command line only when executed as a script, so that importing
# this module does not consume (or exit on) the importing program's argv.
if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    feature_map_file = args.feature_map_file
    model_file = args.model_file
对象保存
#############
# pickle
import pickle

x, y = 1, range(10)

# Save the objects
with open('xx.pickle', 'wb') as fp:
    # Tip: passing protocol=-1 to dump() selects the highest available
    # protocol, which can reduce the file size.
    pickle.dump([x, y], fp)

# Load the objects
with open('xx.pickle', 'rb') as fp:
    x, y = pickle.load(fp)
print(x)

#############
# _pickle
# _pickle is the private C accelerator that the pickle module already uses
# when it is available; fall back to pickle where it cannot be imported.
try:
    import _pickle as cpickle
except ImportError:
    import pickle as cpickle

x, y = 1, range(10)

# Save the objects
with open('xx.pickle', 'wb') as fp:
    # Tip: passing protocol=-1 to dump() selects the highest available
    # protocol, which can reduce the file size.
    cpickle.dump([x, y], fp)

# Load the objects; the names are deleted first so the print below shows
# the values restored from disk.
del x, y
with open('xx.pickle', 'rb') as fp:
    x, y = cpickle.load(fp)
print(x)

############
# dill
import dill

# Save the session to a file
# NOTE(review): newer dill releases appear to name these functions
# dump_module/load_module -- confirm against the installed version.
filename = 'globalsave.pkl'
dill.dump_session(filename)

# Load the session back from the file
dill.load_session(filename)

############
# Other libraries, e.g.: pmml, shelve