隐马尔科夫模型的Python3实现代码

下面给出隐马尔科夫模型的 Python 3 实现代码：
from hmmlearn.hmm import GaussianHMM
import datetime
import numpy as np
from matplotlib import cm, pyplot as plt
import matplotlib.dates as dates
import pandas as pd
import seaborn as sns#导入模块

# Sample window for every download below, as YYYYMMDD strings.
beginDate = '20100401'
endDate = '20160401'

# Daily index quotes for ticker 000001.
# NOTE(review): `DataAPI` is not imported in this file; it is presumably a
# global injected by the hosting research platform -- confirm.
data = DataAPI.MktIdxdGet(
    ticker='000001',
    beginDate=beginDate,
    endDate=endDate,
    field=['tradeDate', 'closeIndex', 'lowestIndex', 'highestIndex', 'turnoverVol'],
    pandas="1",
)

# Margin-trading / securities-lending totals for the Shenzhen exchange (XSHE);
# 'tradeVal' is the margin balance for the day.
data1 = DataAPI.FstTotalGet(
    exchangeCD=u"XSHE",
    beginDate=beginDate,
    endDate=endDate,
    field=['tradeVal'],
    pandas="1",
)
# Same totals for the Shanghai exchange (XSHG).
data2 = DataAPI.FstTotalGet(
    exchangeCD=u"XSHG",
    beginDate=beginDate,
    endDate=endDate,
    field=['tradeVal'],
    pandas="1",
)
# Sum of the two exchanges.
# NOTE(review): assumes both frames hold the same dates in the same row order
# and have as many rows as `data` -- TODO confirm.
tradeVal = data1 + data2

# Every series below ends up with len(data) - 5 entries: the 5-day return
# needs five prior rows, so the other features drop their first five rows too
# ([5:] on level series, [4:] on np.diff output, which is already one shorter).
tradeDate = pd.to_datetime(data['tradeDate'][5:])
volume = data['turnoverVol'][5:]  # turnover volume
closeIndex = data['closeIndex']  # closing level (full length, trimmed below)
# Daily log(high) - log(low) range.
deltaIndex = (np.log(np.array(data['highestIndex'])) - np.log(np.array(data['lowestIndex'])))[5:]
# 1-day log return of the close.
logReturn1 = np.array(np.diff(np.log(closeIndex)))[4:]
# 5-day log return of the close.
logReturn5 = np.log(np.array(closeIndex[5:])) - np.log(np.array(closeIndex[:-5]))
# 1-day log change of the combined margin balance.
logReturnFst = (np.array(np.diff(np.log(tradeVal['tradeVal']))))[4:]
# Trim the close series to the same rows as the features.
closeIndex = closeIndex[5:]


from scipy import stats # To perform box-cox transformation
from sklearn import preprocessing #导入模块
# Box-Cox transform the volume (divided by 1e11 first), then standardize it
# to zero mean and unit variance.
# NOTE(review): stats.boxcox requires strictly positive input -- confirm that
# no row has zero volume.
boxcox_volume, lmbda = stats.boxcox(volume / 100000000000)
rescaled_boxcox_volume = preprocessing.scale(
    boxcox_volume, axis=0, with_mean=True, with_std=True, copy=False
)

# Same treatment for the daily log high/low range. `lmbda` is overwritten;
# only the second fitted lambda survives this block.
# NOTE(review): a day with high == low gives deltaIndex == 0, which boxcox
# rejects -- confirm that this cannot occur in the sample.
boxcox_deltaIndex, lmbda = stats.boxcox(deltaIndex)
rescaled_boxcox_deltaIndex = preprocessing.scale(
    boxcox_deltaIndex, axis=0, with_mean=True, with_std=True, copy=False
)

# Observation matrix: one row per trading day, one column per feature.
X = np.column_stack([
    logReturn1,
    logReturn5,
    rescaled_boxcox_deltaIndex,
    rescaled_boxcox_volume,
    logReturnFst,
])

# Fit a 5-state Gaussian HMM with diagonal covariances. The observations are
# passed as the 2-D matrix itself (not wrapped in a list), the same form that
# predict() receives below.
model = GaussianHMM(n_components=5, covariance_type="diag", n_iter=10000).fit(X)
# Most likely hidden-state label for every row of X.
hidden_states = model.predict(X)
# Report the fitted model: the transition matrix first, then a blank line.
print("Transition matrix")
print(model.transmat_)
print()
# Then, for each hidden state, its mean vector and the diagonal of its
# covariance entry.
for i, (state_mean, state_cov) in enumerate(zip(model.means_, model.covars_)):
    print("{0}th hidden state".format(i))
    print("mean = ", state_mean)
    print("var = ", np.diag(state_cov))
# Scatter the closing level over time, one marker series per hidden state.
plt.figure(figsize=(15, 8))
plt.title('hidden states')
for i in range(model.n_components):
    # Boolean mask of the days assigned to state i.
    idx = (hidden_states == i)
    # The plot call must sit inside the loop: outside it, only the last
    # state's points would be drawn.
    # NOTE(review): plot_date is deprecated in recent Matplotlib releases;
    # plt.plot accepts datetime x-values directly -- consider switching.
    plt.plot_date(tradeDate[idx], closeIndex[idx], '.', label='%dth hidden state' % i, lw=1)
plt.legend()
plt.grid(True)
# Bare expression: shows the transition matrix when run as a notebook cell,
# and has no effect in a plain script.
model.transmat_


import xlwt
import xlrd
# Write the fitted transition matrix to the first sheet of a new .xls
# workbook, one cell per matrix entry (row index -> row, column index -> column).
wb3 = xlwt.Workbook()
wb3.add_sheet('first', cell_overwrite_ok=True)
ws_1 = wb3.get_sheet(0)
# ndenumerate walks the 2-D matrix in row-major order, yielding ((row, col), value).
for (r, c), prob in np.ndenumerate(model.transmat_):
    ws_1.write(r, c, prob)
wb3.save('文件夹 0/上证指数转移概率矩阵.xls')

# Gather the per-day series into one frame indexed by trade date.
res = pd.DataFrame({
    'tradeDate': tradeDate,
    'logReturn1': logReturn1,
    'logReturn5': logReturn5,
    'volume': volume,
    'zstate': hidden_states,
}).set_index('tradeDate')

plt.figure(figsize=(15, 8))
plt.xlabel('time')
plt.ylabel('earninigs multiplier')
# Daily log return used by every per-state curve below.
df = res.logReturn1
for i in range(model.n_components):
    idx = (hidden_states == i)
    # Shift the state signal forward one day: the position is taken on the day
    # after the state is observed. The first day gets 0 (no position).
    idx = np.append(0, idx[:-1])
    # Fast factor backtest: keep the daily return only on days in position.
    res['sig_ret%s' % i] = df.multiply(idx, axis=0)
    # Cumulative growth multiple = exp(running sum of the held log returns).
    plt.plot(np.exp(res['sig_ret%s' % i].cumsum()), label='%dth hidden state' % i)
plt.legend()
plt.grid(1)