# Linear regression analysis (storing, modifying, and reading CSV files to construct new data)

# hanbb
# come on!!!
import pandas as pd
import urllib
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

# Download locations of the two historical data files: `es_url` feeds the
# 'EUROSTOXX' series and `vs_url` the 'VSTOXX' series used further down.
es_url = 'http://www.stoxx.com/download/historical_values/hbrbcpe.txt'
vs_url = 'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'

# One-off download of both files (left disabled, as in the original).
# NOTE(review): needs `import urllib.request`; a bare `import urllib` may not
# expose the `request` submodule.
# urllib.request.urlretrieve(es_url, r'E:\downloades.txt')
# urllib.request.urlretrieve(vs_url, r'E:\downloadhs.txt')

# Read the raw index file and remove every space from each line.
# Raw-string path: '\d' is not a valid escape sequence and triggers a
# SyntaxWarning on recent Python versions; the resulting path is unchanged.
# The `with` block guarantees the file handle is closed.
with open(r'E:\downloades.txt', 'r') as raw_file:
    lines = [line.replace(' ', '') for line in raw_file]

# Debug helpers for inspecting the raw content (disabled):
# print(lines[:6])
#
# for line in lines[3883:3890]:
#     print(line[41:])

# One-off preprocessing that writes E:\downloades50.txt (left disabled, as in
# the original). The header is built from lines[3]: 'data' is prepended and an
# extra ';DEL' column name is inserted before the final character of the line;
# then lines[4:-1] are copied unchanged.
#
# new_file = open(r'E:\downloades50.txt', 'w')
# new_file.writelines('data' + lines[3][:-1] + ';DEL' + lines[3][-1])
# new_file.writelines(lines[4:-1])
# new_file.close()
#
# new_lines = open(r'E:\downloades50.txt', 'r').readlines()
# print(new_lines[:5])


# Load the preprocessed index file: ';'-separated, with the first column
# parsed as day-first dates and used as the index.
# Raw-string path: '\d' is not a valid escape sequence and triggers a
# SyntaxWarning on recent Python versions; the resulting path is unchanged.
es = pd.read_csv(
    r'E:\downloades50.txt',
    index_col=0,
    parse_dates=True,
    sep=';',
    dayfirst=True,
)
# Drop the helper column named 'DEL'.
# NOTE(review): presumably it only absorbs a trailing separator on the data
# lines -- confirm against the file.
del es['DEL']
print(es.tail())  # raw values
# print(np.round(es.tail()))  # np.round: rounded values

# Alternative (disabled): read directly from the URL, skipping the first four
# rows and supplying the column names explicitly.
# cols = ['SX5P', 'SX5E', 'SXXP', 'SXXE', 'SXXF', 'SXXA', 'DK5F', 'DKXF']
# es = pd.read_csv(es_url, index_col=0, parse_dates=True, sep=';',
#                  dayfirst=True, header=None, skiprows=4, names=cols)
# print(es.tail())

# Load the second data file: ','-separated, column names taken from row
# index 2 (header=2), first column parsed as day-first dates and used as the
# index.
# Raw-string path: '\d' is not a valid escape sequence and triggers a
# SyntaxWarning on recent Python versions; the resulting path is unchanged.
vs = pd.read_csv(
    r'E:\downloadhs.txt',
    index_col=0,
    header=2,
    parse_dates=True,
    dayfirst=True,
    sep=',',
)
print(vs.tail())

# Build the combined data set: keep only rows dated after 1999-01-01 from the
# 'SX5E' column of `es` and the 'V2TX' column of `vs`, joined on the date
# index of the former.
start_date = dt.datetime(1999, 1, 1)
data = pd.DataFrame({'EUROSTOXX': es['SX5E'][es.index > start_date]})
data = data.join(pd.DataFrame({'VSTOXX': vs['V2TX'][vs.index > start_date]}))
# Forward-fill gaps left by the join. `fillna(method='ffill')` is deprecated
# in recent pandas releases; `ffill()` is the direct equivalent.
data = data.ffill()
print(data.head())
print(data.tail())
# One subplot per column, blue lines, with a grid.
data.plot(subplots=True, grid=True, style="b", figsize=(8, 6))
plt.show()