# [Python] 练习代码

# from random import randrange
# num = int(input('摇几次骰子: '))
# sides=int(input('筛子有几个面: '))
# sum=0
# for i in range(num):
#     sum+= randrange(sides)+1
# print('最终的点数和是 ',sum,'平均点数是:',sum/num)



# from random import shuffle
# from pprint import pprint
# values=list(range(1,11))+'Jack Queen King'.split()  #并入列表中
# card_suits='diamonds clubs hearts spades'.split()
# value_suit=['{} of {}'.format(v,c) for v in values for c in card_suits]
# shuffle(value_suit)  #打乱顺序
# pprint(value_suit[:12])
# while value_suit:
#     input(value_suit.pop())



# Append one line of text; the context manager closes the handle even if the
# write fails.
with open('a123.txt', 'a') as f:
    f.write('hello aaaaaaaaaaaaadddddddddddddddddd')

# Echo up to ten lines.  readline() returns '' once the file is exhausted, so
# the extra iterations print nothing.  The handle is now closed afterwards.
with open('a123.txt', 'r') as f:
    for i in range(10):
        print(f.readline(), end='')

# Append a three-line text.  The line breaks are written as \n escapes because
# a single-quoted literal cannot span several physical lines.
with open('a123.txt', 'a') as f:
    f.write('This\nis no\nhaikou')



def process(string):
    """Print *string* after the fixed progress label."""
    label = '处理中...'
    print(label, string)
    

# with open('a123.txt','r') as f:
#     while True:
#         line=f.readline()
#         if not line:
#             break
#         process(line)
# First pass: pull one line at a time until readline() signals end of file
# with an empty string.
with open('a123.txt','r') as f:
    for line in iter(f.readline, ''):
        process(line)

# Second pass: load every line into a list up front, then walk the list.
with open('a123.txt','r') as f:
    for line in list(f):
        process(line)
        
        
        



def triangles():
    """Yield the rows of Pascal's triangle one after another, starting at [1].

    The generator never terminates on its own; the caller decides when to
    stop consuming it.
    """
    current = [1]
    while True:
        yield current
        # Each inner entry is the sum of the two entries above it.
        inner = [left + right for left, right in zip(current, current[1:])]
        current = [1, *inner, 1]
# Take the first ten rows from the generator, printing and collecting each.
results = []
n = 0
rows = triangles()
while n < 10:
    t = next(rows)
    print(t)
    results.append(t)
    n += 1

# Reference rows used to check the generator's output.
expected = [
    [1],
    [1, 1],
    [1, 2, 1],
    [1, 3, 3, 1],
    [1, 4, 6, 4, 1],
    [1, 5, 10, 10, 5, 1],
    [1, 6, 15, 20, 15, 6, 1],
    [1, 7, 21, 35, 35, 21, 7, 1],
    [1, 8, 28, 56, 70, 56, 28, 8, 1],
    [1, 9, 36, 84, 126, 126, 84, 36, 9, 1],
]
print('测试通过!' if results == expected else '测试失败!')
    
    
' a test module '

__author__ = 'Michael Liao'

import sys


def test():
    """Print a greeting chosen by the number of entries in sys.argv.

    Exactly one entry greets the world, exactly two greet the second entry,
    and any other count reports too many arguments.
    """
    argc = len(sys.argv)
    if argc == 1:
        message = 'Hello, world!'
    elif argc == 2:
        message = 'Hello, %s!' % sys.argv[1]
    else:
        message = 'Too many arguments!'
    print(message)


if __name__ == '__main__':
    test()

class Student:
    """Empty class; attributes are attached to its instances after creation."""


# Attach an attribute to a fresh instance and read it back.
bart = Student()
setattr(bart, 'name', 'jojo')
bart.name



class Student:
    """A student with a name and a numeric score."""

    def __init__(self, name, score):
        self.name, self.score = name, score

    def get_grade(self):
        """Return 'A' for scores of 90 or more, 'B' for 60 or more, else 'C'."""
        for floor, letter in ((90, 'A'), (60, 'B')):
            if self.score >= floor:
                return letter
        return 'C'


gg = Student('aaa', 100)
gg.get_grade()


# Print every letter of "python" except 't', each followed by a space.
for c in "python":
    if c != 't':
        print(c, end=' ')


# Print the word, then the word minus its last letter, and so on until it is
# empty; everything is written back to back on one line.
s = 'python'
while s:
    for c in s:
        print(c, end='')
    s = s[:-1]

import random
from pprint import pprint

# random.seed() returns None, so this prints "None"; its purpose here is the
# seeding side effect.
seed_result = random.seed(10)
pprint(seed_result)
# Draw (and discard) the first value of the seeded sequence.
random.random()


from random import random
from time import perf_counter

# Monte Carlo estimate of pi: throw DARTS random points into the unit square
# and count those whose distance from the origin is at most 1.
DARTS = 1000*10000
hits = 0.0
start = perf_counter()
for _ in range(DARTS):
    x = random()
    y = random()
    if pow(x**2+y**2, 0.5) <= 1:
        hits += 1
# The hit ratio approximates a quarter of the unit circle's area.
pi = 4*(hits/DARTS)
print("圆周率值是:{}".format(pi))
elapsed = perf_counter()-start
print('运行时间是:{:.20f}s'.format(elapsed))

import requests

# Fetch the page.  A timeout is passed explicitly because requests otherwise
# waits indefinitely on a server that never answers.
r = requests.get('http://www.shipxy.com/', timeout=10)
r.status_code  # HTTP status of the response
r.text         # response body decoded to text



# Print every ordered triple of pairwise-different numbers drawn from 1..4.
choices = range(1, 5)
for i in choices:
    for j in choices:
        if j == i:
            continue
        for k in choices:
            if k != i and k != j:
                print(i, j, k)
                
                
profit = int(input('输入发放的利润值(万元): '))
# Pick the commission formula for the bracket the profit falls into.  Each
# formula adds the commission of all lower brackets at their own rates.
# A negative profit matches no bracket and prints nothing.
if profit < 0:
    commission = None
elif profit < 10:
    commission = profit*0.1
elif profit < 20:
    commission = (profit-10)*0.075+10*0.1
elif profit < 40:
    commission = (profit-20)*0.05+10*0.075+10*0.1
elif profit < 60:
    commission = (profit-40)*0.03+20*0.05+10*0.075+10*0.1
elif profit < 100:
    commission = (profit-60)*0.015+20*0.03+20*0.05+10*0.075+10*0.1
else:
    commission = (profit-100)*0.01+40*0.015+20*0.03+20*0.05+10*0.075+10*0.1
if commission is not None:
    print('提成为:',commission,'万元')
    
profit = int(input('输入企业的利润值(万元): '))
# Bracket floors from highest to lowest, and the rate applied above each floor.
gap = [100,60,40,20,10,0]
ratio =[0.01,0.015,0.03,0.05,0.075,0.1]
bonus=0
for floor, rate in zip(gap, ratio):
    if profit < floor:
        continue
    # Charge the part above this floor, then continue with the remainder.
    bonus += (profit-floor)*rate
    profit = floor
print('提成为:',bonus,'万元')


def get_bonus(profit):
    """Return the tiered commission for *profit*.

    The rate is 0.1 up to 10, 0.075 from 10 to 20, 0.05 from 20 to 40,
    0.03 from 40 to 60, 0.015 from 60 to 100 and 0.01 above 100.  Each tier
    charges only the part of the profit that lies inside it.

    A negative profit prints a message and returns 0.
    """
    if profit < 0:
        print("利润输入值不能为负")
        return 0
    if profit <= 10:
        return 0.1*profit
    # (tier floor, rate above that floor), highest tier first.
    tiers = ((100, 0.01), (60, 0.015), (40, 0.03), (20, 0.05), (10, 0.075))
    for floor, rate in tiers:
        if profit > floor:
            # Part above the floor at this rate, plus everything below it.
            return (profit-floor)*rate + get_bonus(floor)
    return 0


if __name__ == '__main__':
    # The prompt lives under the guard so importing this file does not block
    # on stdin.
    profit = int(input('输入企业的利润值(万元): '))
    print('提成为:',get_bonus(profit),'万元')





'''
分析：
x + 100 = m^2
x + 100 + 168 = n^2
n^2 - m^2 = 168
(n + m) * (n - m) = 168
n > m >= 0
n - m 最小值为 1
n + m 最大为 168
n 最大值为 168
m 最大值为 167
'''

def _test():
    """Search 0 <= m < n <= 168 for pairs with n*n - m*m == 168 and print them.

    For every hit the number is printed twice (once derived from n, once
    from m), followed by the pair itself.
    """
    for m in range(168):
        for n in range(m + 1, 169):
            # (n + m) * (n - m) is the same integer as n*n - m*m.
            if n * n - m * m != 168:
                continue
            print("该数为:" + str(n * n - 168 - 100))
            print("该数为:" + str(m * m - 100))
            print('n为%s,m为%s' % (n, m))


if __name__ == '__main__':
    _test()
    
def test1():
    """Print n*n - 100 for every pair 0 <= n <= m <= 168 with m*m - n*n == 168."""
    for n in range(168):
        n_squared = n * n
        for m in range(n, 169):
            # (m + n) * (m - n) is the same integer as m*m - n*n.
            if m * m - n_squared == 168:
                print("这个整数是: ", str(n_squared - 100))


if __name__ == '__main__':
    test1()

import pandas as pd

# Windows path written as a raw string so the backslashes (including the
# "\t" in "\train.csv") are taken literally instead of as escape sequences.
# NOTE(review): separators were restored from a path that had lost them —
# confirm the exact location.
df = pd.read_csv(r'c:\Users\clemente\Desktop\all\train.csv',index_col='Id')
df.head()


# Print the tuples (i, j, k, g, h), each value in 0..6, that sum to 15 and
# pass the pairwise checks below.
# NOTE(review): only five of the ten possible pairs are compared, so some
# repeated values are still accepted — confirm whether full distinctness
# was intended.
for i in range(0,7):
    for j in range(0,7):
        for k in range(0,7):
            for g in range(0,7):
                for h in range(0,7):
                    # `if`, not `while`: nothing in the body changes the
                    # condition, so a `while` here never terminates.
                    if (i!=j) and (i!=g) and (g!=h) and (h!=k) and (k!=i):
                        if (i+j+k+g+h)==15:
                            print (i,j,k,g,h)
                


import random


def gen5num():
    """Draw five entries from the digit pool and return them if they sum to 15.

    The first digit comes from randint(0, 6) (both ends included); the other
    four are chosen from what is left of the pool, each pick being removed
    before the next.  Returns a 5-tuple when the digits add up to 15 and
    None otherwise.
    """
    pool = [0, 1, 2, 3, 4, 5, 6, 0]
    picks = [random.randint(0, 6)]
    pool.remove(picks[0])
    for _ in range(4):
        digit = random.choice(pool)
        pool.remove(digit)
        picks.append(digit)
    if sum(picks) == 15:
        return tuple(picks)


if __name__ == '__main__':
    for i in range(100):
        print(gen5num())



#!/usr/bin/env python3
#coding=utf-8

from itertools import permutations

# Print every ordered selection of five different characters from '0123456'
# and keep a running count in t.
t = 0
for t, i in enumerate(permutations('0123456', 5), start=1):
    print(''.join(i))

print("不重复的数量有:%s"%t)


def sum_1():
    """Add up the digits of '01234567', printing the running total.

    Returns the final total.  The accumulator is now initialised before the
    loop, and the total is printed directly instead of being passed to
    sum(), which cannot take an int.
    """
    p = 0
    for i in '01234567':
        p += int(i)
        print(p)
    return p
sum_1()

# IPython wildcard search (not valid in a plain .py file): np.*load*?



# Task: find the groups of three array elements whose sum equals a given integer.

# Approach:
# 1. sort a copy of the array;
# 2. for every pair (i, j) scan k over the positions after j;
# 3. sum below the target -> keep scanning; sum equal -> report the group;
# 4. sum above the target -> stop scanning k, because later elements are
#    at least as large and can only keep the sum too big.

def addData(array, sumdata):
    """Print and count the element triples of *array* that add up to *sumdata*.

    The search runs on a sorted copy, so *array* itself is left untouched and
    the printed indices refer to positions in the sorted order.

    Returns the number of matching groups.
    """
    temp_array = sorted(array)
    print ("sumdata: {}".format(sumdata))
    size = len(temp_array)

    # number of groups that satisfy the condition
    num = 0

    for i in range(size - 2):
        for j in range(i + 1, size - 1):
            for k in range(j + 1, size):
                total = temp_array[i] + temp_array[j] + temp_array[k]
                if total < sumdata:
                    continue
                if total > sumdata:
                    break
                num += 1
                print("Group {} :".format(num))
                for idx in (i, j, k):
                    print("下标：{}, 元素值： {}".format(idx, temp_array[idx]))
    return num

if __name__=="__main__":
    test_array = [0,1,2,3,4,5,6,0]
    test_sumdata = 4
    addData(test_array, test_sumdata)
    


#题目：数组中找出两个元素之和 等于给定的整数

# 思路：
# 1、将数组元素排序；
# 2、array[i]与a[j](j的取值：i+1到len_array-1) 相加；
# 3、如两两相加＜整数继续，如＝整数则输出元素值；
# 4、如＞则直接退出，i+1 开始下一轮相加比较

import numpy as np

# Seven labels, one per row of a 7x4 block of standard-normal samples.
names = np.array('Bob Joe Will Bob Will Joe Joe'.split())
data = np.random.randn(7, 4)
names
data
# Element-wise comparison gives a boolean mask; indexing with it keeps the
# rows whose label is 'Bob'.
is_bob = names == 'Bob'
is_bob
data[is_bob]


# `arr` was used without ever being defined.  Build an 8x4 example array so
# the fancy-indexing expression below has something to select from: it picks
# rows 4, 3, 0 and 6, in that order.
arr = np.arange(32).reshape((8, 4))
arr[[4,3,0,6]]



import matplotlib.pyplot as plt
# 1000 evenly spaced points in [-5, 5), expanded into two coordinate grids.
points = np.arange(-5,5,0.01)
xs,ys=np.meshgrid(points,points)
# Distance of every grid point from the origin.
z=np.sqrt(xs**2+ys**2)

plt.imshow(z,cmap=plt.cm.gray)
plt.colorbar()
# Raw string with the backslash restored so mathtext renders a square root.
plt.title(r"图像  $\sqrt{x^2+y^2}$")

import pandas as pd
obj=pd.Series(range(3),index=["a","b","c"])
index=obj.index
# Index objects are immutable: item assignment raises TypeError.  Catch it so
# the demonstration reports the error instead of stopping the script.
try:
    index[1]='d'
except TypeError as exc:
    print('Index does not support item assignment:', exc)
import numpy as np
import pandas as pd

# 4x4 frame holding 0..15 row by row.
grid = np.arange(16).reshape((4, 4))
data = pd.DataFrame(
    grid,
    index=[1, 2, 3, 4],
    columns=["one", "two", "three", "forth"],
)
# Element-wise comparison: True where the value is below 3.
data.lt(3)


# One-column frame.
df1 = pd.DataFrame({"A": [1, 2]})
df1

# Sixteen strings: the pattern a, a, b, c repeated four times.
obj = pd.Series(list("aabc") * 4)
obj
# Summary statistics of the string Series.
obj.describe()


import json

# json.loads() only accepts str/bytes/bytearray, so handing it the Series
# itself raises TypeError.  Serialise the Series to JSON text first, then
# parse that text back into plain Python objects.
# NOTE(review): assumes `obj` is still the pandas Series assigned earlier in
# this file — confirm that a round trip is what was intended.
result = json.loads(obj.to_json())
result



import pandas as pd
ages=[12,34,23,45,67,30,20,55,98,30,43]
bins=[1,20,30,40,50,100]
# Assign every age to the interval between two consecutive bin edges.
cats=pd.cut(ages,bins)
cats
# Integer position of the interval each age landed in.
cats.codes
# Occurrences per interval.  The top-level pd.value_counts() is deprecated;
# going through a Series keeps the same result.
pd.Series(cats).value_counts()



# 5x4 frame holding 0..19 row by row.
DataF = pd.DataFrame(np.arange(20).reshape((5, 4)))
DataF
# Random ordering of 0..19, viewed as a 5x4 block.
sample_1 = np.random.permutation(20)
sample_1.reshape((5, 4))


# Small frame with a key column and a counter column.
df = pd.DataFrame({'key': list('bbacab'), 'data1': range(6)})
df
# A list selector returns a one-column DataFrame rather than a Series.
df[["data1"]]


import pandas as pd

left = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar'],
    'key2': ['one', 'two', 'one'],
    'lval': [1, 2, 3],
})
right = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar', 'bar'],
    'key2': ['one', 'one', 'one', 'two'],
    'rval': [4, 5, 6, 7],
})
# Join on key1 only; the key2 column, present on both sides, gets suffixed.
left.merge(right, on=['key1'])



import matplotlib.pyplot as plt
import numpy as np
# Line plot of 0..9999 on pyplot's current figure.
data=np.arange(10000)
plt.plot(data)

# New figure with three axes placed in cells 1-3 of a 2x2 grid.
fig=plt.figure()
ax1=fig.add_subplot(2,2,1)
ax2=fig.add_subplot(2,2,2)
ax3=fig.add_subplot(2,2,3)

# Histogram, noisy scatter and a stepped cumulative-sum line, one per axes.
ax1.hist(np.random.randn(100),bins=20,color='k',alpha=0.5)
ax2.scatter(np.arange(30),np.arange(30)+3*np.random.randn(30))
ax3.plot(np.random.randn(50).cumsum(),drawstyle='steps-post')



# Second figure: a single axes with a translucent green rectangle patch.
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
rect=plt.Rectangle((0.5,0.8),0.4,0.4,color='g',alpha=0.4)
ax.add_patch(rect)

# Save the current figure as SVG; bbox_inches='tight' is passed to trim the
# surrounding whitespace.
plt.savefig("真的.svg",bbox_inches='tight')


# Cumulative sum of ten normal samples, plotted against the default index.
s=pd.Series(np.random.randn(10).cumsum())
s.plot()

# Same kind of series, but indexed 0, 10, ..., 90.
s=pd.Series(np.random.randn(10).cumsum(),index=np.arange(0,100,10))
s.plot()



# Four columns of column-wise cumulative sums; DataFrame.plot() is called on
# the whole frame.
df=pd.DataFrame(np.random.randn(10,4).cumsum(0),columns=['A','B','C','D'],index=np.arange(0,100,10))
df.plot()

# Two stacked axes: vertical bars on the first, horizontal bars on the second.
fig,axes=plt.subplots(2,1)
data=pd.Series(np.random.rand(16),index=list("abcdefghijklmnop"))
data.plot.bar(ax=axes[0],color='k',alpha=0.7)
data.plot.barh(ax=axes[1],color='g',alpha=0.7)
plt.show()


# 6x4 frame of uniform samples whose column index is named 'Genus'.
df=pd.DataFrame(np.random.rand(6,4),index=['one','two','three','four','five','six'],columns=pd.Index(['A','B','C','D'],name='Genus'))
df
df.plot.bar()
# stacked=True is passed so the columns of each row share one bar.
df.plot.barh(stacked=True,alpha=0.5)


tips = pd.read_csv('tips.csv')
# Cross-tabulate day against party size.
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts
# Keep only the columns labelled 2 through 5.
party_counts = party_counts.loc[:, 2:5]
party_counts

row_totals = party_counts.sum(axis=1)
row_totals

# Divide every row by its own total so each row sums to 1.
party_pcts = party_counts.div(row_totals, axis=0)
party_pcts.plot.bar()



import seaborn as sns
tips=pd.read_csv('tips.csv')
# Tip as a fraction of the bill with the tip itself subtracted.
tips['tip_pct']=tips['tip']/(tips['total_bill']-tips['tip'])
tips.head()
# Horizontal bar plots of tip_pct per day; the second call also splits by time.
sns.barplot(x='tip_pct',y='day',data=tips,orient='h')
sns.barplot(x='tip_pct',y='day',hue='time',data=tips,orient='h')
# NOTE(review): the style is set after the bar plots above, so it presumably
# only affects the plots that follow — confirm the intended order.
sns.set(style='whitegrid')


# Histograms with 50 bins each.
tips['tip_pct'].plot.hist(bins=50)
tips['total_bill'].plot.hist(bins=50)


# Density plots of the same two columns.
tips['tip_pct'].plot.density()
tips['total_bill'].plot.density()


# Mix two normal samples, N(0, 1) and N(10, 2), into one bimodal series.
comp1=np.random.normal(0,1,size=200)
comp2=np.random.normal(10,2,size=200)
values=pd.Series(np.concatenate([comp1,comp2]))
# sns.distplot() is deprecated; histplot() with kde=True and a density scale
# draws the same histogram-plus-density-curve picture.
sns.histplot(values,bins=101,color='k',kde=True,stat='density')


macro=pd.read_csv('macrodata.csv')
data=macro[['cpi','m1','tbilrate','unemp']]
# Log-differences of each column; the first row is NaN and is dropped.
trans_data=np.log(data).diff().dropna()
trans_data.head()
trans_data[-5:]

# x and y are passed by keyword: current seaborn no longer accepts them
# positionally.
sns.regplot(x="m1",y="unemp",data=trans_data)
plt.title('Changes in log {} versus log {}'.format('m1','unemp'))
sns.set(style="ticks", color_codes=True)
sns.pairplot(trans_data,diag_kind='kde',kind='reg')
sns.pairplot(trans_data,diag_kind='hist',kind='reg')

# factorplot() was renamed to catplot() and later removed; catplot() takes
# the same arguments here.
sns.catplot(x='day',y='tip_pct',row='time',hue='smoker',kind='box',data=tips[tips.tip_pct<0.5])

tips.describe()

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
df = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': np.random.randn(5),
    'data2': np.random.randn(5),
})
df

# Group the data1 column by the values of key1.
group_1 = df['data1'].groupby(df['key1'])
group_1.describe()
# Group by two key Series at once; the result has a two-level index.
key_columns = [df['key1'], df['key2']]
group_2 = df['data1'].groupby(key_columns).mean()
group_2



# Group keys may also be plain arrays of the right length.
states = np.array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'])
years = np.array([2005, 2005, 2006, 2005, 2006])
df['data1'].groupby([states, years]).mean()


# Map each key1 value to the sub-frame of its rows.
{key: frame for key, frame in df.groupby('key1')}


try:
    year=input("输入年份:")
    month=input("输入月份: ")
    day=input("输入日期号: ")
finally:
    print("正在计算")

# Days that have passed before the first of each month in a common year
# (position 0 is January).
months2days=[0,31,59,90,120,151,181,212,243,273,304,334]
# Leap year: divisible by 4, except century years not divisible by 400.
year_number=int(year)
if (year_number % 4 == 0 and year_number % 100 != 0) or year_number % 400 == 0:
    # The extra day shifts every month from March (position 2) onwards.
    for i in range(2,12,1):
        months2days[i] +=1

# Month numbers 1..12 as dictionary keys.  This used to append a stale `i`
# on every pass instead of the loop variable, so every key was the same.
month_index=list(range(1,13))
dict_md=dict(zip(month_index,months2days))
whichday=dict_md[int(month)]+int(day)
print('结果是: 第{}天'.format(whichday))





def unsortedSearch(list, i, u):
    """Return True if some element of *list* lies strictly between *i* and *u*.

    Elements and bounds may be ints or numeric strings; all are compared as
    ints.  Both bounds are now tested against the same element: the earlier
    version checked the upper bound on one element and the lower bound on a
    different one, which could report a match that does not exist.
    """
    lower, upper = int(i), int(u)
    return any(lower < int(item) < upper for item in list)

unsortedList = ['1', '3', '4', '2', '6', '9', '2', '1', '3', '7']
num1 = '3'
num2 = '5'

isItThere = unsortedSearch(unsortedList, num1, num2)

if isItThere:
    print ("There is a number between those values")
else:
    print ("There isn't a number between those values")





def get_nums():
    """Ask how many integers follow, read that many from stdin, return them as a list."""
    n = int(input("一共有几个整数?"))
    prompt = '请按次随机输入第{}个整数(剩余{}次输入)：'
    return [int(input(prompt.format(k + 1, n - k))) for k in range(n)]


if __name__ == '__main__':
    list_nums = get_nums()

def BubbleSort(nums):  # bubble sort
    """Sort *nums* in place in ascending order and return it.

    Prints the initial list and the list after every pass.
    """
    print('初始整数集合为:{}'.format(nums))
    size = len(nums)
    for done in range(size - 1):
        # After `done` passes the last `done` items are already in place.
        for left in range(size - 1 - done):
            right = left + 1
            if nums[left] > nums[right]:
                # swap the neighbours
                nums[left], nums[right] = nums[right], nums[left]
        print("第{}次迭代排序结果:{}".format(done + 1, nums))
    return nums


if __name__ == '__main__':
    print('经过冒泡法排序最终得到:{}'.format(BubbleSort(list_nums)))
    
    
def get_nums():
    """Read a count from stdin, then that many integers; return them in entry order."""
    collected = []
    total = int(input("一共有几个整数?"))
    for position, remaining in zip(range(1, total + 1), range(total, 0, -1)):
        text = input('请按次随机输入第{}个整数(剩余{}次输入)：'.format(position, remaining))
        collected.append(int(text))
    return collected


if __name__ == '__main__':
    myList = get_nums()

def selectedSort(myList):
    """Sort *myList* in place with selection sort, printing the list each round.

    Every round finds the index of the smallest remaining value and moves it
    to the front of the unsorted part with a single swap.  Returns *myList*.
    """
    # length of the list
    length = len(myList)
    # one round per position except the last
    for i in range(0,length-1):
        # assume the current position holds the smallest value
        smallest = i
        # scan the rest and remember where the smallest value is
        for j in range(i+1,length):
            if myList[j]<myList[smallest]:
                smallest = j
        # one swap per round, and only when something smaller was found
        if smallest != i:
            myList[i], myList[smallest] = myList[smallest], myList[i]
        # print the list as it stands after this round
        print("Round ",i,": ",myList)
    return myList


# Guarded like the code that builds myList, so importing this file does not
# hit an undefined name.
if __name__=='__main__':
    print("选择排序法:迭代过程 ")
    selectedSort(myList)



    

def _merge(left, right):
    """Merge two ascending lists into one ascending list."""
    merged = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
    # At most one of the two still has items left; they are already in order.
    merged.extend(left[i:])
    merged.extend(right[j:])
    return merged


def merge_sort(LIST):
    """Return a new ascending list with the items of *LIST*.

    Uses top-down merge sort.  *LIST* is left untouched; the earlier version
    removed the items from the caller's list while sorting.
    """
    items = list(LIST)
    if len(items) < 2:
        return items
    middle = len(items) // 2
    return _merge(merge_sort(items[:middle]), merge_sort(items[middle:]))

if __name__=='__main__':
    nums=[]
    n=int(input('一共几位数: '))
    for i in range(n):
        x=int(input("请依次输入整数:"))
        nums.append(x)
    print(merge_sort(nums))
    
    
# =============================================================================
#10.1.2
# =============================================================================
import pandas as pd
df=pd.DataFrame({'key1':['a','a','b','b','a'],'key2':['one','two','one','two','one'],'data1':np.random.randn(5),'data2':np.random.randn(5)})
df
# Mean of data1 for every (key1, key2) combination.
df.groupby(['key1','key2'])['data1'].mean()



people=pd.DataFrame(np.random.randn(5,5),columns=['a','b','c','d','e'],index=['joe','steve','wes','jim','travis'])
people
# Column label -> group name; the unused key 'f' is simply ignored.
mapping={'a':'red','b':'red','c':'blue','d':'blue','e':'red','f':'orange'}
# groupby(axis=1) is deprecated.  Grouping the transposed frame and
# transposing the result back gives the same per-row means per colour.
by_column=people.T.groupby(mapping)
by_column.mean().T
map_series=pd.Series(mapping)

# A function is called on each index label; here rows are grouped by the
# length of the person's name.
people.groupby(len).sum()


# =============================================================================
# 分组加权
# =============================================================================

import pandas as pd

df = pd.DataFrame({
    '目录': ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
    'data': np.random.randn(8),
    'weights': np.random.randn(8),
})
df
grouped = df.groupby('目录')


def get_weighpoint(x):
    """Return the average of x['data'] weighted by x['weights']."""
    return np.average(x['data'], weights=x['weights'])


# One weighted average per group.
grouped.apply(get_weighpoint)


# =============================================================================
# 
# =============================================================================

# Load the price table, using the first column as the index and asking
# pandas to parse it as dates.
spx=pd.read_csv('stock_px_2.csv',index_col=0,parse_dates=True)
spx
# Print a summary of the frame (info() writes to stdout).
spx.info()




from datetime import datetime

# Parse month/day/year strings into datetime objects.
datestrs = ['7/6/2011', '8/6/2011']
list(map(lambda text: datetime.strptime(text, '%m/%d/%Y'), datestrs))


# 1000 consecutive timestamps starting on 1 January 2018, used as the index
# of a frame of normal samples.
dates = pd.date_range('1/1/2018', periods=1000)
dates
state_names = ['Colorado', 'Texas', 'New York', 'Ohio']
long_df = pd.DataFrame(np.random.randn(1000, 4), index=dates, columns=state_names)
long_df

# Thirty hourly timestamps.
pd.date_range('2018-10-1', periods=30, freq='1h')



# =============================================================================
# 
# =============================================================================
# Load the price table with the first column parsed as a date index.
close_px_all=pd.read_csv("stock_px_2.csv",parse_dates=True,index_col=0)
close_px=close_px_all[['AAPL','MSFT','XOM']]
# Resample with the "B" frequency and forward-fill the gaps.
close_px=close_px.resample("B").ffill()
close_px.AAPL.plot()
# Overlay the mean over a rolling window of 250 rows.
close_px.AAPL.rolling(250).mean().plot()




import pandas as pd
import numpy as np
values=pd.Series(['apple','orange','apple','apple']*2)
values
# Distinct values in order of first appearance.
pd.unique(values)
# Occurrences per value.  The top-level pd.value_counts() is deprecated; the
# Series method returns the same counts.
values.value_counts()



import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import RANSACRegressor, LinearRegression, TheilSenRegressor
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
from sklearn.svm import SVR
from sklearn.linear_model import Ridge,Lasso,ElasticNet,BayesianRidge
from sklearn.ensemble import GradientBoostingRegressor
# sklearn.cross_validation was removed; train_test_split now lives in
# sklearn.model_selection.
from sklearn.model_selection import train_test_split

data = pd.read_csv('../cement_data.csv')
# Show the number of records.
print(len(data))
# Look at the first five rows.
data.head()



import pandas
titanic=pandas.read_csv('train.csv')
titanic.head()
titanic.describe()
# Fill missing ages with the median age.
titanic['Age']=titanic['Age'].fillna(titanic['Age'].median())
# Show the labels that occur in the Sex column, then encode them as integers.
print(titanic['Sex'].unique())
titanic['Sex']=titanic['Sex'].map({'male':0,'female':1})
# Embarked (port of embarkation): fill the gaps with 'S', then encode.  The
# encoding names the column explicitly; selecting rows without a column
# overwrote every column of the matching rows.
print(titanic['Embarked'].unique())
titanic['Embarked']=titanic['Embarked'].fillna('S').map({'S':0,'C':1,'Q':2})

# =============================================================================
# Model: linear regression
# =============================================================================
from sklearn.linear_model import LinearRegression
# sklearn.cross_validation was removed; KFold now lives in
# sklearn.model_selection and takes n_splits instead of (n, n_folds).
from sklearn.model_selection import KFold
# Cross-validation: the training data is split into three folds, and each
# fold is predicted by a model fitted on the other two.

predictors=['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked'] # feature columns fed to the model
alg=LinearRegression()
# No shuffling: folds are consecutive row blocks, so the concatenated
# predictions below line up with the original row order.  random_state is
# left out because it may only be set together with shuffle=True.
kf=KFold(n_splits=3)

predictions=[]
for train,test in kf.split(titanic):
    train_predictors=titanic[predictors].iloc[train,:]  # features of the training folds
    train_target=titanic['Survived'].iloc[train]  # target of the training folds
    alg.fit(train_predictors,train_target)  # fit on the training folds

    test_predictions=alg.predict(titanic[predictors].iloc[test,:])  # predict the held-out fold
    predictions.append(test_predictions)

import numpy as np
predictions=np.concatenate(predictions,axis=0)
# Turn the regression output into class labels in a single step.  Two
# successive masked assignments interfered: values set by the first were
# caught again by the second, and the labels were inverted as well.
predictions=(predictions>0.5).astype(int)
# Accuracy is the share of rows whose predicted label equals the true one.
accuracy=(predictions==titanic['Survived'].values).mean()
print(accuracy)


# =============================================================================
# 逻辑回归
# =============================================================================
# sklearn.cross_validation was removed; cross_val_score now lives in
# sklearn.model_selection.
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
alg=LogisticRegression(random_state=1)
# Three-fold cross-validated score of the classifier on the chosen features.
scores=cross_val_score(alg,titanic[predictors],titanic['Survived'],cv=3)
print(scores.mean())

# =============================================================================
# 随机森林
# =============================================================================
# sklearn.cross_validation was removed; KFold and cross_val_score now live
# in sklearn.model_selection.
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
predictors=['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']
alg=RandomForestClassifier(random_state=1,n_estimators=10,min_samples_split=2,min_samples_leaf=1)
# Three consecutive folds without shuffling.  random_state is left out
# because it may only be set together with shuffle=True.
kf=KFold(n_splits=3)
scores=cross_val_score(alg,titanic[predictors],titanic['Survived'],cv=kf)
print(scores.mean())