交叉验证（cross-validation）

# 导入库
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm

# Load the iris dataset; return_X_y=True yields the features (X) and labels (y)
# as a pair instead of a single dataset object.
X, y = datasets.load_iris(return_X_y=True)
# Notebook display: shapes of the feature matrix and the label vector.
X.shape, y.shape

((150, 4), (150,))

# Hold out 40% of the samples for testing; the remaining 60% are used for
# training. random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=0,
)
# Notebook display: shapes of the training features and training labels.
X_train.shape, y_train.shape

((90, 4), (90,))

# Notebook display: shapes of the held-out test features and test labels.
X_test.shape, y_test.shape

((60, 4), (60,))

# Build a linear-kernel SVM classifier, then fit it on the training split.
clf = svm.SVC(kernel='linear', C=1)
clf.fit(X_train, y_train)

# Evaluate the fitted model on the held-out test split (not the training data).
clf.score(X_test, y_test)

0.9666666666666667

# 导入交叉验证所需的库
from sklearn.model_selection import cross_val_score

# Fresh, unfitted linear-kernel SVM; cross_val_score receives it as the estimator.
# NOTE(review): per the scikit-learn docs, SVC's random_state is only used for
# probability estimates (probability=True) -- confirm whether it is needed here.
clf = svm.SVC(kernel='linear', C=1, random_state=42)

# 5-fold cross-validation over the full dataset: estimator, features, labels.
scores = cross_val_score(estimator=clf, X=X, y=y, cv=5)
# Notebook display: the score obtained on each of the five folds.
scores

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

# Summarize the cross-validation scores: mean accuracy and the standard
# deviation across folds, both rounded to two decimal places.
print(f"{scores.mean():.2f} accuracy with a standard deviation of {scores.std():.2f}")

0.98 accuracy with a standard deviation of 0.02