# R随机森林交叉验证 + 进度条

library(data.table)
library(randomForest)
# K-fold cross-validation of a random forest that predicts Sepal.Length from
# the remaining iris columns, with a text progress bar. The script leaves a
# table `result` (columns Predicted, Actual, Difference) in the workspace and
# prints a summary of the absolute prediction errors.
data <- iris
str(data)

# Fix the RNG seed so the fold assignment and the forests are reproducible.
set.seed(42)

# Number of folds.
k <- 5

# Balanced fold labels: repeat 1..k to cover every row, then shuffle. Sampling
# labels with replacement would give unequal (possibly empty) folds.
data$id <- sample(rep_len(seq_len(k), nrow(data)))

# The fold label is bookkeeping only and must never be used as a predictor.
model_columns <- names(data) != "id"

# One slot per fold, filled inside the loop and bound once afterwards instead
# of growing a table with rbind() on every iteration.
fold_results <- vector("list", k)

# Text progress bar from the utils package, advanced once per finished fold.
progress_bar <- txtProgressBar(min = 0, max = k, style = 3)

for (i in seq_len(k)) {
  # Rows labelled i form the test set; all other rows form the training set.
  is_test <- data$id == i
  trainingset <- data[!is_test, model_columns, drop = FALSE]
  testset <- data[is_test, model_columns, drop = FALSE]

  # Name the response as a bare column so that `.` expands to the other
  # columns only. Writing `trainingset$Sepal.Length ~ .` would keep
  # Sepal.Length among the predictors and leak the target into the model.
  mymodel <- randomForest(Sepal.Length ~ ., data = trainingset, ntree = 100)

  # Pair this fold's held-out predictions with the observed values.
  fold_results[[i]] <- data.table(
    Predicted = unname(predict(mymodel, newdata = testset)),
    Actual = testset$Sepal.Length
  )

  setTxtProgressBar(progress_bar, i)
}
close(progress_bar)

# Stack the per-fold tables (in fold order) and compute the absolute error.
result <- rbindlist(fold_results)
result$Difference <- abs(result$Actual - result$Predicted)

# Summarise the absolute errors; the mean is the mean absolute error (MAE).
summary(result$Difference)

  

# 交叉验证伪代码
#
# for each epoch
#     for each training data instance
#         propagate error through the network
#         adjust the weights
#         calculate the accuracy over training data
#     for each validation data instance
#         calculate the accuracy over the validation data
#     if the threshold validation accuracy is met
#         exit training
#     else
#         continue training
#
# 原文地址:https://www.cnblogs.com/iupoint/p/10175090.html