R语言学习笔记（十）：重抽样与自助法

#置换实验 Coin包
install.packages(c("coin"))

#lmPerm包
install.packages("lmPerm")


#独立两样本和K样本检验
library(coin)
score<-c(40,57,45,55,58,57,64,55,62,65)
treatment<-factor(c(rep("A",5),rep("B",5)))
mydata<-data.frame(treatment,score)
t.test(score~treatment,data=mydata,var.equal=TRUE)

Two Sample t-test

data: score by treatment
t = -2.345, df = 8, p-value = 0.04705
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-19.0405455 -0.1594545
sample estimates:
mean in group A mean in group B
51.0 60.6

#置换T检验
library(coin)
oneway_test(score~treatment,data=mydata,distribution="exact")

Exact Two-Sample Fisher-Pitman Permutation Test

data: score by treatment (A, B)
Z = -1.9147, p-value = 0.07143
alternative hypothesis: true mu is not equal to 0

结果显著：P<0.05

结果不显著：P>0.05

上面这两个测试中所得的P值相差比较大，并且样本容易比较小，所以置换T检验更有说服力，因此我们认为改组样本得到的结果不显著P>0.05

#Wilcox.test
library(MASS)
UScrime<-transform(UScrime,So=factor(So))
wilcox_test(Prob~So,data=UScrime,distribution="exact")

Exact Wilcoxon-Mann-Whitney Test

data: Prob by So (0, 1)
Z = -3.7493, p-value = 8.488e-05
alternative hypothesis: true mu is not equal to 0

wilcox.test(Prob~So,data=UScrime,distribution="exact")

Wilcoxon rank sum test

data: Prob by So
W = 81, p-value = 8.488e-05
alternative hypothesis: true location shift is not equal to 0

#chisq_Test
library(coin)
library(vcd)
Arthritis<-transform(Arthritis,Improved=as.factor(as.numeric(Improved)))
set.seed(1234)
chisq_test(Treatment~Improved,data=Arthritis,distribution=approximate(B=9999))

Approximative Pearson Chi-Squared Test

data: Treatment by Improved (1, 2, 3)
chi-squared = 13.055, p-value = 0.0018

#spearman.test 数值变量间的独立性
states<-as.data.frame(state.x77)
set.seed(1234)
spearman_test(Illiteracy~Murder,data=states,distribution=approximate(B=9999))

Approximative Spearman Correlation Test

data: Illiteracy by Murder
Z = 4.7065, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0

#两样本和K样本相关性检验
library(coin)
library(MASS)
wilcoxsign_test(U1~U2,data=UScrime,distribution="exact")

Exact Wilcoxon-Pratt Signed-Rank Test

data: y by x (pos, neg)
stratified by block
Z = 5.9691, p-value = 1.421e-14
alternative hypothesis: true mu is not equal to 0

#lmPerm包的置换检验
library(lmPerm)

#简单线性回归置换
set.seed(1234)
fit<-lmp(weight~height,data=women,perm="Prob")
summary(fit)

Call:
lmp(formula = weight ~ height, data = women, perm = "Prob")

Residuals:
Min 1Q Median 3Q Max
-1.7333 -1.1333 -0.3833 0.7417 3.1167

Coefficients:
Estimate Iter Pr(Prob)
height 3.45 5000 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.525 on 13 degrees of freedom
Multiple R-Squared: 0.991, Adjusted R-squared: 0.9903
F-statistic: 1433 on 1 and 13 DF, p-value: 1.091e-14

#多项式回归的置换检验
set.seed(1234)
fit<-lmp(weight~height+I(height^2),data=women,perm="Prob")
summary(fit)

Call:
lmp(formula = weight ~ height + I(height^2), data = women, perm = "Prob")

Residuals:
Min 1Q Median 3Q Max
-0.509405 -0.296105 -0.009405 0.286151 0.597059

Coefficients:
Estimate Iter Pr(Prob)
height -7.34832 5000 <2e-16 ***
I(height^2) 0.08306 5000 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3841 on 12 degrees of freedom
Multiple R-Squared: 0.9995, Adjusted R-squared: 0.9994
F-statistic: 1.139e+04 on 2 and 12 DF, p-value: < 2.2e-16

#多元回归
library(lmPerm)
set.seed(1234)
states<-as.data.frame(state.x77)
fit<-lmp(Murder~Population+Illiteracy+Income+Frost,data=states,perm="Prob")
summary(fit)

Call:
lmp(formula = Murder ~ Population + Illiteracy + Income + Frost,
data = states, perm = "Prob")

Residuals:
Min 1Q Median 3Q Max
-4.79597 -1.64946 -0.08112 1.48150 7.62104

Coefficients:
Estimate Iter Pr(Prob)
Population 2.237e-04 51 1.0000
Illiteracy 4.143e+00 5000 0.0004 ***
Income 6.442e-05 51 1.0000
Frost 5.813e-04 51 0.8627
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.535 on 45 degrees of freedom
Multiple R-Squared: 0.567, Adjusted R-squared: 0.5285
F-statistic: 14.73 on 4 and 45 DF, p-value: 9.133e-08

#单因素方差分析和协方差分析
library(lmPerm)
library(multcomp)
set.seed(1234)
fit<-aovp(response~trt,data=cholesterol,perm="Prob")
anova(fit)

Analysis of Variance Table

Response: response
Df R Sum Sq R Mean Sq Iter Pr(Prob)
trt 4 1351.37 337.84 5000 < 2.2e-16 ***
Residuals 45 468.75 10.42
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#双因素方差分析
library(lmPerm)
set.seed(1234)
fit<-aovp(len~supp*dose,data=ToothGrowth,perm="Prob")
anova(fit)

Analysis of Variance Table

Response: len
Df R Sum Sq R Mean Sq Iter Pr(Prob)
supp 1 205.35 205.35 5000 < 2e-16 ***
dose 1 2224.30 2224.30 5000 < 2e-16 ***
supp:dose 1 88.92 88.92 2032 0.04724 *
Residuals 56 933.63 16.67
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#自助法
library(boot)
set.seed(1234)
rsq<-function(formula,data,indices){
  d<-data[indices,]
  fit<-lm(formula,data=d)
  return(summary(fit)$r.square)
  
}


results<-boot(data=mtcars,statistic=rsq,R=1000,formula=mpg~wt+disp)
print(results)

boot.ci(results,type=c("perc","bca"))

BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates

CALL :
boot.ci(boot.out = results, type = c("perc", "bca"))

Intervals :
Level Percentile BCa
95% ( 0.6838, 0.8833 ) ( 0.6344, 0.8549 )
Calculations and Intervals on Original Scale
Some BCa intervals may be unstable

#多个统计量的自助法
bs<-function(formula,data,indices){
  d<-data[indices,]
  fit<-lm(formula,data=d)
  return(coef(fit))
  
}

library(boot)
set.seed(1234)
results<-boot(data=mtcars,statistic=bs,R=1000,formula=mpg~wt+disp)
print(results)


plot(results,index=1)