# 1.1 Vectors and basic statistics ####
# The workspace is deliberately NOT cleared with rm(list = ls()): wiping the
# caller's global environment from inside a script destroys unrelated objects.
age <- c(1, 3, 5, 2, 11, 9, 3, 9, 12, 3)
weight <- c(4.4, 5.3, 7.2, 5.2, 8.5, 7.3, 6.0, 10.4, 10.2, 6.1)
mean(weight)
sd(weight)
cor(age, weight)
plot(age, weight)

# matrix ####
a <- matrix(1:20, nrow = 4, ncol = 5)
a
cells <- c(1, 26, 24, 68)
rnames <- c("R1", "R2")
cnames <- c("C1", "C2")
# The same cells filled column-wise (byrow = FALSE) and row-wise (byrow = TRUE).
mymatrix1 <- matrix(cells, nrow = 2, ncol = 2, byrow = FALSE,
                    dimnames = list(rnames, cnames))
mymatrix1
mymatrix2 <- matrix(cells, nrow = 2, ncol = 2, byrow = TRUE,
                    dimnames = list(rnames, cnames))
mymatrix2
x <- matrix(1:20, nrow = 2)  # 20 values in 2 rows -> a 2 x 10 matrix
x
x[2, ]         # second row
x[, 3]         # third column
x[2, 5]        # single cell
x[1, c(4, 5)]  # row 1, columns 4 and 5
x[, c(4:7)]    # columns 4 to 7

# array ####
dim1 <- c("A1", "A2")
dim2 <- c("B1", "B2", "B3")
dim3 <- c("c1", "c2", "c3", "c4")
mnarray <- array(1:24, c(2, 3, 4), dimnames = list(dim1, dim2, dim3))
# View() opens a GUI viewer, so it is only called in an interactive session.
if (interactive()) {
  View(mnarray)
}

# data frame ####
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("TYPE1", "TYPE2", "TYPE1", "TYPE1")
status <- c("Poor", "Improved", "Excellent", "Poor")
patientdata <- data.frame(patientID, age, diabetes, status)
# The data editors need a GUI; skip them when the script runs non-interactively.
if (interactive()) {
  edit(patientdata)  # temporary edit: returns a modified copy only
  fix(patientdata)   # changes are saved back into patientdata
}
rownames(patientdata) <- c("a", "b", "c", "d")
patientdata[, 1:2]
patientdata[3:4]
patientdata[c("diabetes", "status")]
patientdata$status
table(patientdata$patientID, patientdata$age)  # cross-tabulation
summary(patientID)
summary(age)
cor(age, patientID)
cov(age, patientID)
plot(patientdata$age, patientdata$patientID)

# with() replaces attach()/detach(): columns are resolved inside the data frame
# without altering the search path.
if (interactive()) {
  edit(CO2)
}
with(CO2, plot(uptake, conc))

head(mtcars)
# with(data, { expression }): the advantage is not having to repeat dataname$.
with(mtcars, {
  print(summary(mpg))
  plot(mpg, disp)
  plot(mpg, wt)
})
with(mtcars, print(summary(mpg)))
with(mtcars, {
  # A plain <- only creates a variable local to the with() call ...
  nokeepstats <- summary(mpg)
  # ... so <<- is used (on purpose, for this demonstration) to keep a result
  # after with() returns.
  keepstats <<- summary(mpg)
})
keepstats
# Referencing nokeepstats directly would stop the script with "object not
# found"; exists() shows the same fact (FALSE unless defined elsewhere).
exists("nokeepstats")

# factors ####
status <- factor(status, ordered = TRUE)  # levels default to alphabetical order
diabetes <- factor(diabetes)
class(status)
ls(patientdata)  # column names of the data frame, sorted
patientdata <- data.frame(patientID, age, diabetes, status)
str(patientdata)  # show the structure of the data
summary(patientdata)
# Explicit levels give the ordered factor a meaningful order.
status <- factor(status, ordered = TRUE,
                 levels = c("Poor", "Improved", "Excellent"))
sex <- c("1", "2", "2", "1")
sex <- factor(sex, levels = c(1, 2), labels = c("male", "female"))

# list ####
g <- "my list"
h <- c(25, 26, 18, 39)
j <- matrix(1:20, nrow = 2)
k <- c("one", "two", "three")
# g is the title string assigned immediately before this point in the script.
mylist <- list(title = g, ages = h, j, k)
mylist
mylist[[3]]       # third component (the unnamed matrix)
mylist[["ages"]]  # component selected by name

# create new variables, recode, rename ####
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
# NOTE(review): these values look like gender codes rather than countries;
# confirm whether this column was meant to be called `gender`.
country <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
# Second value was 54; corrected to 5 so it agrees with the redefinition of q2
# further down in this section.
q2 <- c(4, 5, 4, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)
leadership <- data.frame(manager, date, country, age, q1, q2, q3, q4, q5,
                         stringsAsFactors = FALSE)
leadership$age[leadership$age == 99] <- NA  # recode 99 as missing
leadership$agecat[leadership$age > 75] <- "elder"
leadership$agecat[leadership$age <= 75 & leadership$age >= 55] <- "middle aged"
leadership$agecat[leadership$age < 55] <- "young"
# or: the same recoding with within(). The column name and the "young" label
# now match the version above (previously misspelled "ageact" / "yough").
leadership <- within(leadership, {
  agecat <- NA
  agecat[age > 75] <- "elder"
  agecat[age >= 55 & age <= 75] <- "middle aged"
  agecat[age < 55] <- "young"
})
# within() is commonly used to modify a data frame without repeating dataframe$.
leadership <- within(leadership, {
  grade <- NA
  grade[q1 > 3] <- "good"
  grade[q1 <= 3] <- "bad"
})
# Compare with(), which only evaluates an expression, with within(), which
# returns a modified copy of the data frame.

# rename rows / columns ####
names(leadership)[2] <- "testdate"  # column names
names(leadership)
row.names(leadership)               # row names
# Install plyr only when it is missing instead of reinstalling on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# plyr::rename() returns a renamed copy; leadership itself is not modified.
plyr::rename(leadership, c(manager = "managerID"))

# datetime ####
Sys.Date()
# Still calls the base function: when resolving a call, R skips the
# non-function object named `date` created above.
date()
# Strings in the default yyyy-mm-dd format need no format argument.
mydate <- as.Date(c("2018-01-12", "2008-01-11"))
mydate
class(mydate)
# or ####
strdate <- c("2018/01/12", "2008/01/12")  # character
dates <- as.Date(strdate, "%Y/%m/%d")     # Date, printed in the default format
dates

manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
country <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 4, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)
leadership <- data.frame(manager, date, country, age, q1, q2, q3, q4, q5,
                         stringsAsFactors = FALSE)

# format ####
myformat <- "%m/%d/%y"  # two-digit month / day / year
leadership$date <- as.Date(leadership$date, myformat)
leadership$date
class(leadership$date)
# View() opens a GUI viewer, so it is only called in an interactive session.
if (interactive()) {
  View(leadership)
}
date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
date <- as.Date(date, "%m/%d/%y")
date
format(date, format = "%m-%d-%Y")
# Change how the dates are displayed
format(date, format = "%m/%d/%Y")
# or
today <- Sys.Date()
format(today, format = "%a")  # abbreviated weekday name
format(today, format = "%A")  # full weekday name
startdate <- as.Date(today)
endate <- as.Date("1994-01-21")
# Subtracting Dates gives a difftime in days; it is negative whenever endate
# precedes startdate, as it does here.
days <- endate - startdate
days
# difftime
today <- Sys.Date()
anniversary <- as.Date("2012-10-29")
difftime(today, anniversary, units = "days")
# Elapsed time in years, computed from `days` instead of the hard-coded
# 9145 / 365 (presumably the day count on the date the script was written).
abs(as.numeric(days)) / 365
today <- as.character(today)
today
# More: help("as.Date"), help("strftime"), and the timeDate package.

# order ####
# rm(list = ls()) was dropped here and below: every section redefines the
# objects it uses, and clearing the whole workspace destroys unrelated objects.
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
country <- c("US", "US", "UK", "UK", "UK")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 4, 4, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)
leadership <- data.frame(manager, date, country, gender, age,
                         q1, q2, q3, q4, q5,
                         stringsAsFactors = FALSE)
newdata <- leadership[order(-leadership$age), ]  # oldest first
newdata
newdata <- leadership[order(leadership$age), ]   # youngest first
newdata
# or: with() instead of attach()/detach(), so the search path is left alone
# and a bare detach() cannot remove the wrong entry.
newdata <- leadership[with(leadership, order(gender, age)), ]
newdata  # sorted by age within each gender
newdata1 <- leadership[with(leadership, order(gender, -age)), ]
newdata1

# merge datasets ####
manager <- c(1, 2, 3, 4, 5)
country <- c("US", "US", "UK", "UK", "UK")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 4, 4, 3, 2)
leadership1 <- data.frame(manager, country, gender, age, q1, q2,
                          stringsAsFactors = FALSE)
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)
leadership2 <- data.frame(manager, date, q3, q4, q5,
                          stringsAsFactors = FALSE)
# View() opens a GUI viewer, so it is only called in an interactive session.
if (interactive()) {
  View(leadership1)
  View(leadership2)
}
# Join on the shared key column.
total <- merge(leadership1, leadership2, by = "manager")
# Alternative: plain column binding. This overwrites `total` and keeps both
# `manager` columns, because cbind() does not match rows on a key.
total <- cbind(leadership1, leadership2)
if (interactive()) {
  View(total)
}

# subset ####
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
country <- c("US", "US", "UK", "UK", "UK")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 4, 4, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)
leadership <- data.frame(manager, date, country, gender, age,
                         q1, q2, q3, q4, q5,
                         stringsAsFactors = FALSE)
leadership["q1"]  # one-column data frame
newdata <- leadership[, 6:10]
newdata
newdata1 <- leadership[, c(6:10)]
newdata1
# or: select by name
mycars <- c("q1", "q2", "q3", "q4", "q5")
newdata2 <- leadership[mycars]
newdata2
# or: build the names programmatically
mycars1 <- paste0("q", 1:5)
newdata3 <- leadership[mycars1]
newdata3
# Drop columns with a logical mask ...
mycars2 <- names(leadership) %in% c("q3", "q4")
mycars2
newdata4 <- leadership[!mycars2]
newdata4
# ... or with negative indices ...
newdata5 <- leadership[c(-8, -9)]
newdata5
# ... or by assigning NULL. This is done on a copy: deleting q3/q4 from
# leadership itself made the later subset(..., select = c(q1, q2, q3, q4))
# fail, because the columns no longer existed.
trimmed <- leadership
trimmed$q3 <- NULL
trimmed$q3 <- trimmed$q4 <- NULL  # q3 is already gone; this removes q4

newdata <- leadership[1:3, ]  # first three rows
newdata
newdata <- leadership[, 1:3]  # first three columns
newdata
# or: rows matching a condition
newdata <- leadership[leadership$gender == "M" & leadership$age > 30, ]
newdata
# or: the same filter via with(), without attach()/detach()
newdata <- with(leadership, leadership[gender == "M" & age > 30, ])
leadership$date <- as.Date(leadership$date, "%m/%d/%y")
startdate <- as.Date("2014-10-02")
enddate <- as.Date("2014-10-25")
# which() turns the logical mask into row numbers, so NA comparisons are
# dropped rather than producing NA rows.
newdata <- leadership[which(leadership$date >= startdate &
                              leadership$date <= enddate), ]
newdata

# 4-10-4 subset() ####
newdata <- subset(leadership, age > 30 & gender == "M",
                  select = c(q1, q2, q3, q4))
newdata6 <- subset(leadership, age > 30 & age < 50, select = c(q1:q5))
newdata6
newdata7 <- subset(leadership, age > 50 | age < 30, select = gender:q1)
newdata7
# Samples random columns (with replacement). The range follows the actual
# column count instead of a hard-coded 1:8.
mysample <- leadership[sample(seq_len(ncol(leadership)), size = 3,
                              replace = TRUE)]
mysample

patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("TYPE1", "TYPE2", "TYPE1", "TYPE1")
status <- c("Poor", "Improved", "Excellent", "Poor")
status <- factor(status, ordered = TRUE)
diabetes <- factor(diabetes)
class(status)
patientdata <- data.frame(patientID, age, diabetes, status)
# Moved below the data.frame() call: patientdata must exist before it can be
# inspected.
ls(patientdata)
mysample <- patientdata[sample(seq_len(ncol(patientdata)), size = 3,
                               replace = FALSE)]
mysample
nrow(leadership)

# chapter 3 ####
# with() replaces attach()/detach(); the axis labels are still "wt" and "mpg".
with(mtcars, {
  plot(wt, mpg)
  abline(lm(mpg ~ wt))  # adds a line of best fit
  title("regression of mpg on weight")
})
# The same figure written to a PDF file; dev.off() closes the device and
# flushes the file.
pdf("mygraph.pdf")
with(mtcars, {
  plot(wt, mpg)
  abline(lm(mpg ~ wt))  # adds a line of best fit
  title("regression of mpg on weight")
})
dev.off()