# R exercise

# Load the built-in USArrests data set into the workspace.
data("USArrests")
# Standard error of the mean: sqrt(var(x) / n).
#
# Arguments:
#   x      numeric vector of observations.
#   na.rm  if TRUE, missing values are dropped before both the variance and
#          the sample size are computed. The default (FALSE) keeps the
#          original behaviour: any NA in `x` makes the result NA.
#
# Returns a single numeric value.
se <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Drop NAs up front so that n counts only the observations actually used.
    x <- x[!is.na(x)]
  }
  sqrt(var(x) / length(x))
}
# Try se() on a small sample. The sample is bound to `x` first so that the
# length(x) call below refers to a defined object (inside se(), `x` is only a
# local argument and is not visible at top level).
x <- c(45, 2, 3, 5, 76, 2, 4)
se(x)
length(x)
# Run the statements stored in mycode.R.
source("mycode.R")
# Save the current workspace (save.image() writes to its default file).
save.image()
# First look at the `algae` data (expected to come from DMwR).
# NOTE(review): DMwR appears to have been archived on CRAN -- confirm it is
# still installable in the target environment.
library(DMwR)

# Peek at the first rows, once with head() and once with explicit row indexing.
head(algae)
algae[seq_len(5), ]

# Histogram of the mxPH column.
hist(algae$mxPH)
# Normal quantile-quantile plot of mxPH using the car package.
library(car)
# qq.plot() was renamed qqPlot() in car, and it needs the data to plot; the
# original call passed no arguments at all.
# NOTE(review): algae$mxPH is assumed to be the intended variable because it is
# the one shown in the histogram just above -- confirm.
qqPlot(algae$mxPH, main = "Normal QQ plot of maximum pH")
# Request a 1 x 2 plotting layout for the base-graphics device.
par(mfrow = c(1, 2))
# Box plot of oPO4; the axis label spelling is corrected to "Orthophosphate".
boxplot(algae$oPO4, ylab = "Orthophosphate(oPO4)")
# Add the individual observations as a jittered rug along the y axis (side 2).
rug(jitter(algae$oPO4), side = 2)
# xyplot ----
# Read the article data, then draw webHits against commentCount with one
# lattice panel per authorName.
articleHitsComments <- read.csv(
  "http://www.headfirstlabs.com/books/hfda/hfda_ch12_articleHitsComments.csv",
  header = TRUE
)
head(articleHitsComments)

library(lattice)
xyplot(webHits ~ commentCount | authorName, data = articleHitsComments)
# regex ----
# Read the data, then strip any parenthesised text from the LastName column.
hfhh <- read.csv(
  "http://www.headfirstlabs.com/books/hfda/hfda_ch13_data_for_R.csv",
  header = TRUE
)
head(hfhh)

# The backslashes must be doubled: in an R string "\(" is an unrecognised
# escape (a parse error), whereas "\\(" yields the regex \( that matches a
# literal opening parenthesis.
newlastName <- sub("\\(.*\\)", "", hfhh$LastName)
head(newlastName)

# Replace the original column with the cleaned values; removing it first means
# the cleaned column is appended as the last column.
hfhh$LastName <- NULL
head(hfhh)
hfhh["LastName"] <- newlastName
head(hfhh)
# sort ----
# Reorder the rows of hfhh by ascending PersonID and show the first 50.
hfhhSorted <- hfhh[order(hfhh$PersonID), ]
head(hfhhSorted, n = 50)
# delete duplicate rows ----
# Keep every column except CallID and Time, then collapse identical rows so
# each remaining combination of values appears once.
hfhhNamesOnly <- unique(
  hfhhSorted[setdiff(names(hfhhSorted), c("CallID", "Time"))]
)
head(hfhhNamesOnly, n = 50)

# Write the de-duplicated table to a CSV file in the working directory.
write.csv(hfhhNamesOnly, file = "hfhhNamesOnly.csv")