R语言数据可视化部分

数据挖掘

R语言数据可视化部分

散点图

# Basic scatter plots of the `cars` data set (columns used here: dist, speed).
library(datasets)

head(cars)

# Plot the whole data frame with the default method.
plot(cars)

# dist on the x axis. The axis runs from 0 to the largest observed dist so that
# no observation is clipped (a fixed upper limit of 20 hid most of the points).
plot(cars$dist, cars$speed,
     xlab = "dist", ylab = "speed",
     xlim = c(0, max(cars$dist)))

# Same plot with bare column names. with() scopes the lookup to this one call
# instead of attach(), which would leave `dist`/`speed` on the search path.
with(cars, plot(dist, speed, xlab = "dist", ylab = "speed"))

# Points joined by lines, cross symbol, thicker red strokes.
plot(cars$dist, cars$speed,
     xlab = "dist", ylab = "speed", main = "cars 散点图",
     type = "b", pch = 4, lwd = 2, col = "red")

 

# Three small numeric vectors reused by the plotting demos.
a <- c(20, 30, 40, 50, 60)
b <- c(11, 22, 33, 44, 66)
c <- c(15, 25, 35, 45, 55)

# One plot per `type` value, in this order:
#   "b" points joined by lines      "o" lines drawn through the points
#   "p" points only                 "h" vertical lines showing the values
#   "l" lines only                  "s" / "S" step plots
#   "n" nothing is drawn
invisible(lapply(
  c("b", "o", "p", "h", "l", "s", "S", "n"),
  function(kind) plot(a, b, type = kind)
))

# Plotting symbols 0 to 3.
invisible(lapply(0:3, function(symbol) plot(a, b, pch = symbol)))

# Larger filled squares; the second call also joins them with dashed lines.
plot(a, b, lty = 2, lwd = 2, pch = 15, cex = 2)
plot(a, b, type = "b", lty = 2, lwd = 2, pch = 15, cex = 2)

 

# Install RColorBrewer only when it is missing, so re-running the script does
# not hit the network every time.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
library(RColorBrewer)

plot(a, b, type = "b", lty = 2, lwd = 2, pch = 15, cex = 2, col = 2)

# Save only the settable graphical parameters. With no.readonly = FALSE the
# saved list also holds read-only entries and par(opar) warns when restoring.
opar <- par(no.readonly = TRUE)

par(pin = c(2, 3))                    # plot region size in inches
par(lwd = 2, cex = 1.5)               # thicker lines, larger symbols
par(cex.axis = .75, font.axis = 3)    # smaller, italic axis annotation

plot(a, b, type = "b", pch = 19, lty = 2, col = 2)

# pch = 23 is a fillable symbol; bg sets its fill colour.
plot(a, b, type = "b", pch = 23, lty = 5, col = 2, bg = "green")

# Restore the parameters saved above.
par(opar)

 

# Split the device into a 2 x 2 grid for the figures that follow.
par(mfrow = c(2, 2))

plot(
  a, c,
  type = "o", col = 3, lty = 3, pch = 3, lwd = 4,
  main = "实验图形", xlab = "a", ylab = "c",
  xlim = c(0, 100), ylim = c(0, 100)
)

# Two series over the same x values: y = x and z = 10 / x.
x <- 1:15
y <- x
z <- 10 / x

# Remember the current settings, then widen the right margin so the
# right-hand axis labels fit.
opar <- par(no.readonly = TRUE)
par(mar = c(5, 4, 4, 8) + 0.1)

# First series without the default y axis and without annotations.
plot(x, y, type = "b", pch = 21, col = 2, yaxt = "n", lty = 3, ann = FALSE)
# Second series drawn on the same plot.
lines(x, z, type = "b", pch = 3, col = 4, lty = 2)

# Left axis labelled with the x values, right axis with the rounded z values.
axis(side = 2, at = x, labels = x, col.axis = 2, las = 2)
axis(side = 4, at = z, labels = round(z, digits = 2), cex.axis = 2, las = 2)

par(opar)

 

# Subsetting, list conversion and merging, demonstrated on the `attenu` data.
library(datasets)

attenu  # print the data set

# subset() evaluates its conditions inside the data frame, so attach() is not
# needed here.
subset(attenu, dist >= 40)  # rows whose dist is at least 40

# event and dist are compared as numbers rather than strings.
df <- subset(attenu, event == 2 & dist == 107, select = dist)
df  # matching rows, dist column only

s <- subset(attenu, event == 2 & dist == 107, select = c(event, dist))
s   # same rows, two columns

df1 <- as.list(attenu)
df1  # the data frame as a plain list of columns

append(df1$event, 123)  # add 123 after the last event value

# `after` is larger than the list length here, so the value is expected to
# land at the end.
append(df1, 1235, after = 55)

data1 <- data.frame(Id = 1:6,
                    Product = c(rep("Toaster", 3), rep("Radio", 3)))
data2 <- data.frame(Id = c(2, 4, 6, 7),
                    State = c(rep("Alabama", 3), rep("Ohio", 1)))

merge(data1, data2, all = TRUE)     # full outer join: every Id from either side
merge(data1, data2, all = FALSE)    # inner join: only Ids present in both
merge(data1, data2, all.x = TRUE)   # left join: every row of data1 is kept

# With no `by`, merge() joins on the columns the two frames have in common;
# here that is only Id, so the two calls below give the same result.
merge(data1, data2)
merge(data1, data2, by = "Id")

#aggregate()函数

 

 

 

 

 

 

 

# aggregate(), table() and the apply family, demonstrated on mtcars.
mtcars       # print the data set
str(mtcars)  # column names and types

# Columns are referenced through the data frame (or `data =`) instead of
# attach(), so nothing is left on the search path.

# Mean of every column for each cyl x gear combination.
aggregate(mtcars, by = list(mtcars$cyl, mtcars$gear), FUN = mean)

# Formula interface: summarise only mpg and hp for the same groups.
aggregate(cbind(mpg, hp) ~ cyl + gear, data = mtcars, FUN = mean)

# Frequency of each distinct hp value.
with(mtcars, table(hp))

# apply() over a matrix: MARGIN 1 = rows, MARGIN 2 = columns.
rname <- c("r1", "r2", "r3")
cname <- c("c1", "c2")
b <- matrix(1:6, nrow = 3, dimnames = list(rname, cname))
apply(b, 1, sum)  # row sums
apply(b, 2, sum)  # column sums

# lapply() returns a list; here every string is lower-cased.
m <- c("SPYDERMAN", "BATMAN", "VERTIGO", "CHINATOWN")
m1 <- lapply(m, tolower)
m1

# Minimum of every column; vapply() pins the result type to one number each.
vapply(mtcars, min, numeric(1))

# Mean mpg per cylinder count. Grouping mpg by mpg itself would only return
# each distinct value unchanged.
mpg_by_cyl <- tapply(mtcars$mpg, INDEX = mtcars$cyl, FUN = mean)
mpg_by_cyl

 

 

 

条形图

#条形图

# Packages used by the chart sections. "plotrix" was misspelled "plottrix",
# which is not the package loaded later in the script. Only the packages that
# are not installed yet are fetched.
chart_pkgs <- c("vcd", "plotrix", "sm", "vioplot")
for (pkg in chart_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(vcd)

# Number of observations per treatment in the Arthritis data.
a <- table(Arthritis$Treatment)
a

hist(a)

# Vertical bar chart.
barplot(
  a,
  main = "条形图",
  ylab = "Frequency", xlab = "Treatment",
  ylim = c(0, 50)
)

# The same counts as horizontal bars (axis titles and limits swapped).
barplot(
  a,
  main = "条形图",
  ylab = "Treatment", xlab = "Frequency",
  xlim = c(0, 50), horiz = TRUE
)

# plot() applied directly to the Improved column.
plot(Arthritis$Improved, main = "条形图", xlab = "Improved", ylab = "Frequency")

# Cross-tabulation: Improved (rows) by Treatment (columns).
counts <- table(Arthritis$Improved, Arthritis$Treatment)
counts

# Stacked bars: one bar per treatment, segments taken from the table rows.
barplot(
  counts,
  main = "条形图", xlab = "Treatment", ylab = "Frequency",
  col = c("red", "yellow", "green"),
  legend.text = rownames(counts)
)

# Grouped bars: the same table with the segments placed side by side.
barplot(
  counts,
  main = "条形图", xlab = "Treatment", ylab = "Frequency",
  col = c("red", "yellow", "green"),
  legend.text = rownames(counts),
  beside = TRUE
)

 

#均值条形图

# Combine the region factor with the state.x77 statistics.
states <- data.frame(state.region, state.x77)

# Mean Illiteracy per region; aggregate() names the result columns
# Group.1 (the region) and x (the mean).
means <- aggregate(states[["Illiteracy"]], by = list(state.region), FUN = mean)
means

# Reorder the rows by ascending mean.
means <- means[order(means[["x"]]), ]
means

# One bar per region, labelled with the region name, then add the title.
barplot(means[["x"]], names.arg = means[["Group.1"]])
title("Mean Illiteracy Rate")

 

#调条形图

# Widen the left margin for the long bar labels and rotate the axis labels.
par(mar = c(5, 8, 4, 2), las = 2)

counts <- table(Arthritis$Improved)
barplot(
  counts,
  main = "Treatment Outcome",
  horiz = TRUE,
  cex.names = 0.8,
  names.arg = c("No Improvement", "Some Improvement", "Marked Improvement")
)

# Spine plot: spine() draws the Treatment x Improved table so that the
# proportions within each bar can be compared.
attach(Arthritis)
counts <- table(Treatment, Improved)
spine(counts, main = "棘状图")
detach(Arthritis)

 

饼图

#饼图

# Four charts on one 2 x 2 page.
par(mfrow = c(2, 2))

s <- c(10, 12, 4, 16, 8)
lbls <- c("A", "B", "C", "D", "E")

# Plain pie chart.
pie(s, labels = lbls, main = "饼图")

# Pie chart whose labels carry each slice's share in whole percent.
pct <- round(s / sum(s) * 100)
lbls2 <- paste0(lbls, " ", pct, "%")
pie(s, labels = lbls2, col = rainbow(length(lbls)), main = "饼图")

# 3D pie charts come from plotrix.
install.packages("plotrix")
library(plotrix)
pie3D(s, labels = lbls, explode = 0.1, main = "3D 饼图 ")

# Pie chart built from a table: number of states per region, with the count
# appended to each label.
mydata <- table(state.region)
lbls <- paste0(names(mydata), " ", mydata)
pie3D(mydata, labels = lbls, main = "饼图1")

# Fan plot of the same kind of data.
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
fan.plot(slices, labels = lbls, main = "扇形图")

 

散点图

#散点图

# Put the mtcars columns on the search path; the calls below use bare names.
attach(mtcars)
par(mfrow = c(2, 2))

plot(
  wt, mpg,
  main = "Basic Scatterplot of MPG vs. Weight",
  xlab = "Car Weight (lbs/1000)",
  ylab = "Miles Per Gallon ",
  pch = 19
)
# Linear regression line (solid red) and lowess smooth (dashed blue).
abline(lm(mpg ~ wt), col = "red", lwd = 2, lty = 1)
lines(lowess(wt, mpg), col = "blue", lwd = 2, lty = 2)
# Related smoothers: loess(), lowess()

library(car)
# mpg against wt, one group per cyl value.
# NOTE(review): at, id.method, legend.plot and labels look like arguments of
# an older car release - confirm against the installed version's
# ?scatterplot before relying on them.
scatterplot(
  mpg ~ wt | cyl,
  at = cyl,
  data = mtcars,
  lwd = 2,
  main = "Scatter Plot of MPG vs. Weight by  Cylinders",
  xlab = "Weight of Car (lbs/1000)",
  ylab = "Miles Per Gallon",
  id.method = "identify",
  legend.plot = TRUE,
  labels = row.names(mtcars),
  boxplots = "xy"
)

 

 

#散点图矩阵

# Scatterplot matrices of mpg, disp, wt and drat.
attach(mtcars)

# Base graphics version.
pairs(~ mpg + disp + wt + drat, data = mtcars, main = "汽车数据矩阵散点图")

library(car)

# Per the original notes: spread = FALSE leaves out the lines showing spread
# and asymmetry, and smoother.args = list(lty = 2) draws the loess fit as a
# dashed line instead of a solid one.
# NOTE(review): spread / smoother.args may belong to an older car API -
# confirm against the installed version's ?scatterplotMatrix.
scatterplotMatrix(
  ~ mpg + disp + drat + wt,
  data = mtcars,
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "散点图矩阵"
)

# Same matrix with spread = TRUE and a solid smoother line.
scatterplotMatrix(
  ~ mpg + disp + drat + wt,
  data = mtcars,
  spread = TRUE,
  smoother.args = list(lty = 1),
  main = "散点图矩阵"
)

#不同风格

# Alternative styles of pairs plots.
# Fixes: the package is spelled "gclus" (there is no "glus"), and
# "ResourceSelection" must go through install.packages() - installed.packages()
# only lists what is already installed. Packages are fetched only if missing.
for (pkg in c("gclus", "TeachingDemos", "HH", "ResourceSelection")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(gclus)
library(TeachingDemos)

# pairs2() needs two sets of variables; a call without arguments fails.
# Plot mpg and disp against drat and wt.
pairs2(mtcars[, c("mpg", "disp")], mtcars[, c("drat", "wt")])

 

#高密度散点图

# Simulated data for the high-density scatter plots: two clusters of normal
# draws, stacked into one two-column data frame.
set.seed(1234)
n <- 10000
c1 <- matrix(rnorm(n, mean = 0, sd = 0.5), ncol = 2)
c2 <- matrix(rnorm(n, mean = 3, sd = 2), ncol = 2)
mydata <- as.data.frame(rbind(c1, c2))
names(mydata) <- c("x", "y")

# Ordinary scatter plot.
with(mydata, plot(x, y, pch = 19, main = "高密度散点图"))

# Smoothed density representation of the same points.
with(mydata, smoothScatter(x, y, main = "高密度散点图"))

# Hexagonal binning with 50 bins along x.
library(hexbin)
with(
  mydata,
  plot(hexbin(x, y, xbins = 50),
       main = "Hexagonal Binning with 10,000 Observations")
)

# Image scatter plot from IDPmisc; the original note says it stays readable
# for large data sets.
library(IDPmisc)
with(
  mydata,
  iplot(x, y, main = "Image Scatter Plot with Color Indicating Density")
)

# Restore the graphical parameters saved in `opar`.
par(opar)

 

#多维可视化散点图

# Static 3D scatter plots of wt, disp and mpg.
install.packages("scatterplot3d")
library(scatterplot3d)

attach(mtcars)

# Default appearance.
scatterplot3d(wt, disp, mpg, main = "3d 散点图")

# Filled points with vertical drop lines; highlight.3d = TRUE colours the
# points.
scatterplot3d(
  wt, disp, mpg,
  pch = 16, highlight.3d = TRUE, type = "h",
  main = "3D 散点图"
)

# Same plot again, keeping the returned object so that a plane can be added.
s3d <- scatterplot3d(
  wt, disp, mpg,
  pch = 16, highlight.3d = TRUE, type = "h",
  main = "3D 散点图"
)

# Overlay the plane of the linear model mpg ~ wt + disp.
fit <- lm(mpg ~ wt + disp)
s3d$plane3d(fit)

detach(mtcars)

# Rotatable 3D scatter plot (rgl).
install.packages("rgl")
library(rgl)

attach(mtcars)
plot3d(wt, disp, mpg, col = "blue", size = 5)

 

折线图

# Line chart of tree growth: one line per tree of the Orange data set.

# Convert Tree to numeric codes so the trees can be indexed 1..ntrees.
Orange$Tree <- as.numeric(Orange$Tree)
ntrees <- max(Orange$Tree)
ntrees

# range() returns the minimum and maximum; they fix the plotting region.
xrange <- range(Orange$age)
xrange
yrange <- range(Orange$circumference)
yrange

# Empty plot (type = "n") with the right extents; lines are added below.
plot(xrange, yrange,
     type = "n",
     xlab = "Age (days)",
     ylab = "Circumference (mm)")

# One colour, one line type and one plotting symbol per tree.
colors <- rainbow(ntrees)                  # ntrees adjacent rainbow colours
linetype <- seq_len(ntrees)                # line types 1..ntrees
# Symbols 18, 19, ...: exactly ntrees of them. seq(18, 18 + ntrees, 1)
# produced one symbol too many.
plotchar <- seq(18, length.out = ntrees)
plotchar

# Draw each tree's growth curve with its own colour, line type and symbol.
for (i in seq_len(ntrees)) {
  tree <- subset(Orange, Tree == i)
  lines(tree$age, tree$circumference,
        type = "b",
        lwd = 2,
        lty = linetype[i],
        col = colors[i],
        pch = plotchar[i])
}

title("树子的生长折线图", "绘图例子")

# Legend in the top-left corner of the data region; cex = 0.8 shrinks it to
# 80 % of the default size.
legend(xrange[1], yrange[2],
       seq_len(ntrees),
       cex = 0.8,
       col = colors,
       pch = plotchar,
       lty = linetype,
       title = "Tree")

#  cex=0.8图形缩放0.8倍

 

不同包作图比较

# The same three charts (scatter plot, histogram, box plot) drawn with three
# different packages: graphics, lattice and ggplot2.

# NOTE(review): "J:shuju/..." has no separator after the drive letter, so it
# is resolved relative to the current directory of drive J - confirm that
# this is intended.
data <- read.csv("J:shuju/Facet_Data.csv", sep = ",", header = TRUE)
str(data)

# Columns are reached through with() / `data =` rather than attach(data), so
# SOD, tau and Class do not leak onto the search path.

# --- graphics ---------------------------------------------------------------
library(graphics)
with(data, plot(SOD, tau, main = "实验散点图"))
with(data, hist(SOD, breaks = 30, ylim = c(0, 40), main = ""))
boxplot(SOD ~ Class, data = data, xlab = "Class", ylab = "SOD")

# --- lattice ----------------------------------------------------------------
library(lattice)
xyplot(SOD ~ tau, data = data, col = "black", main = "实验散点图")
histogram(~SOD, data = data, type = "count", nint = 30, col = "white")
bwplot(SOD ~ Class, data = data, xlab = "Class",
       par.settings = canonical.theme(color = FALSE))

# --- ggplot2 ----------------------------------------------------------------
library(ggplot2)
# geom_point() has no `main` parameter; the title is added with ggtitle().
ggplot(data, aes(x = SOD, y = tau)) +
  geom_point(shape = 21) +
  ggtitle("实验散点图")
ggplot(data, aes(SOD)) +
  geom_histogram(bins = 30, colour = "black", fill = "white")
ggplot(data, aes(x = Class, y = SOD)) +
  geom_boxplot()

 

 

 

核密度函数图

# Kernel density plots of mtcars$mpg.
mtcars

par(mfrow = c(2, 1))

# The estimate is computed once and reused; with() keeps the bare column name
# without attaching the data frame.
d <- with(mtcars, density(mpg))
plot(d)

plot(d, main = "Kernel Density of Miles Per Gallon")
polygon(d, col = "red", border = "blue")  # fill the area under the curve
rug(mtcars$mpg, col = "brown")            # tick marks at the observations

# Density curves of mpg compared across the cylinder groups.
library(sm)

cyl_f <- factor(mtcars$cyl, levels = c(4, 6, 8),
                labels = c("4A", "6A", "8A"))

sm.density.compare(mtcars$mpg, mtcars$cyl, xlab = "hfhhj")
title(main = "hyhbhkajbkjhu")

# One fill colour per group, starting at colour 2.
# NOTE(review): assumed to match the colours sm.density.compare() uses -
# confirm.
colfill <- 2:(1 + length(levels(cyl_f)))

# Place the legend with a mouse click (needs an interactive device). The
# original called length() with these arguments and never defined `colfill`,
# so no legend could be drawn.
legend(locator(1), levels(cyl_f), fill = colfill)

 

马赛克图

# Mosaic plot
#
# Note on the `shade` argument (translated from the original comment): a
# logical specifying whether gp should be used (see gp). If TRUE and
# `expected` is unspecified, a default model is fitted: when condvars is
# given (see strucplot) the corresponding conditional independence model,
# otherwise the total independence model.
library(vcd)
library(grid)

# Flat contingency table of the Titanic data.
ftable(Titanic)

mosaic(Titanic, shade = TRUE, legend = TRUE)

箱线图

# Box plots
attach(mtcars)

# Single box plot of mpg.
boxplot(mpg, main = "箱线图", ylab = "gh")

# mpg compared across the cyl groups.
boxplot(
  mpg ~ cyl,
  data = mtcars,
  main = "jjgjh", xlab = "hjklk", ylab = "ghjgkh"
)

# Same comparison with notches and with box widths that vary by group.
boxplot(
  mpg ~ cyl,
  data = mtcars,
  notch = TRUE, varwidth = TRUE, col = "red",
  main = "Car Mileage Data",
  xlab = "Number of Cylinders",
  ylab = "Miles Per Gallon"
)

# Box plots for two crossed factors: add factor versions of cyl and am to
# mtcars, then plot mpg for every am.f x cyl.f combination.
mtcars[["cyl.f"]] <- factor(
  mtcars[["cyl"]],
  levels = c(4, 6, 8),
  labels = c("4", "6", "8")
)
mtcars[["am.f"]] <- factor(
  mtcars[["am"]],
  levels = c(0, 1),
  labels = c("auto", "standard")
)

boxplot(
  mpg ~ am.f * cyl.f,
  data = mtcars,
  varwidth = TRUE, col = c("gold", "darkgreen"),
  main = "MPG Distribution by Auto Type",
  xlab = "Auto Type", notch = TRUE
)

 

 

 

相关图

# Correlograms of the mtcars variables.
options(digits = 5)  # number of significant digits to print

# Work on the numeric columns only. cor() stops with an error on non-numeric
# input, which is what happens once factor columns such as cyl.f / am.f have
# been added to mtcars earlier in the session.
mtcars_num <- mtcars[vapply(mtcars, is.numeric, logical(1))]
cor(mtcars_num)

library(corrgram)

# Arguments used below (translated from the original notes):
#   order         whether the variables are reordered
#   panel         function that draws the contents of each panel
#   labels        labels for the diagonal panels (instead of the variable
#                 names); reordered together with the variables when `order`
#                 is used
#   lower.panel / upper.panel
#                 separate panel functions below / above the diagonal
#   diag.panel    panel function for the diagonal
#   text.panel    panel function that writes the labels
#   label.pos     horizontal and vertical position of the diagonal labels
#   label.srt     string rotation of the diagonal labels
#   cex.labels, font.labels
#                 graphical parameters for the diagonal labels

# Shaded cells below the diagonal, pies above it, variables reordered.
corrgram(mtcars_num, order = TRUE,
         lower.panel = panel.shade,
         upper.panel = panel.pie,
         text.panel = panel.txt,
         main = "mtcars相互关系的相关图")

# Ellipses below, scatter plots above, min/max values on the diagonal.
corrgram(mtcars_num, order = TRUE,
         lower.panel = panel.ellipse,
         upper.panel = panel.pts,
         text.panel = panel.txt,
         diag.panel = panel.minmax,
         main = "使用散点图和椭圆的mtcar数据的相关图")

# Lower triangle only, variables in their original order.
corrgram(mtcars_num,
         lower.panel = panel.shade,
         upper.panel = NULL,
         text.panel = panel.txt,
         main = "汽车里程数据(未排序)相关图")
# Return `ncol` colours interpolated along the ramp
# darkgoldenrod4 -> burlywood1 -> darkkhaki -> darkgreen.
# colorRampPalette() turns a set of colours into a palette function, which is
# then called with the requested number of colours.
col.corrgram <- function(ncol) {
  ramp <- colorRampPalette(
    c("darkgoldenrod4", "burlywood1", "darkkhaki", "darkgreen")
  )
  ramp(ncol)
}
# Correlogram drawn with the custom palette. The palette function has to be
# passed through col.regions; merely defining col.corrgram() in the workspace
# left the default colours in use.
corrgram(mtcars, order = TRUE,
         lower.panel = panel.shade,
         upper.panel = panel.pie,
         text.panel = panel.txt,
         col.regions = col.corrgram,
         main = "拥有不同颜色的相关图")

 

 

 

原文地址:https://www.cnblogs.com/cxy-0902/p/12690580.html