发表级ggplot绘图流程和技巧

发表级ggplot绘图流程和技巧 | 论文发表

themes - https://www.r-graph-gallery.com/ggplot2-package.html#themes

要开始修饰以前的核心图片，准备发表论文了。

把之前比较raw的图修饰格式，统一生成高清晰图片，准备放入paper中。

会慢慢补充所有常见的绘图代码。

一个raw image的代码：

p1 <- ggplot(oxidation.df, aes(x=group, y=score, color=group)) + 
        geom_boxplot() + 
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        labs(title = "Fatty acid metabolism")
p1

第一步：修改df里的标签

oxidation.df$group <- plyr::mapvalues(oxidation.df$group, 
                                      from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"),
                                      to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

第二步：修改标签顺序

oxidation.df$group <- factor(oxidation.df$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

第三部：精修格式主题字体

主题

常用的主题：https://www.r-graph-gallery.com/ggplot2-package.html#themes

theme_bw - 去掉了灰白背景，加了边框，最常用
theme_classic - 只留下了加粗的左下边框，最经典，适合实验图
egg::theme_article - 只有四周的边框，最适合发表文章，缺点：图例间隔太小
theme_minimal - 只留下了grid，没有边框
theme_minimal_hgrid - 只留下了hgrid
theme_void - 只留下了图例，适合tSNE图

theme_bw()

theme_void()

# remove grid
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

修改title

labs(x = "", y = "Pathway score
", title = "Fatty acid metabolism")

xy轴标签字体大小

theme(axis.text.x  = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6),
        axis.text.y  = element_text(size = 10),
        axis.title.y = element_text(size = 14))

去掉多余的图例

theme(legend.position = "none")

填充颜色

library(RColorBrewer)

scale_fill_manual(values=brewer.pal(9,"Paired"))
scale_color_manual(values=brewer.pal(9,"Paired")[c(3,4,5,6)])

其他

限制xy坐标范围

scale_x_continuous(limits = c(0,2.5))
scale_y_continuous(limits = c(0,2.5))

一组数据的比较

library(ggpubr)
stat_compare_means(label.y = 2.3, label.x = 1, size=5)

多组数据的比较

library(ggpubr)
my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late"))

stat_compare_means(method = "anova", label.y = 1.29) + # global
stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired
scale_y_continuous(limits = c(-0.52, 1.3))

代码汇总

tmp$group <- plyr::mapvalues(tmp$group, from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"),
                                      to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

tmp$group <- factor(tmp$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

library(ggpubr)
my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late"))

options(repr.plot.width=4, repr.plot.height=4)
p1 <- ggplot(tmp, aes(x=group, y=score, color=group)) + 
        geom_boxplot() + 
        theme_bw() +
        labs(x = "", y = "Pathway score
", title = "Fatty acid metabolism") +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme(legend.position = "none") + 
        theme(axis.text.x  = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6),
        axis.text.y  = element_text(size = 10),
        axis.title.y = element_text(size = 14)) +
        # scale_fill_manual(values=brewer.pal(9,"Paired"))
        scale_color_manual(values=brewer.pal(9,"Paired")[c(3,4,5,6)]) +
        stat_compare_means(method = "anova", label.y = 1.29) + # global
        stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired
        scale_y_continuous(limits = c(-0.52, 1.3))
p1

多图拼接

options(repr.plot.width=8, repr.plot.height=9)
cowplot::plot_grid(p1,p2,p3,p4,ncol = 2)

PDF出图

ggsave(filename = "HhOFF HhON metabolic pathways.pdf", width = 8, height = 9)

有些图不能这么保存，比如heatmap，这时就要用到pdf函数

# traditional save
pdf("manuscript/HSCR.cluster.heatmap.pdf", width=8, height=7)
p
dev.off()

lnkscape里修改文字【对齐，上下标等等】

OK, 一个准发表级的图就制作好了，可能还需要精修。

其余细节

点的类型

# change the border of point
geom_point(shape = 21, colour = "black", fill = "white", size = 5, stroke = 5)

把点拟合成线

stat_smooth(method = "loess", size = 1.1, se = F, span = 0.2)

散点图显示mean

stat_summary(fun.y=mean, geom="point", shape=20, size=7, color="black", fill="black") +

图例，比如改legend title，改点大小，去掉图例

labs(x = "
Transcriptional level",y = "
Post-transcriptional level", title = "", color = "Clinical score")

# change legend dot size
guides(colour = guide_legend(override.aes = list(size=10)))

# ggplot remove legend title
theme(legend.title = element_blank())

# position
theme(legend.text = element_text(size = 12), legend.position = c(0.8, 0.75))

# remove legend background
theme(legend.background=element_blank())

去掉legend的白色背景布，一步到位

theme(legend.title = element_blank(), legend.text = element_text(size = 11), legend.position = c(0.85, 0.15),
          legend.background = element_blank())

标题格式，比如居中

theme(plot.title = element_text(hjust = 0.5, size = 18))

去掉边框，轴线，刻度；去掉右上边框

# empty border, ticks, text
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_blank()) +
labs(x = "",y = "", title = "") +
theme(axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())

# remove top and right border
theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank())

坐标轴，比如修改起点，范围

# force y start from 0
scale_y_continuous(expand = c(0, 0), limits = c(0, NA))

去掉画布中的网格线条

# just remove inside grid
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

添加文本

# add text
annotate("text", label = "Wilcoxon test
P-value = 1.48e-12", x = 0.5, y =2, size = 6, colour = "black")

添加背景色

# add background color to mark different region
geom_rect(xmin=0, xmax=2.5, ymin=-2, ymax=-1, fill="#4DAF4A", alpha=1, color=NA)

修改填充颜色

# color
scale_color_manual(values=brewer.pal(9,"Set1")[c(1:5,7:9)])

精准控制圈图的两种alpha，比如venn图

scale_color_manual(values = sample.colors) +
scale_fill_manual(values = alpha(sample.colors, .2))

快速统计分析

# quick statistic testing
# Wilcoxon test
wilcox.test(subset(time.df,GeneSet=="Common risk")$Time,
subset(time.df,GeneSet=="L-HSCR specific")$Time, alternative = "two.sided")

# packages
# significance
https://github.com/const-ae/ggsignif

分组计算，如取mean，单列

# quick data process
# get group mean
weather %>% group_by(city) %>% summarise(mean_temperature = mean(temperature))

分组取mean，多列　　

d <- read.table(text=
'Name     Month  Rate1     Rate2
Aira       1      12        23
Aira       2      18        73
Aira       3      19        45
Ben        1      53        19
Ben        2      22        87
Ben        3      19        45
Cat        1      22        87
Cat        2      67        43
Cat        3      45        32', header=TRUE)

aggregate(d[, 3:4], list(d$Name), mean)

  Group.1    Rate1    Rate2
1    Aira 16.33333 47.00000
2     Ben 31.33333 50.33333
3     Cat 44.66667 54.00000

小数点保留，科学计数法

# format decimals
formatC(0.46, format = "e", digits = 1)
library(scales)
scientific(0.46, digits = 2)

查看默认的颜色 - 画图的结果数据

# see the colors in ggplot
# To see what colors are used to make your plot you can use function ggplot_build() and then look at data part of this object (in column colour are codes).
ggplot_build(p)$data

其他图种

配对的箱线图、柱状图、折线图 - 用于比较case和control

data

这里想加点需要用另一个函数geom_dotplot

    lineage	lineage.sub	stage	S.Score	G2M.Score	cc.score
    <chr>	<fct>	<chr>	<dbl>	<dbl>	<dbl>
ctrl_AAACCTGAGACATAAC	NP	NPlate	Control	-0.8162696	-0.98076576	-0.8162696
ctrl_AAACCTGCAAGTAATG	BP	BP	Control	0.3118349	-0.05584626	0.3118349
ctrl_AAACCTGCATGCTAGT	GP	GP	Control	0.4443853	0.27702244	0.4443853

# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
library(ggpubr)

options(repr.plot.width=5, repr.plot.height=4)
p <- ggplot(cc.df, aes(x=lineage.sub, y=cc.score, fill=stage)) +
  geom_boxplot(position=position_dodge(1)) +
  geom_dotplot(binaxis='y', stackdir='center', position = "dodge", dotsize=0.15, binwidth=1/25, binpositions="all") +
  theme_bw() +
  labs(x = "", y = "Proliferation score
", title = "") +
  theme(axis.text.x  = element_text(face="plain", angle=0, size = 14, color = "black", vjust=0.6),
        axis.text.y  = element_text(size = 10),
        axis.title.y = element_text(size = 14)) +
  scale_fill_manual(values=c("blue","red")) +
  stat_compare_means(aes(group = stage), label = "p.signif", label.y = 4) +
  theme(legend.title = element_blank())
p

封装好的函数

ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se",
          color = "supp", palette = "jco", 
          position = position_dodge(0.8))+
  stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29)
ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se",
          color = "supp", palette = "jco")+
  stat_compare_means(aes(group = supp), label = "p.signif", 
                     label.y = c(16, 25, 29))

热图 - 最直观

# heatmap
https://jokergoo.github.io/ComplexHeatmap-reference/book/

热图骚操作

聚类热图怎么按自己的意愿调整分支的顺序？

平滑热图 - smooth heatmap

monocle里面的一种热图，很多顶刊都在用，也确实很漂亮。对应函数：plot_pseudotime_heatmap

问题是不够灵活，需要用monocle处理后才行，需要自定义一个处理函数。

小提琴图marker - 分布

stacked violin plot for visualizing single-cell data in Seurat

参见：mouse/singleCell/case/Kif7_ENCC/Kif7-integration/integration_public_and_Kif7.ipynb

Venn韦恩图/UpSetR图 - 交集

R绘制韦恩图 | Venn图 | UpSetR图

Beeswarm Plot 蜂群图 - 序列数据展开

https://github.com/eclarke/ggbeeswarm

#With different beeswarm point distribution priority
dat <- data.frame(x=rep(1:3,c(20,40,80)))
dat$y <- rnorm(nrow(dat),dat$x)
dat$z <- 1

ggplot(dat, aes(z,y)) + 
    geom_beeswarm(size=2,priority='descending', cex=3) + 
    ggtitle('Descending') + 
    scale_x_continuous(expand=expansion(add=c(0.5, 0.5)))

我的代码

set.seed(49)
library(ggplot2)
library(ggbeeswarm)

pca_HSCR2$z <- 1
pca_HSCR2$pseudotime <- -pca_HSCR2$X2

options(repr.plot.width=6, repr.plot.height=4)
ggplot(pca_HSCR2, aes(x=z, y=pseudotime, fill=severity, color=severity)) + 
    geom_beeswarm(size=1.2,priority='ascending', cex=1.4) + 
    # ggtitle('ascending') + # Descending
    scale_x_continuous(expand=expansion(add=c(0.5, 0.5))) +
    coord_flip() +
    theme_void() +
    scale_color_manual(values=severity.colors)

基因模块在pseudotime表达的line图

参见：mouse/singleCell/case/Kif7_ENCC/Kif7/Kif7_basic_analysis.ipynb

火山图/对角线图 - 特殊散点图　　

参考：mouse/singleCell/case/Kif7_ENCC/Kif7-integration/Ezh2_analysis.ipynb

# prepare data
log2FC <- data.frame(gene=rownames(HSCR.DEG.log2FC.df.final), S_log2FC=S.log2FC, L_log2FC=L.log2FC)

# add color label
log2FC$color <- "none"
log2FC[log2FC$gene %in% c("HDAC1"),]$color <- "red"

# the genes want to be labeled
label.genes <- c('RAMP2', 'HEY1', 'STAMBP', 'CCNB1IP1', 'LMOD3', 'NUP107', 'HEY2', 'FOXO1', 'CRLF1', 'ZFP36L2', 'NR2F2', 'TUBB3', 
                'ZNF385A', 'TMEM14C', 'FLNA', 'TFAP2A', 'SOX11', 'HDAC1', 'GLI3', 'BCL11A')
label.df <- subset(log2FC, gene %in% label.genes)

options(repr.plot.width=4.5, repr.plot.height=5)
library(ggplot2)
library("ggrepel")
# Basic scatter plot
ggplot(log2FC, aes(x=S_log2FC, y=L_log2FC, color=color)) + # , color=coregene
    geom_hline(yintercept=0) +
    geom_vline(xintercept=0) +
    geom_abline(intercept = 0, slope = 1, color="black", linetype="dashed", size=1) +
    geom_point(size=0.5) +
    geom_point(data = label.df, size=2, color = "red") +
    theme_bw() +
    labs(x = "
Log2FC in S-HSCR",y = "Log2FC in L-HSCR", title = "") +
    theme(legend.title=element_blank()) +
    # Change fontface. Allowed values : 1(normal), 2(bold), 3(italic), 4(bold.italic)
    geom_text_repel(data=label.df, aes(label = gene), size = 3.5, fontface=3, color="red",
                    box.padding = 0.4, max.overlaps = Inf) +
    theme(legend.position = "none", 
        axis.text  = element_text(size = 10),
        # axis.text.y  = element_text(size = 10),
        axis.title = element_text(size = 16, face="plain")) +
    scale_x_continuous(limits = c(-8, 8)) +
    scale_y_continuous(limits = c(-8, 8)) +
    scale_color_manual(values=c("grey","red"))

点的文本标记

geom_text_repel，基本用法

进阶篇 - 风格统一

为什么顶刊的图那么的赏心悦目？而自己的图拼到一起却那么的不和谐，都被自己丑哭了，却不知从何下手。

这里有几个教程还不错：

文章用图的修改和排版