R版本与运行环境信息
Date: 2021-7-22
R version 4.0.3 (2020-10-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)
Venn图(韦恩图)
使用ggVennDiagram绘制Venn图
# install.packages("ggVennDiagram")
library(tidyverse)
library(ggVennDiagram)
library(ggsci)

# Build demo data: 1000 gene IDs.
genes <- paste0("gene", 1:1000)
set.seed(20210502)

# Put the members of every Venn circle into one named list.
gene_list <- list(
  A = sample(genes, 100),
  B = sample(genes, 200),
  C = sample(genes, 300),
  D = sample(genes, 200)
)

# Draw the Venn diagram.
ggVennDiagram(
  gene_list,
  # Name shown for each set.
  category.names = c("A", "B", "C", "D"),
  # Colour of each set name.
  set_color = c("red1", "red2", "red3", "red4"),
  # Size of the set names.
  set_size = 10,
  # Line type of the circle edges (see the line-type chart further down).
  edge_lty = "dashed",
  # Line width of the circle edges.
  edge_size = 1,
  # Label content: "counts", "percent", "both" or "none".
  label = "both",
  # Label colour.
  label_color = "black",
  # Label transparency.
  label_alpha = 0.5
) +
  # Fill palette (see the RColorBrewer palette overview further down).
  scale_fill_distiller(palette = "RdBu") +
  # Colour of the circle outlines, set by hand.
  scale_color_manual(values = c(rep("black", 4)))

R的线条类型与点类型以及调色板
# install.packages("ggpubr")
# Reference charts: available point shapes (a) and line types (b),
# arranged side by side.
a <- ggpubr::show_point_shapes()
b <- ggpubr::show_line_types()
cowplot::plot_grid(a, b)

# Colour palettes: display every RColorBrewer palette.
RColorBrewer::display.brewer.all()

Upset图
韦恩图不适合展示维度过高的数据,个人认为最多四个维度,当维度更高时,用Upset图会更好
基本使用
- 基本用法, 使用数据集为package里自带的
movies数据集
# install.packages("UpSetR")
library(UpSetR)

# Load the movies data set that ships with UpSetR.
movies <- read.csv(
  system.file("extdata", "movies.csv", package = "UpSetR"),
  header = TRUE,
  sep = ";"
)

# Quick look at the data: column 1 is the movie title, column 2 the release
# year, and the following genre columns are flags (1 = the movie belongs to
# that genre).
head(movies)
#                                 Name ReleaseDate Action Adventure Children Comedy Crime Documentary Drama Fantasy Noir Horror
# 1                   Toy Story (1995)        1995      0         0        1      1     0           0     0       0    0      0
# 2                     Jumanji (1995)        1995      0         1        1      0     0           0     0       1    0      0
# 3            Grumpier Old Men (1995)        1995      0         0        0      1     0           0     0       0    0      0
# 4           Waiting to Exhale (1995)        1995      0         0        0      1     0           0     1       0    0      0
# 5 Father of the Bride Part II (1995)        1995      0         0        0      1     0           0     0       0    0      0
# 6                        Heat (1995)        1995      1         0        0      0     1           0     0       0    0      0
#   Musical Mystery Romance SciFi Thriller War Western AvgRating Watches
# 1       0       0       0     0        0   0       0      4.15    2077
# 2       0       0       0     0        0   0       0      3.20     701
# 3       0       0       1     0        0   0       0      3.02     478
# 4       0       0       0     0        0   0       0      2.73     170
# 5       0       0       0     0        0   0       0      3.01     296
# 6       0       0       0     0        1   0       0      3.88     940

# Draw the UpSet plot for six genres.
upset(
  data = movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary")
)
data = :指定数据集;sets = :指定用于作图的集合(列名)
# The bars at the bottom left show the size of each of the six selected sets,
# i.e. how many of the released movies belong to each genre.
movies %>%
  select(c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary")) %>%
  colSums()
#       Drama      Comedy      Action    Thriller     Western Documentary
#        1603        1200         503         492          68         127

# The bars at the top right give the number of elements in the intersection
# marked by the dots below each bar. For example, the first bar (1184) is the
# number of movies that are Drama only, and the seventh bar shows that 211
# movies are both Drama and Comedy.

展示指定部分
- 展示指定部分(intersections)
使用queries选项来对指定的数据进行展示,默认提供intersections和elements两种展示方式,首先是intersections,对指定的交集进行展示
命令格式,query为指定展示数据的方式, 数据类型为列表,params, 指定展示哪个交集,数据类型为列表,列表内的元素为需要展示部分的数据名称,color为指定展示的颜色,active为是否对柱子着色,=T代表上色,=F则在柱子上用一个与color指定的一致颜色的小三角表示
命令格式:queries = list(list(query = intersects, params = list(a,b,c,d), color = "", active = T))
# Highlight selected intersections through `queries`; each query is its own
# list inside the outer list:
#   * Action & Comedy & Drama -> bar filled in red
#   * Drama & Comedy          -> bar filled in blue
#   * Western & Drama         -> bar not filled (active = FALSE); a small
#                                green triangle marks it instead
#   * Action only             -> bar filled in pink
upset(
  data = movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary"),
  queries = list(
    list(
      query = intersects,
      params = list("Action", "Comedy", "Drama"),
      color = "red1",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy"),
      color = "blue2",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Western", "Drama"),
      color = "green2",
      active = FALSE
    ),
    # The "Action only" query announced in the description.
    list(
      query = intersects,
      params = list("Action"),
      color = "pink",
      active = TRUE
    )
  )
)

展示指定部分在某因子上的水平
- 展示某部分在某因子上的水平(elements)
命令格式
queries = list(list(query = elements, params = list("factors",b,c,d), color = "", active = T))
# In addition to the highlighted intersections, use an `elements` query to
# show how many movies in each intersection were released in 1998 or 1995.
upset(
  data = movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary"),
  queries = list(
    list(
      query = intersects,
      params = list("Action", "Comedy", "Drama"),
      color = "red1",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy"),
      color = "blue2",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Western", "Drama"),
      color = "green2",
      active = FALSE
    ),
    list(
      query = intersects,
      params = list("Action"),
      color = "pink",
      active = TRUE
    ),
    # First param is the attribute (column) name, the rest are its values.
    list(
      query = elements,
      params = list("ReleaseDate", 1998, 1995),
      color = "purple",
      active = TRUE
    )
  )
)

增加箱线图
- 可以通过指定因子来展示每个交集在该因子上的分布情况。例如,用箱线图展示1920-2000年间,各个交集(intersects)在发行年份(ReleaseDate)上的分布情况
boxplot.summary = "factors": 可以指定多个因子
# Same queries as before, plus a box plot per intersection summarising the
# distribution of the ReleaseDate attribute.
upset(
  data = movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary"),
  queries = list(
    list(
      query = intersects,
      params = list("Action", "Comedy", "Drama"),
      color = "red1",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy"),
      color = "blue2",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Western", "Drama"),
      color = "green2",
      active = FALSE
    ),
    list(
      query = intersects,
      params = list("Action"),
      color = "pink",
      active = TRUE
    ),
    list(
      query = elements,
      params = list("ReleaseDate", 1998, 1995),
      color = "purple",
      active = TRUE
    )
  ),
  boxplot.summary = "ReleaseDate"
)

增加其他图表
- 通过
attribute.plots可以增加其他的图表,如直方图,点图,箱线图等等……
选项用法:attribute.plots = list(),可以通过构建函数的方法进行绘图,每张图片放在一个list中,gridrows为图的高度,ncols用于控制列数
使用方法
# Add a histogram showing the number of movies per year.
# First define the function that draws the histogram.
#
# plot1: histogram helper for `attribute.plots`.
#   data  data frame to plot; must contain a `color` column, whose values are
#         used directly as fill colours (presumably supplied by UpSetR when it
#         calls the helper -- confirm against the UpSetR documentation).
#   x     name of the column to bin, given as a string.
# Returns the ggplot object.
plot1 <- function(data, x) {
  # aes_string() is used because the column name arrives as a string.
  ggplot(data, aes_string(x = x, fill = "color")) +
    geom_histogram(bins = 30) +
    scale_fill_identity() +
    labs(y = "Number of movies", x = "Years") +
    theme_bw()
}

# Register the histogram in `attribute.plots`: x = "ReleaseDate" is passed to
# plot1 as its `x` argument, and queries = TRUE overlays the data highlighted
# by the queries on the histogram as well.
upset(
  data = movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary"),
  queries = list(
    list(
      query = intersects,
      params = list("Action", "Comedy", "Drama"),
      color = "red1",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy"),
      color = "blue2",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Western", "Drama"),
      color = "green2",
      active = FALSE
    ),
    list(
      query = intersects,
      params = list("Action"),
      color = "pink",
      active = TRUE
    ),
    list(
      query = elements,
      params = list("ReleaseDate", 1998, 1995),
      color = "purple",
      active = TRUE
    )
  ),
  attribute.plots = list(
    gridrows = 55,
    plots = list(
      list(plot = plot1, x = "ReleaseDate", queries = TRUE)
    ),
    ncols = 1
  )
)

# Add a scatter plot (e.g. average rating vs. number of watches).
# As before, define the plotting function first.
#
# plot2: scatter-plot helper for `attribute.plots`.
#   data  data frame to plot; must contain a `color` column, whose values are
#         used directly as point colours (presumably supplied by UpSetR when
#         it calls the helper -- confirm against the UpSetR documentation).
#   x, y  names of the columns for the two axes, given as strings.
# Returns the ggplot object.
plot2 <- function(data, x, y) {
  # aes_string() is used because the column names arrive as strings.
  ggplot(data, aes_string(x = x, y = y, color = "color")) +
    geom_point() +
    scale_color_identity() +
    theme_bw()
}

# Register the scatter plot in `attribute.plots`, next to the histogram drawn
# by plot1 (defined in the previous snippet); ncols = 2 puts them in two
# columns.
upset(
  data = movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary"),
  queries = list(
    list(
      query = intersects,
      params = list("Action", "Comedy", "Drama"),
      color = "red1",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy"),
      color = "blue2",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Western", "Drama"),
      color = "green2",
      active = FALSE
    ),
    list(
      query = intersects,
      params = list("Action"),
      color = "pink",
      active = TRUE
    ),
    list(
      query = elements,
      params = list("ReleaseDate", 1998, 1995),
      color = "purple",
      active = TRUE
    )
  ),
  attribute.plots = list(
    gridrows = 55,
    plots = list(
      list(plot = plot1, x = "ReleaseDate", queries = TRUE),
      list(plot = plot2, x = "AvgRating", y = "Watches", queries = TRUE)
    ),
    ncols = 2
  )
)

# Add a jitter plot.
#
# plot3: jitter-plot helper for `attribute.plots`.
#   data  data frame to plot; must contain a `color` column, whose values are
#         used directly as point colours (presumably supplied by UpSetR when
#         it calls the helper -- confirm against the UpSetR documentation).
#   x, y  names of the columns for the two axes, given as strings.
# Returns the ggplot object.
plot3 <- function(data, x, y) {
  # aes_string() is used because the column names arrive as strings.
  ggplot(data, aes_string(x = x, y = y, color = "color")) +
    geom_jitter() +
    scale_color_identity() +
    theme_bw()
}

# Register the jitter plot in `attribute.plots`, together with the histogram
# (plot1) and the scatter plot (plot2) defined in the previous snippets;
# ncols = 3 puts the three plots in three columns.
upset(
  data = movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Western", "Documentary"),
  queries = list(
    list(
      query = intersects,
      params = list("Action", "Comedy", "Drama"),
      color = "red1",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy"),
      color = "blue2",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Western", "Drama"),
      color = "green2",
      active = FALSE
    ),
    list(
      query = intersects,
      params = list("Action"),
      color = "pink",
      active = TRUE
    ),
    list(
      query = elements,
      params = list("ReleaseDate", 1998, 1995),
      color = "purple",
      active = TRUE
    )
  ),
  attribute.plots = list(
    gridrows = 55,
    plots = list(
      list(plot = plot1, x = "ReleaseDate", queries = TRUE),
      list(plot = plot2, x = "AvgRating", y = "Watches", queries = TRUE),
      # No `queries` entry here, exactly as in the original snippet.
      list(plot = plot3, x = "Watches", y = "AvgRating")
    ),
    ncols = 3
  )
)

