qq plot 是为了检验实际分布与假设分布是否相同。
我的数据来自于plink处理后生成的p值,所以不同的分布根据实际情况修改。
下面函数来自于 qqman 包,可定制性太差,就改下用ggplot2画。
qq = function(pvector, ...) {
# Check for sensible input
if (!is.numeric(pvector)) stop("Input must be numeric.")
# limit to not missing, not nan, not null, not infinite, between 0 and 1
pvector <- pvector[!is.na(pvector) & !is.nan(pvector) & !is.null(pvector) & is.finite(pvector) & pvector<1 & pvector>0]
# Observed and expected
o = -log10(sort(pvector,decreasing=FALSE))
e = -log10( ppoints(length(pvector) ))
# # The old way
# plot(e, o, pch=20,
# xlab=expression(Expected~~-log[10](italic(p))),
# ylab=expression(Observed~~-log[10](italic(p))),
# ...)
# The new way to initialize the plot.
## See http://stackoverflow.com/q/23922130/654296
## First, define your default arguments
def_args <- list(pch=20, xlim=c(0, max(e)), ylim=c(0, max(o)),
xlab=expression(Expected~~-log[10](italic(p))),
ylab=expression(Observed~~-log[10](italic(p)))
)
## Next, get a list of ... arguments
#dotargs <- as.list(match.call())[-1L]
dotargs <- list(...)
## And call the plot function passing NA, your ... arguments, and the default
## arguments that were not defined in the ... arguments.
tryCatch(do.call("plot", c(list(x=e, y=o), def_args[!names(def_args) %in% names(dotargs)], dotargs)), warn=stop)
# Add diagonal
abline(0,1,col="red")
}
使用 ggplot2 作图
qq_dat <- data.frame(obs=-log10(sort(pd_dat$P,decreasing=FALSE)),
exp=-log10( ppoints(length(pd_dat$P))))
pd_qq <- ggplot(data=qq_dat,aes(exp,obs))+
geom_point(alpha=0.7,color="#7F7F7FFF")+
geom_abline(color="#D62728FF")+
xlab("Expected -log10(P-value)")+
ylab("Observed -log10(P-value)")+
scale_x_continuous(limits = c(0,7))+
scale_y_continuous(limits = c(0,7))+
theme(
axis.title = element_text(size=12,face="bold"),
axis.text = element_text(face="bold",size=8,color = "black"),
#axis.line = element_line(size=0.8,color="black"),
axis.ticks= element_line(size=0.8,colour = "black"),
panel.grid =element_blank(),
panel.border = element_rect(fill=NA,size = 0.8),
panel.background = element_blank())