LuluShi Blog
open main menu
Part of series:Visualize

QQ图的各种画法

/ 4 min read

各种qq图的画法,简单示例

CMplot: Single trait

CMplot(pig60K,plot.type="q",box=FALSE,file="jpg",file.name=NULL,dpi=300,
    conf.int=TRUE,conf.int.col=NULL,threshold.col="red",threshold.lty=2,
    file.output=TRUE,verbose=TRUE,width=5,height=5)

cm_sqq

CMplot: Multi traits

CMplot(pig60K,plot.type="q",col=c("dodgerblue1", "olivedrab3", "darkgoldenrod1"),multraits=TRUE,
        threshold=1e-6,ylab.pos=2,signal.pch=c(19,6,4),signal.cex=1.2,signal.col="red",
        conf.int=TRUE,box=FALSE,axis.cex=1,file="jpg",file.name=NULL,dpi=300,file.output=TRUE,
        verbose=TRUE,ylim=c(0,8),width=5,height=5)

cm_mqq

Custom Script: single trait

qqplot = function(pval, ylim=0, lab=1.4, axis=1.2){
    par(mgp=c(5,1,0))
    p1 <- pval 
    p2 <- sort(p1)
    n  <- length(p2)
    k  <- c(1:n)
    alpha <- 0.05
    lower <- qbeta(alpha/2, k, n+1-k)
    upper <- qbeta((1-alpha/2), k, n+1-k)
    expect <- (k-0.05)/n 
    biggest <- ceiling(max(-log10(p2), -log10(expect)))
    xlim <- max (-log10(expect)+0.1);
    if (ylim==0) ylim=biggest;
    plot(-log10(expect), -log10(p2), xlim=c(0, xlim), ylim=c(0, ylim),
        ylab=expression(paste("Observed ", "-", log[10], "(", italic(P), "-value)", sep="")),
        xlab=expression(paste("Expected ", "-", log[10], "(", italic(P), "-value)", sep="")),
        type="n", mgp=c(2,0.5,0), tcl=-0.3, bty="n", cex.lab=lab, cex.axis=axis)
    polygon(c(-log10(expect), rev(-log10(expect))), c(-log10(upper), rev(-log10(lower))),
            col=adjustcolor("grey", alpha.f=0.5), border=NA)
    abline(0,1,col="white", lwd=2)
    points(-log10(expect), -log10(p2), pch=20, cex=0.6, col=2)
}

qq

Custom Script: multi trait

qqplot_multi <- function(pval_list, ylim=0, lab=1.4, axis=1.2, p_floor=1e-300, cols=NULL, pch=20, cex=0.6, 
legend=TRUE, legend_pos="topleft", legend_cex=0.9, show_ci=TRUE, ci_scale=1, ci_alpha=0.6, ci_border=TRUE, diag_col="white", ...) {
    if (!is.list(pval_list)) pval_list=as.list(as.data.frame(pval_list))
    trait = names(pval_list)
    if(is.null(trait) || any(trait == "")){
        trait=paste0("Trait", seq_along(pval_list))
        names(pval_list)=trait
    }

    m = length(pval_list)
    if(m < 1) stop("pval_list must contain at least one trait ! ! !")
    
    recycle1m = function(x, nm, name){
        if (length(x) == 1) return(rep(x, nm))
        if (length(x) == nm) return(x)
        stop(name, " length must be 1 or ", nm, ".")
    }
    
    ci_scale = recycle1m(ci_scale , m, "ci_xfrac")
    ci_scale[ci_scale <= 0] = 0.01
    ci_scale[ci_scale > 1]  = 1

    if(is.null(cols)){
        if ("hcl.colors" %in% getNamespaceExports("grDevices")){
            cols = grDevices::hcl.colors(m, palette="Dark 3")
        } else {
            cols = grDevices::rainbow(m)
        }
    }
    cols = recycle1m(cols, m, "cols")
    pch  = recycle1m(pch, m, "pch")
    
    lighten_col = function(col, amount=0.65, alpha=ci_alpha) {
        rgb  = grDevices::col2rgb(col) / 255
        rgb2 = rgb + (1-rgb) * amount
        grDevices::rgb(rgb2[1,], rgb2[2,], rgb2[3,], alpha=alpha)
    }

    dat=vector("list", m)
    for(i in seq_len(m)){
        p = pval_list[[i]]
        p = p[is.finite(p) & !is.na(p)]
        p = p[p <= 1 & p >= 0]
        if (length(p) == 0) stop("Trait '", trait[i], "' has no valid P in [0,1].")
        p[p == 0] = p_floor

        p2 = sort(p)
        n  = length(p2)
        k  = seq_len(n)
        alpha = 0.05
        lower = qbeta(alpha/2, k, n+1-k)
        upper = qbeta(1 - alpha/2, k, n+1-k)
        expect = (k-0.05) / n 
        expect[expect <= 0] = min(expect[expect > 0])

        dat[[i]] = list(x=-log10(expect), y=-log10(p2), y_low=-log10(upper), y_high=-log10(lower), n=n)
    }

    xlim = max(vapply(dat, function(d) max(d$x, na.rm=TRUE), numeric(1))) + 0.1
    ymax = max(vapply(dat, function(d) max(d$y, d$y_high, na.rm=TRUE), numeric(1)))
    if (ylim==0) ylim=ceiling(ymax)

    par(mgp=c(5,1,0))
    plot(NA, xlim=c(0, xlim), ylim=c(0, ylim),
    ylab=expression(paste("Observed", " -", log[10], "(", italic(P), "-value", ")", sep="")),
    xlab=expression(paste("Expected", " -", log[10], "(", italic(P), "-value", ")", sep="")),
    type="n", mgp=c(2,0.5,0), tcl=-0.3, bty="n", cex.lab=lab, cex.axis=axis, ...)

    if (show_ci){
        for(i in seq_len(m)){
            d = dat[[i]]
            o = order(d$x)
            x = d$x[o]
            ylow  = d$y_low[o]
            yhigh = d$y_high[o]
            x_ci  = x * ci_scale[i]
            ylow  = ylow * ci_scale[i]
            yhigh = yhigh * ci_scale[i]

            ci_col = lighten_col(cols[i])
            if (ci_border) {
                border=cols[i]
            } else {
                border=NA
            }
            polygon(c(x_ci, rev(x_ci)), c(ylow, rev(yhigh)), col=ci_col, border=border)
        }
    }
    abline(0, 1, col=diag_col, lwd=2)

    for(i in seq_len(m)){
        d = dat[[i]]
        points(d$x, d$y, pch=pch[i], cex=cex, col=cols[i])
    }

    if (legend){
        legend(legend_pos, legend=trait, col=cols, pch=pch, pt.cex=cex, bty="n", cex=legend_cex)
    }
    invisible(dat)
}

example for multi trait

set.seed(20260108)

N <- 50000

# 1) 纯零假设(均匀分布)
p_null <- runif(N)

# 2) “多基因/少量真实信号”:97% null + 3% 偏小p
p_poly <- runif(N)
sig_poly <- rbinom(N, 1, 0.03) == 1
p_poly[sig_poly] <- rbeta(sum(sig_poly), shape1 = 0.4, shape2 = 1)  # shape1<1 会产生更多小p

# 3) “膨胀”:|Z|整体偏大(sd>1),p会更偏小但不一定有真实峰
z_inf <- rnorm(N, mean = 0, sd = 1.2)
p_infl <- 2 * pnorm(-abs(z_inf))

# 4) “强信号”:1% 极强关联(生成极小p),并故意加入几个0
p_strong <- runif(N)
idx_strong <- sample.int(N, size = round(0.01 * N))
p_strong[idx_strong] <- 10^(-runif(length(idx_strong), min = 6, max = 10)) # 1e-6 到 1e-30
# p_strong[sample.int(N, 5)] <- 0  # 故意放几个0,测试防 Inf

pvals_list <- list(
  Null      = p_null,
  Polygenic = p_poly,
  Inflated  = p_infl,
  Strong    = p_strong
)

绘图

png("qqplot_multi.png", width=2400, height=2400, res=500, type="cairo")
qqplot_multi(
  pvals_list,
  cols = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a"),
  pch  = c(16, 17, 15, 18),
  cex  = c(0.8, 0.8, 0.8, 0.8),
  ci_scale = seq(0.8, 1.0, length.out = 4),
  ci_alpha=0.6)
dev.off()

1767864721368