LuluShi Blog
open main menu
Part of series:Visualize

GWFM结果可视化

/ 7 min read

GWFM

When you want draw locus plot of GWFM, follow scripts will meet your require

plot_GWFM.r

# ===========================================================================================//
# @Author  :    Loren Shi
# @Time    :    2025/06/05 15:42:36
# @File    :    plot_GWFM.r
# @Mails   :    crazzy_rabbit@163.com
# @line    :    https://github.com/Crazzy-Rabbit
#
# R script to draw regional plot for genome wide fine mapping SNPs.
#
# Part of code are adopted from plot_smr script which provided by SMR
#
# Amendment:
#  2025/06/05
#    1. script completed 
#    2. first realeased
#    3. fixed teh bug where the drawn gene was not in the middle of the genelayer when nrowgene=1
#
# Usages:
#     source("plot_GWFM.r")
#     PData <- ReadPvalueFromFiles(gwas="gwas_chrpos.gz", gwfm="gwaf.snpRes", glist="glist_hg19_refseq.txt", windowsize=200000, highlight="rs641221")
#     pdf('gwfm_plot.pdf',width = 8,height = 8)
#     MultiPvalueLocusPlot(data=PData)
#     dev.off()
# ===========================================================================================//
is.installed <- function(mypkg){
    is.element(mypkg, installed.packages()[,1])
}
# check if package "TeachingDemos" is installed
if (!is.installed("TeachingDemos")){
    install.packages("TeachingDemos");
}
library(TeachingDemos)
library(data.table)

# parmeters for plot
genemove=0.01; txt=1.1; cex=1.3; lab=1.1; axis=1; top_cex=1.2;

GeneRowNum = function(GENELIST) {
    BP_THRESH = 0.03; MAX_ROW = 5
    # get the start and end position
    GENELIST = GENELIST[!duplicated(GENELIST$GENE),]
    START1 = as.numeric(GENELIST$GENESTART); 
    END1 = as.numeric(GENELIST$GENEEND)
    STRLENGTH = nchar(as.character(GENELIST$GENE))
    MIDPOINT = (START1 + END1)/2
    START2 = MIDPOINT-STRLENGTH/250; 
    END2 = MIDPOINT+STRLENGTH/250
    START = cbind(START1, START2); 
    END = cbind(END1, END2);
    START = apply(START, 1, min); 
    END = apply(END, 1, max)
    GENELIST = data.frame(GENELIST, START, END)
    GENELIST = GENELIST[order(as.numeric(GENELIST$END)),]
    START = as.numeric(GENELIST$START); END = as.numeric(GENELIST$END)
    # get the row index for each gene
    NBUF = dim(GENELIST)[1]
    ROWINDX = rep(1, NBUF)
    ROWEND = as.numeric(rep(0, MAX_ROW))
    MOVEFLAG = as.numeric(rep(0, NBUF))
    if(NBUF>1) {
        for( k in 2 : NBUF ) {
            ITERFLAG=FALSE
            if(START[k] < END[k-1]) {
                INDXBUF=ROWINDX[k-1]+1
            } else INDXBUF = 1
            if(INDXBUF>MAX_ROW) INDXBUF=1;
            REPTIME=0
            repeat{
                if( ROWEND[INDXBUF] > START[k] ) {
                    ITERFLAG=FALSE
                    INDXBUF=INDXBUF+1
                    if(INDXBUF>MAX_ROW) INDXBUF = 1
                } else {
                    ITERFLAG=TRUE
                }
                if(ITERFLAG) break;
                REPTIME = REPTIME+1
                if(REPTIME==MAX_ROW) break;
            }
            ROWINDX[k]=INDXBUF;

            if( (abs(ROWEND[ROWINDX[k]]-START[k]) < BP_THRESH)
            | ((ROWEND[ROWINDX[k]]-START[k])>0) ) {
                MOVEFLAG[k] = 1
                SNBUF = tail(which(ROWINDX[c(1:k)]==ROWINDX[k]), n=2)[1]
                MOVEFLAG[SNBUF] = MOVEFLAG[SNBUF] - 1
            }
            if(ROWEND[ROWINDX[k]]<END[k]) {
                ROWEND[ROWINDX[k]] = END[k]  }
        }
    }
    GENEROW = data.frame(as.character(GENELIST$GENE),
    as.character(GENELIST$ORIENTATION),
    as.numeric(GENELIST$GENESTART),
    as.numeric(GENELIST$GENEEND),
    ROWINDX, MOVEFLAG)
    colnames(GENEROW) = c("GENE", "ORIENTATION", "START", "END", "ROW", "MOVEFLAG")
    return(GENEROW)
}

ReadPvalueFromFiles <- function(gwas, gwfm, glist, windowsize=500000, highlight) {
    gwas1 = fread(gwas)[, .(CHR, POS, SNP, p)]
    colnames(gwas1) = c("CHR", "BP", "SNP", "p")
    gwas1$CHR = as.character(gwas1$CHR)
    gwas2 = fread(gwfm)[, .(Chrom, Position, Name, PIP)]
    colnames(gwas2) = c("CHR", "BP", "SNP", "p")
    gwas2$CHR = as.character(gwas2$CHR)

    snp_gwas1 = gwas1[gwas1$SNP == highlight, ];
    chrom = unique(snp_gwas1$CHR)
    if (nrow(snp_gwas1) == 0) {stop("highlight SNP not found in file1, please check it!")} 
    BP1 = snp_gwas1$BP
    start1 = BP1 - windowsize;
    end1 = BP1 + windowsize;
    file1 = gwas1[gwas1$CHR == chrom & gwas1$BP > start1 & gwas1$BP < end1, ]

    snp_gwas2 = gwas2[gwas2$SNP == highlight, ];
    if (nrow(snp_gwas2) == 0) {stop("highlight SNP not found in file2, please check it!")} 
    BP2 = snp_gwas2$BP
    start2 = BP2 - windowsize;
    end2 = BP2 + windowsize;
    file2 = gwas2[gwas2$CHR == chrom & gwas2$BP > start2 & gwas2$BP < end2, ]
  
    start = max(c(start1, start2), na.rm=T) / 1e6;
    end = max(c(end1, end2), na.rm=T) / 1e6;
    glist = fread(glist)
    glist[,2] = glist[,2]/1e6;
    glist[,3] = glist[,3]/1e6;
    colnames(glist) = c("CHR", "GENESTART",  "GENEEND",  "GENE", "ORIENTATION")
    glist = glist[glist$CHR == chrom & glist$GENESTART >= start & glist$GENEEND <= end, ]

    return_lsit = list(file1=file1, file2=file2, SNP=highlight, glist=glist, CHR=chrom)
}


MultiPvalueLocusPlot <- function(data) {
    gwas1 = data$file1;
    gwas2 = data$file2;

    pXY1 = -log10(gwas1$p);
    yMAX = ceiling(max(pXY1, na.rm=T)) + 1;

    pXY2 = gwas2$p;
    yMAX2 = ceiling(max(pXY2, 1, na.rm=T));

    glist = data$glist;
    generow = GeneRowNum(glist);
    num_row = max(as.numeric(generow$ROW));
    offset_map = ceiling(yMAX);
    offset_map = max(offset_map, num_row*2.5);
    offset_pip = yMAX / 2.5;
    dev_axis = 0.1*yMAX;
    if (dev_axis < 1.5) dev_axie=1.5;
    yaxis.min = -offset_map - offset_pip - dev_axis - yMAX;
    yaxis.max = yMAX + ceiling(offset_pip) + 1;
    gwasBP1 = as.numeric(gwas1$BP) / 1e6;
    gwasBP2 = as.numeric(gwas2$BP) / 1e6;
    xmin = min(c(gwasBP1, gwasBP2), na.rm=T) - 0.001;
    xmax = max(c(gwasBP1, gwasBP2), na.rm=T) + 0.001;
  
  
    xlab = paste("Chromsome ", data$CHR, " (Mb)")
    #------------------- plot gwas layer ----//
    ylab1 = expression(-log[10] (italic(P) * " GWAS"))
    par(mar=c(5,5,3,2), xpd=TRUE);
    plot(gwasBP1, pXY1, pch=20, xaxt="n", yaxt="n", bty="n", ylim=c(yaxis.min, yaxis.max), 
    xlim=c(xmin, xmax), xlab=xlab, ylab="", cex.lab=lab, cex.axis=axis,cex=0.6, col="gray68");
    devbuf1 = yMAX/4;
    xticks = round(seq(xmin, xmax, length.out=6), 2);
    axis(1, at=xticks, lwd=1);
    axis(2, at=seq(0, yMAX, by=devbuf1), labels=round(seq(0, yMAX, devbuf1), 0), las=1, lwd=1);
    mtext(ylab1, side=2, line=3, at=(yMAX*1/2));
    segments(x0=xmin, y0=-0.5, x1=xmax, y1=-0.5, col="grey50", lwd=1, lty=2);
  
    snp1 = gwas1[SNP == data$SNP, ]
    snpBP1 = snp1$BP / 1e6;
    snpP1 = -log10(snp1$p);

    points(snpBP1, snpP1, pch="*", col="peru", cex=2);
    text(snpBP1, snpP1, labels=snp1$SNP, col="gold", pos=3, cex=0.8, offset=0.5);

    #------------------- plot PIP layer ----//
    ylab2 = "PIP (SBayesRC)"
    axis.start = 0;
    axis.start = axis.start - yMAX - offset_pip - dev_axis;
    pXY2buf = pXY2 / yMAX2*yMAX + axis.start;
    par(new=TRUE);
    plot(gwasBP2, pXY2buf, pch=20, xaxt="n", yaxt="n", bty="n", ylim=c(yaxis.min, yaxis.max), 
    xlim=c(xmin, xmax), ylab="", xlab="", cex.lab=lab, cex.axis=axis, cex=0.6, col="navy");
    devbuf2 = yMAX2/5;
    axis(2, at=seq(axis.start, (axis.start+yMAX), yMAX/5), labels=seq(0, yMAX2, devbuf2), las=1, lwd=1);
    mtext(ylab2, side=2, line=3, at=((axis.start + axis.start + yMAX)/2));
    segments(x0=xmin, y0=axis.start-0.5, x1=xmax, y1=axis.start-0.5, col="grey50", lwd=1, lty=2);
  
    snp2 = gwas2[SNP == data$SNP, ]
    snpBP2 = snp2$BP / 1e6;
    snpP2 = snp2$p / yMAX2*yMAX + axis.start;

    points(snpBP2, snpP2, pch="*", col="peru", cex=2);
    #------------------- plot gene layer ----//
    num_gene = dim(generow)[1]
    dist = offset_map / num_row;
    for (k in 1:num_row) {
        generowbuf = generow[which(as.numeric(generow[, 5]) == k), ]
        xstart = as.numeric(generowbuf[, 3])
        xend = as.numeric(generowbuf[, 4])
        snbuf = which(xend - xstart < 1e-3)
        if (length(snbuf) > 0) {
            xstart[snbuf] = xstart[snbuf] - 0.0025
            xend[snbuf] = xend[snbuf] + 0.0025
        }
        xcenter = (xstart+xend)/2
        xcenter = spread.labs(xcenter, mindiff=0.01, maxiter=1000, min=xmin, max=xmax)
        num_genebuf = dim(generowbuf)[1]
        if (num_row == 1) {
            for (l in 1:num_genebuf) {
                ofs=0.3;
                if (l%%2 == 0) ofs=-0.8;
                ypos = offset_map/2 + yaxis.min - 1;
                code = 1;
                if (generowbuf[l,2] == "+") code=2;
                arrows(x0=xstart[l], y0=ypos, x1=xend[l], y1=ypos, code=code, length=0.07, ylim=c(yaxis.min, yaxis.max),
                col=colors()[75], lwd=1)
                movebuf = as.numeric(generowbuf[l, 6]) * genemove
                text(x=xcenter[l]+movebuf, y=ypos, label=substitute(italic(genename), list(genename=as.character(generowbuf[l,1]))), pos=3, offset=ofs, col="black", cex=0.9)
            }
        } else if (num_row > 1) {
            for (l in 1:num_genebuf) {
                ofs=0.3
                if(l%%2==0) ofs=-0.8
                m = num_row - k
                ypos = m*dist + yaxis.min
                code = 1;
                if (generowbuf[l,2] == "+") code = 2;
                arrows(x0=xstart[l], y0=ypos, x1=xend[l], y1=ypos, code=code, length=0.07, ylim=c(yaxis.min, yaxis.max),
                col=colors()[75], lwd=1)
                movebuf = as.numeric(generowbuf[l,6]) * genemove
                text(x=xcenter[l]+movebuf, y=ypos, label=substitute(italic(genename), list(genename=as.character(generowbuf[l,1]))), pos=3, offset=ofs, col="black", cex=0.9)
            }
        }
    }
}

example data

set.seed(123)

n_snps <- 10000
n_genes <- 10

## SNP pos
positions <- as.integer(seq(500000, 2500000, length.out = n_snps))
snp_ids <- paste0("rs", 1000000 + 0:(n_snps - 1))

## highlight SNP(GWAS)
highlight_snp <- data.frame(
  CHR = "2",
  POS = 1250000,
  SNP = "rs888888",
  p = 1e-30
)

## GWAS summary
pvals <- pmax(rbeta(n_snps, 0.4, 5), 1e-8)
gwas_df <- data.frame(
  CHR = rep("2", n_snps),
  POS = positions,
  SNP = snp_ids,
  p = pvals
)

gwas_df <- rbind(gwas_df, highlight_snp)
write.table(gwas_df, file = "sim_gwas_chr2.gz", sep = "\t", row.names = FALSE, quote = FALSE)

## highlight SNP(PIP)
highlight_pip <- data.frame(
  Chrom = "2",
  Position = 1250000,
  Name = "rs888888",
  PIP = 0.99
)

# PIP data, finemapping result
pip_values <- round(runif(n_snps, 0.1, 0.2), 4)
gwfm_df <- data.frame(
  Chrom = rep("2", n_snps),
  Position = positions,
  Name = snp_ids,
  PIP = pip_values
)

gwfm_df <- rbind(gwfm_df, highlight_pip)
write.table(gwfm_df, file = "sim_gwfm.snpRes", sep = "\t", row.names = FALSE, quote = FALSE)

# gene list 
gene_starts <- as.integer(seq(1050000, 1450000, length.out = n_genes))
gene_lengths <- sample(30000:80000, n_genes, replace = TRUE)
gene_ends <- gene_starts + gene_lengths

gene_df <- data.frame(
  CHR = rep("2", n_genes),
  GENESTART = gene_starts,
  GENEEND = gene_ends,
  GENE = paste0("GENE", 1:n_genes),
  ORIENTATION = sample(c("+""-"), n_genes, replace = TRUE)
)

write.table(gene_df, file = "sim_glist_chr2.txt", sep = "\t", row.names = FALSE, quote = FALSE)

run script

source("plot_GWFM.r")
PData <- ReadPvalueFromFiles(gwas="sim_gwas_chr2.gz", gwfm="sim_gwfm.snpRes", glist="sim_glist_chr2.txt", windowsize=200000, highlight="rs888888")
pdf('gwfm_plot.pdf',width = 8,height = 8)
MultiPvalueLocusPlot(data=PData)
dev.off()