16 Oct 2022
Share
tweet

Table of Contents

some colors

D3=c("#f4f1de","#e07a5f","#3d405b","#81b29a","#f2cc8f","#a8dadc","#f1faee","#f08080",
"#F94144", "#dda15e", "#F8961E" ,"#bc6c25", "#F9C74F" ,"#90BE6D", "#43AA8B",
 "#4D908E", "#577590", "#277DA1", "#AEC7E8" ,"#FFBB78" ,"#98DF8A", "#FF9896",
"#C5B0D5" ,"#C49C94", "#F7B6D2", "#C7C7C7" ,"#DBDB8D" ,"#9EDAE5", "#393B79",
"#637939", "#8C6D31", "#843C39", "#7B4173" ,"#5254A3" ,"#8CA252", "#BD9E39",
"#AD494A", "#A55194", "#6B6ECF", "#B5CF6B", "#E7BA52" ,"#D6616B", "#CE6DBD",
"#9C9EDE", "#CEDB9C" ,"#E7CB94", "#E7969C", "#DE9ED6" ,"#3182BD", "#E6550D",
"#31A354", "#756BB1" ,"#636363", "#6BAED6" ,"#FD8D3C" ,"#74C476", "#9E9AC8",
"#969696", "#9ECAE1" ,"#FDAE6B", "#A1D99B" ,"#BCBDDC" ,"#BDBDBD", "#C6DBEF",
"#FDD0A2" ,"#C7E9C0" ,"#DADAEB", "#D9D9D9")

photo

some packages

library(ggplot2)
library(ggpubr)


library(reticulate)
use_python("/net/mulan/home/shanglu/anaconda3/envs/SVGPVAE/bin/python", required=TRUE)
sc <- import("scanpy")

suppressMessages(require(SPARK))
library(Seurat)
library(peakRAM)
library(ggplot2)
library(ggpubr)
library(mclust) # ARI
library(aricode)# NMI
library(SpatialPCA)# NMI
library(lisi) # lisi score
library(reticulate) 
library(tidyverse)

increase point size in legends

+ guides(colour = guide_legend(override.aes = list(size=10)))

violin plot


dp <- ggplot(dat, aes(x=num, y=pve)) + 
  geom_violin(trim=FALSE)+
  geom_boxplot(width=0.1, fill="white")+
  labs(title="xxx",x="xxx", y = "xxx")+
  scale_fill_brewer(palette="RdBu") + 
  theme_minimal(base_size = 22)
dp

boxplot


# example 1
p <- ggplot(dat, aes(x = num, y = pve )) +
        geom_boxplot(alpha = 0.8,fill = "cornflowerblue") + 
        scale_fill_manual(values=c("chocolate1","seagreen3"))+
        scale_y_continuous(name = "xxx",
                           #breaks = seq(0, 9, 1),
                           limits=c(0, 1)) +
        scale_x_discrete(name = "xxx") +
        ggtitle("xxx") +
        theme_minimal() +
        theme(plot.title = element_text(size = 18),
              text = element_text(size = 18),
              #axis.title = element_text(face="bold"),
              axis.text.x=element_text(size = 18) ,
              legend.position = "bottom")# +
        #facet_grid(. ~ Method)
p


# example 2

dat = data.frame(Method, ranking, Tissue, Tissue_types)

p10 <- ggplot(dat, aes(x = Tissue_types, y = ranking, fill = Tissue_types)) +
        geom_boxplot(alpha = 0.7) +scale_fill_manual(values=c("skyblue2","seagreen3"))+
        scale_y_continuous(name = "xxx",
                           breaks = seq(0, 40, 5),
                           limits=c(0, 40)) +
        scale_x_discrete(name = " ") +
        ggtitle(paste0(disease_name[i])) +
        theme_bw() +
        theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(face="bold"),
              axis.text.x=element_text(size = 15) ,
              legend.position = "bottom") +
        facet_grid(. ~ Method)
p10


# example 3

# order_dat$method_ratio = factor(order_datface$method_ratio, levels=rev(taba$method_ratio),order=T)
p2=ggplot(order_dat, aes(x=method_ratio, y=order)) +
  #geom_boxplot(alpha = 0.6,fill = "lightgreen")+
  geom_boxplot(alpha = 0.6,fill = cbp[1:7])+
  #stat_summary(fun=mean, geom="point", shape=20, size=10, color="red", fill="red") +
  # geom_abline(intercept = coefs[1], slope = coefs[2],color="orange",size=2)+
  #geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
  #geom_hline(yintercept=median(SpatialPCA_ratio), linetype="dashed", color = "red",size=1)+
  #geom_jitter(position = position_jitter(w = 0.1, h = 0),fill="#D55E00",color="#D55E00", size=1)+
  theme_bw(base_size=20)+
  #ylim(0,1)+
  #theme(axis.text.x = element_text(angle = 60,  hjust=1))+
  labs(title=paste0(""),
       x="", y = "Rank order")+
  coord_flip()
print(p2)



Venn plot


library(Vennerable)

Vcommon <- Venn(list("ePeak Genes"= ePeaks$GENE,"eGenes"= eGenes$Gene ) )
plot(Vcommon,doWeights=TRUE)


make_venn = function(plist , namelist, title){
  library(VennDiagram)
  library(RColorBrewer)
  myCol <- brewer.pal(length(plist), "Pastel2")

  venn.diagram(
        x = plist,
        category.names = namelist,
        filename = paste0("#",title,".png"),
        output=TRUE,
        # Output features
        imagetype="png" ,
        height = 480 , 
        width = 480 , 
        resolution = 300,
        compression = "lzw",
        # Circles
        lwd = 2,
        lty = 'blank',
        fill = myCol,
        # Numbers
        cex = .6,
        fontface = "bold",
        fontfamily = "sans",
        # Set names
        cat.cex = 0.6,
        cat.fontface = "bold",
        cat.default.pos = "outer",
        cat.pos = c(-27, 27, 135),
        cat.dist = c(0.055, 0.055, 0.085),
        cat.fontfamily = "sans",
        rotation = 1)


}

Volcano plot

VolcData <- allChr.est[,c("beta","pvalue","qvalue")]
VolcData$sig <- ifelse(paste(allChr.est$PEAK,allChr.est$SNP) %in% paste(reported_m6AQTL$PEAK,reported_m6AQTL$SNP),"FDR<0.1","Non-sig")

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ggplot(VolcData, aes(x = beta, y = -log10(pvalue)) )+ geom_point(size = 0.5, color =  "gray")+geom_point(data = VolcData[VolcData$sig == "FDR<0.1",],aes(x = beta, y = -log10(pvalue)), color = "#0072B2",size = 0.6 )+theme_bw()+theme(axis.title =  element_text(size = 20, face = "bold"),axis.text = element_text(size = 20, face = "bold", color = "black"),  axis.line = element_line(color = "black",size = 0.8), axis.ticks = element_line(color = "black",size = 0.8))+xlab("Effect sizes") + scale_x_continuous(limits = c(-2.5,2.5))

histogram


# example 1

ggplot(df, aes(x=df[,k])) +
  geom_histogram(bins = 150,color="darkblue", fill="lightblue")+
  geom_vline(aes(xintercept=median(df[,k])),linetype="dashed")+ 
  theme_minimal(base_size = 22)+
  geom_density(alpha=0.6)+
   labs(title=paste0(xxx),x=paste0(xxx), y = "xxx")

# example 2

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ggplot(data = reported_m6AQTL, aes(x = beta) )+
geom_histogram(col=I("black"),bins = 100)+
theme_bw()+
theme(axis.title =  element_text(size = 20, face = "bold"),
      axis.text = element_text(size = 15, face = "bold", color = "black"),  
      axis.line = element_line(color = "black",size = 0.8), 
      axis.ticks = element_line(color = "black",size = 0.8))+
xlab("Effect sizes") 


density plot

# example 1

  ggplot(eqtl_table_EU2, aes(x = eqtl_table_EU_rs_dist_tss)) + 
  geom_density(aes(fill = eQTL_order), alpha = 0.4) + 
  geom_vline(data = mu, aes(xintercept = grp.median, color = eQTL_order), linetype = "dashed") + 
  labs(title="xxx",x ="xxx") 

# example 2

p1 <-
        ggplot(ct1, aes(x=pos, group=Feature,colour=factor(Feature), weight=3*weight))  +
        ggtitle("Distribution on mRNA") +
        theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
        xlab("") +
        ylab("Frequency") +
        geom_density(adjust=adjust,aes(fill=factor(Feature)),alpha=0.2,position="fill") +
        annotate("text", x = 0.5, y = -0.2, label = "5'UTR") +
        annotate("text", x = 1.5, y = -0.2, label = "CDS") +
        annotate("text", x = 2.5, y = -0.2, label = "3'UTR") +
        theme_bw() + theme(axis.ticks = element_blank(), axis.text.x = element_blank(),panel.border = element_blank(), panel.grid.major = element_blank(),
                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
        annotate("rect", xmin = 0, xmax = 1, ymin = -0.12, ymax = -0.08, alpha = .99, colour = "black")+
        annotate("rect", xmin = 2, xmax = 3, ymin = -0.12, ymax = -0.08, alpha = .99, colour = "black")+
        annotate("rect", xmin = 1, xmax = 2, ymin = -0.16, ymax = -0.04, alpha = .2, colour = "black")

barplot

# example 1
p1 =ggplot(dat, aes(x=methods, y=value,fill=methods)) +
	  geom_bar(stat="identity", color="black",width=0.8)+
  #scale_fill_brewer(palette="Paired")+
  #scale_fill_brewer(palette="Set3")+
  #scale_fill_distiller(palette = "Oranges")+
  scale_fill_manual(values = D3)+
  labs(title=paste0("Sample ",name[sample]))+
  theme(legend.position="right") +
  theme_classic()+
  	  #geom_abline(intercept = coefs[1], slope = coefs[2],color="orange",size=2)+
	  #geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
	  #geom_jitter(position = position_jitter(w = 0.1, h = 0),fill="#D55E00",color="#D55E00", size=1)+
	  #ylim(0,1)+
  #geom_hline(yintercept=median(dat[1,metrics+1]), linetype="dashed", color = "red",size=1)+
  #scale_fill_manual(values = method_color)+
  theme(axis.text.x = element_text(angle = 60,  hjust=1))+
  theme(plot.title = element_text(size = 30),
              text = element_text(size = 30),
              #axis.title = element_text(face="bold"),
              #axis.text.x=element_text(size = 20) ,
              legend.position = "right")+
  facet_wrap(~variable)+
  labs(title=paste0(""),
   x="Methods", y = paste0("ARI"))

# example 2
p=ggplot(data=datt, aes(x=fct_reorder(tolower(term_id),log10p), y=log10p, fill=Source,label = ifelse(significant ==TRUE, "*",""))) +
  geom_bar(stat="identity", position=position_dodge(),color="black",width=0.8)+
  #scale_fill_continuous(low='#F0E442', high='red', guide=guide_colorbar(reverse=TRUE)) + 
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7"))+
  labs(title=paste0("Pseudotime related genes GSEA"),x="Biological terms", y = "-log10(p value)")+
  coord_flip()+
  theme_classic()+
  #geom_text(vjust = 0.5) +
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color = "red",size=2)+
  geom_text(vjust = 1, nudge_y = 0.5)+
  #ylim(0,1)+
    theme(plot.title = element_text(size = 30,color="black",face="bold"),
              text = element_text(size = 30,color="black",face="bold"),
              #axis.title = element_text(size = 25,color="black",face="bold"),
              axis.text.x=element_text(size = 30,color="black",face="bold") ,
              legend.position = "right")# +


# example 3
ggplot(data=dat, aes(x=topcount, y=count, fill=method)) +
  geom_bar(stat="identity", position=position_dodge())+
  scale_fill_brewer(palette="Paired")+
  labs(title="xxx",x="xxx", y = "xxx")+
  theme_minimal(base_size = 22)+
  theme(legend.position="bottom") 

error bar plot

# reference: m6A nature genetics paper
torus.enrich <- read.table("~/m6AQTL/m6A_QTL_results/linear_model/annotation/m6AQTL.AnnotationEnrich.est", sep = "\t")[-c(1),]
torus.enrich <- gsub("\\s+", "\t", sapply(torus.enrich,trimws))
torus.enrich <- do.call(rbind.data.frame, strsplit(torus.enrich, split = "\t") )
torus.enrich[,-c(1)] <- apply(torus.enrich[,-c(1)],2, function(x) as.numeric(as.character(x)) )

colnames(torus.enrich) <- c("feature","LogOddRatio","CI05","CI95")
torus.enrich$feature <- factor( c( "5' UTR","3' UTR", "CDS", "Intron", "Intergenic\nrepressive" ))
torus.enrich$feature <- factor( c( "5' UTR","3' UTR", "CDS", "Intron" , "Intergenic\nrepressive"), levels = torus.enrich[order(torus.enrich$LogOddRatio),"feature"] )

ggplot(torus.enrich,aes(x = LogOddRatio/log(2), y = feature ))+geom_point(size = 2)+ xlab("Log2 enrichment")+
  geom_errorbarh(aes(xmin = CI05/log(2), xmax = CI95/log(2), height = 0.3),size = 0.8)+ 
  theme_bw()  + ylab("Features")+
  geom_vline(xintercept = 0,linetype="dotted", colour = "red")+theme(axis.ticks = element_blank(),  
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), axis.text = element_text(face="bold",size = 23, colour = "black"),axis.text.y = element_text(angle = 10),axis.title.x = element_text(face="bold",size = 20) , axis.title.y = element_blank())+coord_cartesian(xlim=c(-9,6))



# Use 80% confidence intervals
p <- ggplot(pi1_phenotypes.df, 
            aes(x=phenotypes, y=pi1, fill=thresh_nlogP)) + 
  geom_bar(position=position_dodge(), stat="identity") +
  geom_errorbar(aes(ymin=ci10, ymax=ci90),
                width=.2,                    # Width of the error bars
                position=position_dodge(.9)) + 
  labs(title = "QTL sharing (ascertained on m6A-QTLs)",
       x = "\nAscertainment cutoff", 
       y = expression(paste("Sharing (", pi["1"], ")")), 
       fill = expression(paste(-log[10], " p-value"))) + 
  coord_cartesian(ylim=c(0,0.8)) +
  scale_fill_manual(values=cbbPalette) +
  theme_minimal() +
  theme(legend.position="bottom")


pie plot


df = data.frame("State" = statenames,"Percentage" = statenum)
library(ggplot2)

pie = ggplot(df, aes(x="", y=Percentage, fill=State)) + geom_bar(stat="identity", width=1)
pie = pie + coord_polar("y", start=0) + geom_text(aes(label = paste0(round(Percentage*100), "%")), position = position_stack(vjust = 0.5))
pie = pie + scale_fill_brewer(palette = "Set3") 
pie = pie + labs(x = NULL, y = NULL, fill = NULL, title = "State")
pie = pie + theme_classic() + theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))
pie
     

scatter plot

# example 1

ggplot(mydata, aes(x=originalranking, y=subs)) +
  geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE,color="tomato1")+
  theme_bw(base_size=25)+
  labs(title=paste0("xxx"),
       x="xxx", y = "xxx")
dev.off()

# example 2

figure =ggplot(datt, aes(x = x, y = -y, color = region)) +
        geom_point(size=pointsize[sample], alpha = 1) +
        scale_color_viridis(option=coloroption)+
        #ggtitle(paste0("sample ",name[sample],": ",regionID))+
        ggtitle(paste0(regionID))+
        theme_void()+
        theme(plot.title = element_text(size = textsize),
            text = element_text(size = textsize),
            legend.position = "bottom")

# example 3

ggplot(effectSize_melt,aes(x = m6A, y = value))+
geom_point( size = 1)+
ylab("QTL effect sizes")+
xlab("m6A effect size")+
stat_smooth(aes(x = m6A, y = value),method = 'lm')+ 
facet_wrap(facets = "variable", nrow = 2 ) +
  geom_hline(yintercept = 0, lty= 2,colour="#990000")+
  theme_bw() + 
  theme(panel.grid = element_blank(),
        axis.line = element_line(colour = "black",size = I(0.5)),
        axis.title = element_text(family = "arial", face = "bold", size = 15),
        axis.text = element_text(family = "arial", face = "bold", size = 14,colour = "black"), 
        plot.title = element_text(family = "arial", face = "bold", size = 14, hjust = 0.5), 
        legend.position = "none", strip.text = element_text(face = "bold", size = 14), 
        panel.grid.minor = element_blank() )+
  stat_poly_eq(aes(label = paste(..rr.label..)), 
      label.x.npc = 0.33, label.y.npc = 0.9, formula = y~x, parse = TRUE, size = 4.6,coef.digits = 2) +
  xlim(-5,5) + ylim(-5,5) + ## remove outliers, abs(effect size) > 5
  coord_cartesian(xlim = c(-2.2,2.2), ylim = c(-2.2,2.2)) ## zoom in range: c(-2.2,2.2)


line plot


ggplot(topss, aes(x=tis, y=reproducibility)) +
  geom_point(shape=19, fill="hotpink1", color="hotpink1", size=3)+
  geom_line() +
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE,color="tomato1")+
  theme_bw(base_size=25)+
  ylim(0,1)+
  labs(title=paste0("xxx"),
       x="xxx", y = "xxx")
dev.off()


#  y: #eQTLs, x: #PCs
PC = c(0,  5, 10, 15, 20)
nG = PC_eQTL
nPC = length(PC)
pdf("xxx.pdf", 4,3)
plot(0, 0, xlim=c(0, 20), ylim=c(0, 600000), type="n", xlab="Number of Genotype PCs", ylab="Number of eSNPs", main="",cex.lab=1.2)
abline(v=seq(0, 20, 5), lty=2, col="lightgrey")
abline(h=seq(0, 600000, 50000), lty=2, col="lightgrey")
points(PC, nG, type="o", col=COL[2], pch=20, lwd=2)
#legend("bottomright", legend=c("eQTLs vs PCs at 5% FDR in African American"), fill=c(COL[1], "grey"), bg="white", cex=1.2)



voronoi plot

library(ggforce)


dat = data.frame(data_use$location_use, "region"=SpatialPCA_region,"expression"=data_use$SCTscaled[which(rownames(data_use$SCTscaled)=="FETUB"),])

dat_random = dat[sample(1:3813,200,replace=F),]

pdf("test.pdf",width=10,height=10)
color_in = c("#e07a5f","#a8dadc","#F9C74F")
p=ggplot(dat_random, aes(x, y)) +
  geom_voronoi_tile(aes(fill = region,group = -1L), colour = 'white',
                    expand = unit(-.5, 'mm'), radius = unit(2, 'mm'))+
  # geom_voronoi_segment(aes(fill = region,group = -1L)) +
  theme_void() +
   scale_color_manual(values = color_in)+
   theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(),
              axis.text.x=element_text() ,
              legend.position = "right") +
    guides(colour = guide_legend(override.aes = list(size=20)))
print(p)

lowcolor="#05a8aa"
midcolor="#edede9"
highcolor="#bc412b"
dat_random$expression[which(dat_random$region !="Tumor")]=0
p=ggplot(dat_random, aes(x, y)) +
  geom_voronoi_tile(aes(fill = expression,group = -1L), colour = 'white',
                    expand = unit(-.5, 'mm'), radius = unit(2, 'mm'))+
  geom_voronoi_segment(aes(fill = region,group = -1L)) +
  scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
   theme_void() +
   theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(),
              axis.text.x=element_text() ,
              legend.position = "right") +
    guides(colour = guide_legend(override.aes = list(size=20)))
print(p)
dev.off()



heatmap

mapDrugToColor<-function(annotations){
    colorsVector = ifelse(annotations["category"]=="Others", 
        "blue", ifelse(annotations["category"]=="Brain related", 
        "green", "red"))
    return(colorsVector)
}
   
testHeatmap3<-function(logCPM, annotations) {    
    sampleColors = mapDrugToColor(annotations)
    
    # Assign just column annotations
    heatmap3(logCPM, margins=c(16,16), ColSideColors=sampleColors,scale="none") 
    # Assign column annotations and make a custom legend for them
    heatmap3(logCPM, margins=c(16,16), ColSideColors=sampleColors, scale="none",
        legendfun=function()showLegend(legend=c("Others", 
        "Brain related", "Colon related"), col=c("blue", "green", "red"), cex=1))
    
    # Assign column annotations as a mini-graph instead of colors,
    # and use the built-in labeling for them
    ColSideAnn<-data.frame(Drug=annotations[["category"]])
    heatmap3(logCPM,ColSideAnn=ColSideAnn,
        ColSideFun=function(x)showAnn(x),
        ColSideWidth=0.8)
}

category = c(rep("Others",6),rep("Brain related",3),rep("Others",3),"Colon related", 
"Colon related", rep("Others",16),"Colon related","Others","Colon related",rep("Others",5))

gAnnotationData = data.frame(tissue_name, category)
gLogCpmData = a

pdf("Tissue_GTEx_heatmap_unscaled.pdf",width=7, height=7)
diag(gLogCpmData)=1    
testHeatmap3(gLogCpmData, gAnnotationData)
dev.off()

heatmap change margin size and color

library(BiRewire)
library(corrplot)
library(heatmap3)

#---------------------------------
# Tissues 
#---------------------------------

load("tissue_net.RData")
load("tissue_name.RData")
Tissue_network = tissue_net

# calculate Jaccard Index between each pair of tissues

a = matrix(0,38,38)
for(i in 1:38){
	for(j in (i+1):38){
		a[i,j] = birewire.similarity( Tissue_network[[i]],Tissue_network[[j]])
		a[j,i] = a[i,j]
	}
	print(i)
	print(a[i,])
}
colnames(a) = tissue_name
rownames(a) = tissue_name

# use heatmap3
mapDrugToColor<-function(annotations){
    colorsVector = ifelse(annotations["category"]=="Others", 
        "blue", ifelse(annotations["category"]=="Brain related", 
        "green", "red"))
    return(colorsVector)
}
testHeatmap3<-function(logCPM, annotations) {    
    sampleColors = mapDrugToColor(annotations)
    
    # Assign just column annotations
    heatmap3(logCPM, margins=c(10,10), ColSideColors=sampleColors,scale="none",col = colorRampPalette(c("firebrick", "yellow", "white"))(1024)) 
    # Assign column annotations and make a custom legend for them
    heatmap3(logCPM, margins=c(10,10), ColSideColors=sampleColors, scale="none",col = colorRampPalette(c("firebrick", "yellow", "white"))(1024),
        legendfun=function()showLegend(legend=c("Others", 
        "Brain related", "Colon related"), col=c("blue", "green", "red"), cex=1))
    
    # Assign column annotations as a mini-graph instead of colors,
    # and use the built-in labeling for them
    ColSideAnn<-data.frame(Drug=annotations[["category"]])
    heatmap3(logCPM,ColSideAnn=ColSideAnn,
        ColSideFun=function(x)showAnn(x),
        margins=c(10,10),
        ColSideWidth=0.8)
}
category = c(rep("Others",6),rep("Brain related",3),rep("Others",3),"Colon related", 
"Colon related", rep("Others",16),"Colon related","Others","Colon related",rep("Others",5))
gAnnotationData = data.frame(tissue_name, category)
gLogCpmData = a
pdf("Tissue_GTEx_heatmap_unscaled.pdf",width=8, height=8)
diag(gLogCpmData)=1    
testHeatmap3(gLogCpmData, gAnnotationData)
dev.off()


QQ plot

https://uw-gac.github.io/topmed_workshop_2017/association-tests.html#association-testing-with-aggregate-units

library(ggplot2)
qqPlot <- function(pval) {
    pval <- pval[!is.na(pval)]
    n <- length(pval)
    x <- 1:n
    dat <- data.frame(obs=sort(pval),
                      exp=x/n,
                      upper=qbeta(0.025, x, rev(x)),
                      lower=qbeta(0.975, x, rev(x)))
    
    ggplot(dat, aes(-log10(exp), -log10(obs))) +
        geom_line(aes(-log10(exp), -log10(upper)), color="gray") +
        geom_line(aes(-log10(exp), -log10(lower)), color="gray") +
        geom_point() +
        geom_abline(intercept=0, slope=1, color="red") +
        xlab(expression(paste(-log[10], "(expected P)"))) +
        ylab(expression(paste(-log[10], "(observed P)"))) +
        theme_bw()
}    

qqPlot(assoc$Wald.pval)

QQplot multiple groups

I modified the codes from https://genome.sph.umich.edu/wiki/Code_Sample:_Generating_QQ_Plots_in_R

so that the points won’t be along the y-axis when we observe many small p values

library(lattice)
qqunif.plot<-function(pvalues, 
	should.thin=T, thin.obs.places=2, thin.exp.places=2, 
	xlab=expression(paste("Expected (",-log[10], " p-value)")),
	ylab=expression(paste("Observed (",-log[10], " p-value)")), 
	draw.conf=TRUE, conf.points=1000, conf.col="lightgray", conf.alpha=.05,
	already.transformed=FALSE, pch=20, 
	aspect="iso", 
	prepanel=prepanel.qqunif,
	par.settings=list(superpose.symbol=list(pch=pch)), ...) {
	
	
	#error checking
	if (length(pvalues)==0) stop("pvalue vector is empty, can't draw plot")
	if(!(class(pvalues)=="numeric" || 
		(class(pvalues)=="list" && all(sapply(pvalues, class)=="numeric"))))
		stop("pvalue vector is not numeric, can't draw plot")
	if (any(is.na(unlist(pvalues)))) stop("pvalue vector contains NA values, can't draw plot")
	if (already.transformed==FALSE) {
		if (any(unlist(pvalues)==0)) stop("pvalue vector contains zeros, can't draw plot")
	} else {
		if (any(unlist(pvalues)<0)) stop("-log10 pvalue vector contains negative values, can't draw plot")
	}
	
	
	grp<-NULL
	n<-1
	exp.x<-c()
	if(is.list(pvalues)) {
		nn<-sapply(pvalues, length)
		rs<-cumsum(nn)
		re<-rs-nn+1
		n<-min(nn)
		if (!is.null(names(pvalues))) {
			grp=factor(rep(names(pvalues), nn), levels=names(pvalues))
			names(pvalues)<-NULL
		} else {
			grp=factor(rep(1:length(pvalues), nn))
		}
		pvo<-pvalues
		pvalues<-numeric(sum(nn))
		exp.x<-numeric(sum(nn))
		for(i in 1:length(pvo)) {
			if (!already.transformed) {
				pvalues[rs[i]:re[i]] <- -log10(pvo[[i]])
				exp.x[rs[i]:re[i]] <- -log10((rank(pvo[[i]], ties.method="first")-.5)/nn[i])
			} else {
				pvalues[rs[i]:re[i]] <- pvo[[i]]
				exp.x[rs[i]:re[i]] <- -log10((nn[i]+1-rank(pvo[[i]], ties.method="first")-.5)/(nn[i]+1))
			}
		}
	} else {
		n <- length(pvalues)+1
		if (!already.transformed) {
			exp.x <- -log10((rank(pvalues, ties.method="first")-.5)/n)
			pvalues <- -log10(pvalues)
		} else {
			exp.x <- -log10((n-rank(pvalues, ties.method="first")-.5)/n)
		}
	}


	#this is a helper function to draw the confidence interval
	panel.qqconf<-function(n, conf.points=1000, conf.col="gray", conf.alpha=.05, ...) {
		require(grid)
		conf.points = min(conf.points, n-1);
		mpts<-matrix(nrow=conf.points*2, ncol=2)
        	for(i in seq(from=1, to=conf.points)) {
            		mpts[i,1]<- -log10((i-.5)/n)
            		mpts[i,2]<- -log10(qbeta(1-conf.alpha/2, i, n-i))
            		mpts[conf.points*2+1-i,1]<- -log10((i-.5)/n)
            		mpts[conf.points*2+1-i,2]<- -log10(qbeta(conf.alpha/2, i, n-i))
        	}
        	grid.polygon(x=mpts[,1],y=mpts[,2], gp=gpar(fill=conf.col, lty=0), default.units="native")
    	}

	#reduce number of points to plot
	if (should.thin==T) {
		if (!is.null(grp)) {
			thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
				exp.x = round(exp.x, thin.exp.places),
				grp=grp))
			grp = thin$grp
		} else {
			thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
				exp.x = round(exp.x, thin.exp.places)))
		}
		pvalues <- thin$pvalues
		exp.x <- thin$exp.x
	}
	gc()
	
	prepanel.qqunif= function(x,y,...) {
		A = list()
		A$xlim = range(x)*1.02
		A$xlim[1]=0
		A$ylim = range(y)*1.02
		A$ylim[1]=0
		return(A)
	}

	#draw the plot
	xyplot(pvalues~exp.x, groups=grp, xlab=xlab, ylab=ylab, 
		#aspect=aspect,
		prepanel=prepanel, 
		scales=list(axs="i"), 
		pch=pch,
		panel = function(x, y, ...) {
			if (draw.conf) {
				panel.qqconf(n, conf.points=conf.points, 
					conf.col=conf.col, conf.alpha=conf.alpha)
			};
			panel.xyplot(x,y, ...);
			panel.abline(0,1);
		}, par.settings=par.settings, ...
	)
}

my.pvalue.list<-list("AA"=mydat[mydat$Population=="AA",1], "EA"=mydat[mydat$Population=="EA",1],
"AFA"=mydat[mydat$Population=="AFA",1],"CAU"=mydat[mydat$Population=="CAU",1],
"HIS"=mydat[mydat$Population=="HIS",1])

pdf(paste0("QQ_trait",traits[traitnum],"_test.pdf") ,width = 6, height = 6)
qqunif.plot(my.pvalue.list, auto.key=list(corner=c(.95,.05)))
dev.off()




categorical value visualization on 2d

plot_cluster = function (location, clusterlabel, pointsize = 3, text_size = 15, shape=16,title_in, color_in, legend = "none"){
    cluster = clusterlabel
    loc_x = location[, 1]
    loc_y = location[, 2]
    datt = data.frame(cluster, loc_x, loc_y)
    p = ggplot(datt, aes(x = location[, 1], y = location[, 2], 
        color = cluster)) + geom_point(alpha = 1, size = pointsize,shape=shape) + 
        scale_color_manual(values = color_in) + ggtitle(paste0(title_in)) + 
        theme_void() + theme(plot.title = element_text(size = text_size, 
        face = "bold"), text = element_text(size = text_size), 
        legend.position = legend)
    p
}

continuous value visualization on 2d

library(ggplot2)
# location is n by 2 matrix of location coordinates
# feature is a vector of gene expression
plot_factor_value= function(location, feature, textmethod="Your figure", pointsize = 2, textsize = 15,shape=15) 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
			      scale_color_gradient2( low="#05a8aa",mid="#edede9", high="#bc412b")+
            ggtitle(paste0(textmethod)) + 
            theme_void() + 
            theme(plot.title = element_text(size = textsize), 
            text = element_text(size = textsize), legend.position = "right")
  
    return(p)
}


plot_factor_value= function (location, feature, textmethod, pointsize = 2, textsize = 15,shape=15,lowcolor="#05a8aa",midcolor="#edede9",highcolor="#bc412b") 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
      scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
            ggtitle(paste0(textmethod)) + 
            theme_void() + 
            theme(plot.title = element_text(size = textsize), 
            text = element_text(size = textsize), legend.position = "right")
  
    return(p)
}



plot_factor_value_darkmode =function (location, feature, textmethod, pointsize = 2, textsize = 15,shape=15,lowcolor="#ffffff",midcolor="#118ab2",highcolor="#284b63",legend="right") 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
      scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
            ggtitle(paste0(textmethod)) + 
            theme_dark() + 
            #theme_void() +
            theme(axis.line=element_blank(),axis.text.x=element_blank(),
            axis.text = element_text(size = textsize),
            plot.title = element_text(size = textsize), 
          axis.text.y=element_blank(),axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          legend.position=legend,
          #panel.background=element_blank(),
          panel.border=element_blank(),panel.grid.major=element_blank(),
          panel.grid.minor=element_blank(),plot.background=element_blank())

    return(p)
}



calculate Moran I

get_moranI = function(count_in,location){
    # count_in: gene by spot

    count_in = as.matrix(count_in)
    if(length(which(rowSums(count_in)==0))>0){
        count_in = count_in[-which(rowSums(count_in)==0),]
    }
    library(moranfast)
    output <- apply(count_in, 1, function(x) moranfast(x, location[,1], location[,2]))
    out =  sapply(output, '[[', 1)
    return(out)
}   



# spatialpc_moranI = get_moranI(SpatialPCA_result$SpatialPCs[1:5,], SpatialPCA_result$location)
# pc_moranI = get_moranI(PCA_result$PCs[1:5, ], SpatialPCA_result$location)


get_moranI_normalize = function(expr,location){
    # count_in: gene by spot
    
    library(moranfast)
    output <- apply(expr, 1, function(x) moranfast(x, location[,1], location[,2]))
    out =  sapply(output, '[[', 1)
    return(out)
}   

string split code

strsplit_func = function(input){
  strsplit(input, split = "-")[[1]][1]
}


unlist(lapply(gostres$result$term_id,strsplit_func))

testEnrichment

testEnrichment = function(Goi, gsets, background,  mindiffexp=2) {
  scores = do.call(rbind.data.frame, lapply(names(gsets), function(gsetid){
    gset = gsets[[gsetid]]

    # much faster than using table
    tp = length(intersect(Goi, gset))
    fn = length(Goi) - tp
    fp = length(gset) - tp
    tn = length(background) - tp - fp - fn

    contingency_table = matrix(c(tp, fp, fn, tn), 2, 2)

    if (contingency_table[1,1] < mindiffexp){
      return(NULL)
    }

    fisher_result = stats::fisher.test(contingency_table, alternative="two.sided",conf.int = TRUE)
    list(pval=fisher_result$p.value, odds=fisher_result$estimate, odds_conf_low = fisher_result$conf.int[1],odds_conf_high = fisher_result$conf.int[2],found=contingency_table[1,1], gsetid=gsetid)
  }))
  if (nrow(scores) > 0) {
    scores$qval = stats::p.adjust(scores$pval, method="fdr")
  }
  scores
}

gprofiler2, GSEA


library(ActivePathways)

gmt <- read.GMT("/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.gmt")
gmt_filter = gmt[c(1,2)]
count = 0
for(item in 1:length(gmt)){
  itemid = gmt[[item]]$id 
  if(length(grep("CANCER MODULE",itemid))!=0  | length(grep("GOBP",itemid))!=0 | length(grep("GOCC",itemid))!=0 | length(grep("GOMF",itemid))!=0){
    count = count + 1
    print(count)
    gmt_filter[[count]]=gmt[[item]] 
  }
}
write.GMT(gmt_filter, "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")
upload_GMT_file(gmtfile = "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")



library(MAST)
library(fdrtool)
library(qvalue)

library(gprofiler2)
library(ggplot2)
library(tidyverse)


h: hallmark gene sets hallmark gene sets as Gene Symbols  h.all.v7.4.symbols.gmt
C1: positional gene sets Gene sets corresponding to each human chromosome and each cytogenetic band. 
c2: curated gene sets KEGG gene sets as Gene Symbols  c2.cp.kegg.v7.4.symbols.gmt
C3: regulatory target gene sets Gene sets representing potential targets of regulation by transcription factors or microRNAs. 
c4: computational gene sets cancer modules as Gene Symbols  c4.cm.v7.4.symbols.gmt
c5: Ontology gene sets all GO gene sets as Gene Symbols  c5.go.v7.4.symbols.gmt
c5: Ontology gene sets Human Phenotype Ontology as Gene Symbols  c5.hpo.v7.4.symbols.gmt
C6: oncogenic signature gene sets Gene sets that represent signatures of cellular pathways which are often dis-regulated in cancer. 
c7: immunologic signature gene sets all immunologic signature gene sets as Gene Symbols c7.all.v7.4.symbols.gmt
C8: cell type signature gene sets Gene sets that contain curated cluster markers for cell types identified in single-cell sequencing studies of human tissue.

# GMT downloaded in : /net/mulan/disk2/shanglu/Projects/datasets/msigdbr


upload_GMT_file(gmtfile = "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")
# gp__WcnH_hRul_jhQ


bg = rownames(SpatialPCA_result$rawcount)
gostres <- gost(query = unlist(ST_pseudotime_genelist), 
                organism = "gp__WcnH_hRul_jhQ", ordered_query = FALSE, 
                multi_query = FALSE, significant = FALSE, exclude_iea = FALSE, 
                measure_underrepresentation = FALSE, evcodes = FALSE, 
                user_threshold = 0.05, correction_method = "g_SCS", 
                domain_scope = "custom", custom_bg = bg, 
                numeric_ns = "", 
                #sources = c("GO:MF","GO:BP", "GO:CC","KEGG"), 
                as_short_link = FALSE)
gostres$result = gostres$result[order(gostres$result$p_value),]
gostres$result$Source = unlist(lapply(gostres$result$term_id,strsplit_func))
gostres$result$Source[which(gostres$result$Source=="MODULE")]="CANCER MODULE"
datt = gostres$result[1:10,]
datt$log10p = -log10(datt$p_value)




p=ggplot(data=datt, aes(x=fct_reorder(tolower(term_id),log10p), y=log10p, fill=Source,label = ifelse(significant ==TRUE, "*",""))) +
  geom_bar(stat="identity", position=position_dodge(),color="black",width=0.8)+
  #scale_fill_continuous(low='#F0E442', high='red', guide=guide_colorbar(reverse=TRUE)) + 
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7"))+
  labs(title=paste0("Pseudotime related genes GSEA"),x="Biological terms", y = "-log10(p value)")+
  coord_flip()+
  theme_classic()+
  #geom_text(vjust = 0.5) +
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color = "red",size=2)+
  geom_text(vjust = 1, nudge_y = 0.5)+
  #ylim(0,1)+
    theme(plot.title = element_text(size = 30,color="black",face="bold"),
              text = element_text(size = 30,color="black",face="bold"),
              #axis.title = element_text(size = 25,color="black",face="bold"),
              axis.text.x=element_text(size = 30,color="black",face="bold") ,
              legend.position = "right")# +


pdf(paste0("ST_GSEA_pseudotime_genes.pdf"),width=40,height=5)
  print(p)
dev.off()


tryCatch skip errors

tryCatch({
    celltype_in = as.matrix(celltype_proportion_in[,num_celltype])
    sparkXw <- SPARK::sparkx(count_in, location_in,X_in=celltype_in,numCores = 5,verbose=FALSE)
    dat = data.frame("gene"=rownames(sparkXw$res_mtest),"combinedPval"=sparkXw$res_mtest$combinedPval,"sparkXw_adj_p"=sparkXw$res_mtest$adjustedPval,"celltype"=colnames(celltype_proportion_in)[num_celltype])
    save(dat, file = paste0("/net/mulan/disk2/shanglu/Projects/explore/celltype_SVG/10X_visium_mouse_kidney/SPARK/SPARKX_result_control_celltype_",num_celltype,"_permu_",permu,".RData"))
    }, error=function(e){cat("num_celltype",num_celltype," :",conditionMessage(e), "\n")})


ZIP and unzip

tar xvzf test.tar.gz
# -x --extract = extract files from an archive
# -v, --verbose = verbosely list files processed
# -z, --gzip = gzipped files eg. for tar.gz packages
# -f, --file ARCHIVE = use archive file or device ARCHIVE

tar -xvf file_name.tar
gzip -d file.gz

Plot figures for network

http://blog.schochastics.net/post/ggraph-tricks-for-common-problems/

read in h5ad file

library(reticulate)
library(Seurat)
use_python("/rsrch5/home/biostatistics/lshang/anaconda3/envs/scETM/bin/python3.7")
sc <- import("scanpy")

visium_data = sc$read_h5ad(paste0(path_data,"/visium_data_upload.h5ad"))
visium_data$X = as.matrix(visium_data$X)
data_use = list()
data_use$meta = visium_data$obs
data_use$location = data_use$meta[,c("array_row","array_col")]
data_use$count = t(visium_data$X)
colnames(data_use$count) = rownames(data_use$meta)
rownames(data_use$count) = as.character(visium_data$var$SYMBOL)

plot 2d values with clusters


plot_factor_value_color_by_cluster <- function(location, feature_fill, feature_color, alpha_in,textmethod, stroke=2,pointsize = 2, textsize = 15, alpha = 0.9, legend_title_fill = "Fill", legend_title_color = "Color") {
    # Ensure location is a data frame
    location <- as.data.frame(location)
    
    # Extract coordinates
    locc1 <- location[, 1]
    locc2 <- location[, 2]
    
    # Combine data
    datt <- data.frame(feature_fill, feature_color, locc1, locc2,alpha_in)
    
    # Create ggplot
    p <- ggplot(datt, aes(x = locc1, y = locc2, fill = feature_fill, color = feature_color, alpha=alpha_in)) +
        geom_point(shape = 21, size = pointsize, alpha = alpha_in,stroke = stroke) +
        scale_fill_viridis() +   # continuous fill color scale
        scale_color_manual(values=D3) + # Discrete border color scale
        ggtitle(textmethod) +
        theme_void() +
        theme(plot.title = element_text(size = textsize),
              text = element_text(size = textsize), 
              legend.position = "right") +
        labs(fill = legend_title_fill, color = legend_title_color)
    
    # Return plot
    return(p)
}


scaler <- function(x, q = 0.95) {x[x > quantile(x, q)] <- max(x); scales::rescale(x)}
pdf(paste0("Lung","_celltype_genes_in_",celltype_focus,"_region_as_env.pdf"),width=6,height=5)
region_labels = data_use$region_label
region_labels[which(region_labels!="Tumor")]="other"
for(genename in names(results[order(results)[1:20]])){
  print(genename)
  #genename = "Foxf1"
  expr = scaler(normalized_counts[match(genename,rownames(normalized_counts)),])
  size = expr*3
  alpha_in = rep(0.9,length(expr))
  alpha_in[which(region_labels!="Tumor")] = 0.3
  p = plot_factor_value_color_by_cluster(data_use$location_use, feature_fill=expr,alpha_in =alpha_in ,feature_color = as.factor(data_use$region_label),textmethod=paste0(genename," expression in ",celltype_focus),  stroke=0.7, pointsize = size, textsize = 20)+ guides(colour = guide_legend(override.aes = list(size=10)))
  print(p)

}

dev.off()

blog comments powered by Disqus