Figures

some colors
some packages
increase point size in legends
violin plot
Venn plot
Volcano plot
histogram
density plot
barplot
error bar plot
pie plot
scatter plot
line plot
voronoi plot
heatmap
heatmap change margin size and color
QQ plot
QQplot multiple groups
categorical value visualization on 2d
continuous value visualization on 2d
calculate Moran I
string split code
testEnrichment
gprofiler2, GSEA
tryCatch skip errors
ZIP and unzip
read in h5ad file
plot 2d values with clusters

some colors

D3=c("#f4f1de","#e07a5f","#3d405b","#81b29a","#f2cc8f","#a8dadc","#f1faee","#f08080",
"#F94144", "#dda15e", "#F8961E" ,"#bc6c25", "#F9C74F" ,"#90BE6D", "#43AA8B",
 "#4D908E", "#577590", "#277DA1", "#AEC7E8" ,"#FFBB78" ,"#98DF8A", "#FF9896",
"#C5B0D5" ,"#C49C94", "#F7B6D2", "#C7C7C7" ,"#DBDB8D" ,"#9EDAE5", "#393B79",
"#637939", "#8C6D31", "#843C39", "#7B4173" ,"#5254A3" ,"#8CA252", "#BD9E39",
"#AD494A", "#A55194", "#6B6ECF", "#B5CF6B", "#E7BA52" ,"#D6616B", "#CE6DBD",
"#9C9EDE", "#CEDB9C" ,"#E7CB94", "#E7969C", "#DE9ED6" ,"#3182BD", "#E6550D",
"#31A354", "#756BB1" ,"#636363", "#6BAED6" ,"#FD8D3C" ,"#74C476", "#9E9AC8",
"#969696", "#9ECAE1" ,"#FDAE6B", "#A1D99B" ,"#BCBDDC" ,"#BDBDBD", "#C6DBEF",
"#FDD0A2" ,"#C7E9C0" ,"#DADAEB", "#D9D9D9")

some packages

library(ggplot2)
library(ggpubr)


library(reticulate)
use_python("/net/mulan/home/shanglu/anaconda3/envs/SVGPVAE/bin/python", required=TRUE)
sc <- import("scanpy")

suppressMessages(require(SPARK))
library(Seurat)
library(peakRAM)
library(ggplot2)
library(ggpubr)
library(mclust) # ARI
library(aricode)# NMI
library(SpatialPCA)# NMI
library(lisi) # lisi score
library(reticulate) 
library(tidyverse)

increase point size in legends

+ guides(colour = guide_legend(override.aes = list(size=10)))

violin plot

dp <- ggplot(dat, aes(x=num, y=pve)) + 
  geom_violin(trim=FALSE)+
  geom_boxplot(width=0.1, fill="white")+
  labs(title="xxx",x="xxx", y = "xxx")+
  scale_fill_brewer(palette="RdBu") + 
  theme_minimal(base_size = 22)
dp

boxplot

# example 1
p <- ggplot(dat, aes(x = num, y = pve )) +
        geom_boxplot(alpha = 0.8,fill = "cornflowerblue") + 
        scale_fill_manual(values=c("chocolate1","seagreen3"))+
        scale_y_continuous(name = "xxx",
                           #breaks = seq(0, 9, 1),
                           limits=c(0, 1)) +
        scale_x_discrete(name = "xxx") +
        ggtitle("xxx") +
        theme_minimal() +
        theme(plot.title = element_text(size = 18),
              text = element_text(size = 18),
              #axis.title = element_text(face="bold"),
              axis.text.x=element_text(size = 18) ,
              legend.position = "bottom")# +
        #facet_grid(. ~ Method)
p


# example 2

dat = data.frame(Method, ranking, Tissue, Tissue_types)

p10 <- ggplot(dat, aes(x = Tissue_types, y = ranking, fill = Tissue_types)) +
        geom_boxplot(alpha = 0.7) +scale_fill_manual(values=c("skyblue2","seagreen3"))+
        scale_y_continuous(name = "xxx",
                           breaks = seq(0, 40, 5),
                           limits=c(0, 40)) +
        scale_x_discrete(name = " ") +
        ggtitle(paste0(disease_name[i])) +
        theme_bw() +
        theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(face="bold"),
              axis.text.x=element_text(size = 15) ,
              legend.position = "bottom") +
        facet_grid(. ~ Method)
p10


# example 3

# order_dat$method_ratio = factor(order_datface$method_ratio, levels=rev(taba$method_ratio),order=T)
p2=ggplot(order_dat, aes(x=method_ratio, y=order)) +
  #geom_boxplot(alpha = 0.6,fill = "lightgreen")+
  geom_boxplot(alpha = 0.6,fill = cbp[1:7])+
  #stat_summary(fun=mean, geom="point", shape=20, size=10, color="red", fill="red") +
  # geom_abline(intercept = coefs[1], slope = coefs[2],color="orange",size=2)+
  #geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
  #geom_hline(yintercept=median(SpatialPCA_ratio), linetype="dashed", color = "red",size=1)+
  #geom_jitter(position = position_jitter(w = 0.1, h = 0),fill="#D55E00",color="#D55E00", size=1)+
  theme_bw(base_size=20)+
  #ylim(0,1)+
  #theme(axis.text.x = element_text(angle = 60,  hjust=1))+
  labs(title=paste0(""),
       x="", y = "Rank order")+
  coord_flip()
print(p2)

Venn plot

library(Vennerable)

Vcommon <- Venn(list("ePeak Genes"= ePeaks$GENE,"eGenes"= eGenes$Gene ) )
plot(Vcommon,doWeights=TRUE)


make_venn = function(plist , namelist, title){
  library(VennDiagram)
  library(RColorBrewer)
  myCol <- brewer.pal(length(plist), "Pastel2")

  venn.diagram(
        x = plist,
        category.names = namelist,
        filename = paste0("#",title,".png"),
        output=TRUE,
        # Output features
        imagetype="png" ,
        height = 480 , 
        width = 480 , 
        resolution = 300,
        compression = "lzw",
        # Circles
        lwd = 2,
        lty = 'blank',
        fill = myCol,
        # Numbers
        cex = .6,
        fontface = "bold",
        fontfamily = "sans",
        # Set names
        cat.cex = 0.6,
        cat.fontface = "bold",
        cat.default.pos = "outer",
        cat.pos = c(-27, 27, 135),
        cat.dist = c(0.055, 0.055, 0.085),
        cat.fontfamily = "sans",
        rotation = 1)


}

Volcano plot

VolcData <- allChr.est[,c("beta","pvalue","qvalue")]
VolcData$sig <- ifelse(paste(allChr.est$PEAK,allChr.est$SNP) %in% paste(reported_m6AQTL$PEAK,reported_m6AQTL$SNP),"FDR<0.1","Non-sig")

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ggplot(VolcData, aes(x = beta, y = -log10(pvalue)) )+ geom_point(size = 0.5, color =  "gray")+geom_point(data = VolcData[VolcData$sig == "FDR<0.1",],aes(x = beta, y = -log10(pvalue)), color = "#0072B2",size = 0.6 )+theme_bw()+theme(axis.title =  element_text(size = 20, face = "bold"),axis.text = element_text(size = 20, face = "bold", color = "black"),  axis.line = element_line(color = "black",size = 0.8), axis.ticks = element_line(color = "black",size = 0.8))+xlab("Effect sizes") + scale_x_continuous(limits = c(-2.5,2.5))

histogram

# example 1

ggplot(df, aes(x=df[,k])) +
  geom_histogram(bins = 150,color="darkblue", fill="lightblue")+
  geom_vline(aes(xintercept=median(df[,k])),linetype="dashed")+ 
  theme_minimal(base_size = 22)+
  geom_density(alpha=0.6)+
   labs(title=paste0(xxx),x=paste0(xxx), y = "xxx")

# example 2

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ggplot(data = reported_m6AQTL, aes(x = beta) )+
geom_histogram(col=I("black"),bins = 100)+
theme_bw()+
theme(axis.title =  element_text(size = 20, face = "bold"),
      axis.text = element_text(size = 15, face = "bold", color = "black"),  
      axis.line = element_line(color = "black",size = 0.8), 
      axis.ticks = element_line(color = "black",size = 0.8))+
xlab("Effect sizes") 

density plot

# example 1

  ggplot(eqtl_table_EU2, aes(x = eqtl_table_EU_rs_dist_tss)) + 
  geom_density(aes(fill = eQTL_order), alpha = 0.4) + 
  geom_vline(data = mu, aes(xintercept = grp.median, color = eQTL_order), linetype = "dashed") + 
  labs(title="xxx",x ="xxx") 

# example 2

p1 <-
        ggplot(ct1, aes(x=pos, group=Feature,colour=factor(Feature), weight=3*weight))  +
        ggtitle("Distribution on mRNA") +
        theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
        xlab("") +
        ylab("Frequency") +
        geom_density(adjust=adjust,aes(fill=factor(Feature)),alpha=0.2,position="fill") +
        annotate("text", x = 0.5, y = -0.2, label = "5'UTR") +
        annotate("text", x = 1.5, y = -0.2, label = "CDS") +
        annotate("text", x = 2.5, y = -0.2, label = "3'UTR") +
        theme_bw() + theme(axis.ticks = element_blank(), axis.text.x = element_blank(),panel.border = element_blank(), panel.grid.major = element_blank(),
                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
        annotate("rect", xmin = 0, xmax = 1, ymin = -0.12, ymax = -0.08, alpha = .99, colour = "black")+
        annotate("rect", xmin = 2, xmax = 3, ymin = -0.12, ymax = -0.08, alpha = .99, colour = "black")+
        annotate("rect", xmin = 1, xmax = 2, ymin = -0.16, ymax = -0.04, alpha = .2, colour = "black")

barplot

# example 1
p1 =ggplot(dat, aes(x=methods, y=value,fill=methods)) +
	  geom_bar(stat="identity", color="black",width=0.8)+
  #scale_fill_brewer(palette="Paired")+
  #scale_fill_brewer(palette="Set3")+
  #scale_fill_distiller(palette = "Oranges")+
  scale_fill_manual(values = D3)+
  labs(title=paste0("Sample ",name[sample]))+
  theme(legend.position="right") +
  theme_classic()+
  	  #geom_abline(intercept = coefs[1], slope = coefs[2],color="orange",size=2)+
	  #geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
	  #geom_jitter(position = position_jitter(w = 0.1, h = 0),fill="#D55E00",color="#D55E00", size=1)+
	  #ylim(0,1)+
  #geom_hline(yintercept=median(dat[1,metrics+1]), linetype="dashed", color = "red",size=1)+
  #scale_fill_manual(values = method_color)+
  theme(axis.text.x = element_text(angle = 60,  hjust=1))+
  theme(plot.title = element_text(size = 30),
              text = element_text(size = 30),
              #axis.title = element_text(face="bold"),
              #axis.text.x=element_text(size = 20) ,
              legend.position = "right")+
  facet_wrap(~variable)+
  labs(title=paste0(""),
   x="Methods", y = paste0("ARI"))

# example 2
p=ggplot(data=datt, aes(x=fct_reorder(tolower(term_id),log10p), y=log10p, fill=Source,label = ifelse(significant ==TRUE, "*",""))) +
  geom_bar(stat="identity", position=position_dodge(),color="black",width=0.8)+
  #scale_fill_continuous(low='#F0E442', high='red', guide=guide_colorbar(reverse=TRUE)) + 
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7"))+
  labs(title=paste0("Pseudotime related genes GSEA"),x="Biological terms", y = "-log10(p value)")+
  coord_flip()+
  theme_classic()+
  #geom_text(vjust = 0.5) +
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color = "red",size=2)+
  geom_text(vjust = 1, nudge_y = 0.5)+
  #ylim(0,1)+
    theme(plot.title = element_text(size = 30,color="black",face="bold"),
              text = element_text(size = 30,color="black",face="bold"),
              #axis.title = element_text(size = 25,color="black",face="bold"),
              axis.text.x=element_text(size = 30,color="black",face="bold") ,
              legend.position = "right")# +


# example 3
ggplot(data=dat, aes(x=topcount, y=count, fill=method)) +
  geom_bar(stat="identity", position=position_dodge())+
  scale_fill_brewer(palette="Paired")+
  labs(title="xxx",x="xxx", y = "xxx")+
  theme_minimal(base_size = 22)+
  theme(legend.position="bottom") 

error bar plot

# reference: m6A nature genetics paper
torus.enrich <- read.table("~/m6AQTL/m6A_QTL_results/linear_model/annotation/m6AQTL.AnnotationEnrich.est", sep = "\t")[-c(1),]
torus.enrich <- gsub("\\s+", "\t", sapply(torus.enrich,trimws))
torus.enrich <- do.call(rbind.data.frame, strsplit(torus.enrich, split = "\t") )
torus.enrich[,-c(1)] <- apply(torus.enrich[,-c(1)],2, function(x) as.numeric(as.character(x)) )

colnames(torus.enrich) <- c("feature","LogOddRatio","CI05","CI95")
torus.enrich$feature <- factor( c( "5' UTR","3' UTR", "CDS", "Intron", "Intergenic\nrepressive" ))
torus.enrich$feature <- factor( c( "5' UTR","3' UTR", "CDS", "Intron" , "Intergenic\nrepressive"), levels = torus.enrich[order(torus.enrich$LogOddRatio),"feature"] )

ggplot(torus.enrich,aes(x = LogOddRatio/log(2), y = feature ))+geom_point(size = 2)+ xlab("Log2 enrichment")+
  geom_errorbarh(aes(xmin = CI05/log(2), xmax = CI95/log(2), height = 0.3),size = 0.8)+ 
  theme_bw()  + ylab("Features")+
  geom_vline(xintercept = 0,linetype="dotted", colour = "red")+theme(axis.ticks = element_blank(),  
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), axis.text = element_text(face="bold",size = 23, colour = "black"),axis.text.y = element_text(angle = 10),axis.title.x = element_text(face="bold",size = 20) , axis.title.y = element_blank())+coord_cartesian(xlim=c(-9,6))



# Use 80% confidence intervals
p <- ggplot(pi1_phenotypes.df, 
            aes(x=phenotypes, y=pi1, fill=thresh_nlogP)) + 
  geom_bar(position=position_dodge(), stat="identity") +
  geom_errorbar(aes(ymin=ci10, ymax=ci90),
                width=.2,                    # Width of the error bars
                position=position_dodge(.9)) + 
  labs(title = "QTL sharing (ascertained on m6A-QTLs)",
       x = "\nAscertainment cutoff", 
       y = expression(paste("Sharing (", pi["1"], ")")), 
       fill = expression(paste(-log[10], " p-value"))) + 
  coord_cartesian(ylim=c(0,0.8)) +
  scale_fill_manual(values=cbbPalette) +
  theme_minimal() +
  theme(legend.position="bottom")

pie plot

df = data.frame("State" = statenames,"Percentage" = statenum)
library(ggplot2)

pie = ggplot(df, aes(x="", y=Percentage, fill=State)) + geom_bar(stat="identity", width=1)
pie = pie + coord_polar("y", start=0) + geom_text(aes(label = paste0(round(Percentage*100), "%")), position = position_stack(vjust = 0.5))
pie = pie + scale_fill_brewer(palette = "Set3") 
pie = pie + labs(x = NULL, y = NULL, fill = NULL, title = "State")
pie = pie + theme_classic() + theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))
pie
     

scatter plot

# example 1

ggplot(mydata, aes(x=originalranking, y=subs)) +
  geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE,color="tomato1")+
  theme_bw(base_size=25)+
  labs(title=paste0("xxx"),
       x="xxx", y = "xxx")
dev.off()

# example 2

figure =ggplot(datt, aes(x = x, y = -y, color = region)) +
        geom_point(size=pointsize[sample], alpha = 1) +
        scale_color_viridis(option=coloroption)+
        #ggtitle(paste0("sample ",name[sample],": ",regionID))+
        ggtitle(paste0(regionID))+
        theme_void()+
        theme(plot.title = element_text(size = textsize),
            text = element_text(size = textsize),
            legend.position = "bottom")

# example 3

ggplot(effectSize_melt,aes(x = m6A, y = value))+
geom_point( size = 1)+
ylab("QTL effect sizes")+
xlab("m6A effect size")+
stat_smooth(aes(x = m6A, y = value),method = 'lm')+ 
facet_wrap(facets = "variable", nrow = 2 ) +
  geom_hline(yintercept = 0, lty= 2,colour="#990000")+
  theme_bw() + 
  theme(panel.grid = element_blank(),
        axis.line = element_line(colour = "black",size = I(0.5)),
        axis.title = element_text(family = "arial", face = "bold", size = 15),
        axis.text = element_text(family = "arial", face = "bold", size = 14,colour = "black"), 
        plot.title = element_text(family = "arial", face = "bold", size = 14, hjust = 0.5), 
        legend.position = "none", strip.text = element_text(face = "bold", size = 14), 
        panel.grid.minor = element_blank() )+
  stat_poly_eq(aes(label = paste(..rr.label..)), 
      label.x.npc = 0.33, label.y.npc = 0.9, formula = y~x, parse = TRUE, size = 4.6,coef.digits = 2) +
  xlim(-5,5) + ylim(-5,5) + ## remove outliers, abs(effect size) > 5
  coord_cartesian(xlim = c(-2.2,2.2), ylim = c(-2.2,2.2)) ## zoom in range: c(-2.2,2.2)

line plot

ggplot(topss, aes(x=tis, y=reproducibility)) +
  geom_point(shape=19, fill="hotpink1", color="hotpink1", size=3)+
  geom_line() +
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE,color="tomato1")+
  theme_bw(base_size=25)+
  ylim(0,1)+
  labs(title=paste0("xxx"),
       x="xxx", y = "xxx")
dev.off()


#  y: #eQTLs, x: #PCs
PC = c(0,  5, 10, 15, 20)
nG = PC_eQTL
nPC = length(PC)
pdf("xxx.pdf", 4,3)
plot(0, 0, xlim=c(0, 20), ylim=c(0, 600000), type="n", xlab="Number of Genotype PCs", ylab="Number of eSNPs", main="",cex.lab=1.2)
abline(v=seq(0, 20, 5), lty=2, col="lightgrey")
abline(h=seq(0, 600000, 50000), lty=2, col="lightgrey")
points(PC, nG, type="o", col=COL[2], pch=20, lwd=2)
#legend("bottomright", legend=c("eQTLs vs PCs at 5% FDR in African American"), fill=c(COL[1], "grey"), bg="white", cex=1.2)

voronoi plot

library(ggforce)


dat = data.frame(data_use$location_use, "region"=SpatialPCA_region,"expression"=data_use$SCTscaled[which(rownames(data_use$SCTscaled)=="FETUB"),])

dat_random = dat[sample(1:3813,200,replace=F),]

pdf("test.pdf",width=10,height=10)
color_in = c("#e07a5f","#a8dadc","#F9C74F")
p=ggplot(dat_random, aes(x, y)) +
  geom_voronoi_tile(aes(fill = region,group = -1L), colour = 'white',
                    expand = unit(-.5, 'mm'), radius = unit(2, 'mm'))+
  # geom_voronoi_segment(aes(fill = region,group = -1L)) +
  theme_void() +
   scale_color_manual(values = color_in)+
   theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(),
              axis.text.x=element_text() ,
              legend.position = "right") +
    guides(colour = guide_legend(override.aes = list(size=20)))
print(p)

lowcolor="#05a8aa"
midcolor="#edede9"
highcolor="#bc412b"
dat_random$expression[which(dat_random$region !="Tumor")]=0
p=ggplot(dat_random, aes(x, y)) +
  geom_voronoi_tile(aes(fill = expression,group = -1L), colour = 'white',
                    expand = unit(-.5, 'mm'), radius = unit(2, 'mm'))+
  geom_voronoi_segment(aes(fill = region,group = -1L)) +
  scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
   theme_void() +
   theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(),
              axis.text.x=element_text() ,
              legend.position = "right") +
    guides(colour = guide_legend(override.aes = list(size=20)))
print(p)
dev.off()

heatmap

mapDrugToColor<-function(annotations){
    colorsVector = ifelse(annotations["category"]=="Others", 
        "blue", ifelse(annotations["category"]=="Brain related", 
        "green", "red"))
    return(colorsVector)
}
   
testHeatmap3<-function(logCPM, annotations) {    
    sampleColors = mapDrugToColor(annotations)
    
    # Assign just column annotations
    heatmap3(logCPM, margins=c(16,16), ColSideColors=sampleColors,scale="none") 
    # Assign column annotations and make a custom legend for them
    heatmap3(logCPM, margins=c(16,16), ColSideColors=sampleColors, scale="none",
        legendfun=function()showLegend(legend=c("Others", 
        "Brain related", "Colon related"), col=c("blue", "green", "red"), cex=1))
    
    # Assign column annotations as a mini-graph instead of colors,
    # and use the built-in labeling for them
    ColSideAnn<-data.frame(Drug=annotations[["category"]])
    heatmap3(logCPM,ColSideAnn=ColSideAnn,
        ColSideFun=function(x)showAnn(x),
        ColSideWidth=0.8)
}

category = c(rep("Others",6),rep("Brain related",3),rep("Others",3),"Colon related", 
"Colon related", rep("Others",16),"Colon related","Others","Colon related",rep("Others",5))

gAnnotationData = data.frame(tissue_name, category)
gLogCpmData = a

pdf("Tissue_GTEx_heatmap_unscaled.pdf",width=7, height=7)
diag(gLogCpmData)=1    
testHeatmap3(gLogCpmData, gAnnotationData)
dev.off()

heatmap change margin size and color

library(BiRewire)
library(corrplot)
library(heatmap3)

#---------------------------------
# Tissues 
#---------------------------------

load("tissue_net.RData")
load("tissue_name.RData")
Tissue_network = tissue_net

# calculate Jaccard Index between each pair of tissues

a = matrix(0,38,38)
for(i in 1:38){
	for(j in (i+1):38){
		a[i,j] = birewire.similarity( Tissue_network[[i]],Tissue_network[[j]])
		a[j,i] = a[i,j]
	}
	print(i)
	print(a[i,])
}
colnames(a) = tissue_name
rownames(a) = tissue_name

# use heatmap3
mapDrugToColor<-function(annotations){
    colorsVector = ifelse(annotations["category"]=="Others", 
        "blue", ifelse(annotations["category"]=="Brain related", 
        "green", "red"))
    return(colorsVector)
}
testHeatmap3<-function(logCPM, annotations) {    
    sampleColors = mapDrugToColor(annotations)
    
    # Assign just column annotations
    heatmap3(logCPM, margins=c(10,10), ColSideColors=sampleColors,scale="none",col = colorRampPalette(c("firebrick", "yellow", "white"))(1024)) 
    # Assign column annotations and make a custom legend for them
    heatmap3(logCPM, margins=c(10,10), ColSideColors=sampleColors, scale="none",col = colorRampPalette(c("firebrick", "yellow", "white"))(1024),
        legendfun=function()showLegend(legend=c("Others", 
        "Brain related", "Colon related"), col=c("blue", "green", "red"), cex=1))
    
    # Assign column annotations as a mini-graph instead of colors,
    # and use the built-in labeling for them
    ColSideAnn<-data.frame(Drug=annotations[["category"]])
    heatmap3(logCPM,ColSideAnn=ColSideAnn,
        ColSideFun=function(x)showAnn(x),
        margins=c(10,10),
        ColSideWidth=0.8)
}
category = c(rep("Others",6),rep("Brain related",3),rep("Others",3),"Colon related", 
"Colon related", rep("Others",16),"Colon related","Others","Colon related",rep("Others",5))
gAnnotationData = data.frame(tissue_name, category)
gLogCpmData = a
pdf("Tissue_GTEx_heatmap_unscaled.pdf",width=8, height=8)
diag(gLogCpmData)=1    
testHeatmap3(gLogCpmData, gAnnotationData)
dev.off()

QQ plot

https://uw-gac.github.io/topmed_workshop_2017/association-tests.html#association-testing-with-aggregate-units

library(ggplot2)
qqPlot <- function(pval) {
    pval <- pval[!is.na(pval)]
    n <- length(pval)
    x <- 1:n
    dat <- data.frame(obs=sort(pval),
                      exp=x/n,
                      upper=qbeta(0.025, x, rev(x)),
                      lower=qbeta(0.975, x, rev(x)))
    
    ggplot(dat, aes(-log10(exp), -log10(obs))) +
        geom_line(aes(-log10(exp), -log10(upper)), color="gray") +
        geom_line(aes(-log10(exp), -log10(lower)), color="gray") +
        geom_point() +
        geom_abline(intercept=0, slope=1, color="red") +
        xlab(expression(paste(-log[10], "(expected P)"))) +
        ylab(expression(paste(-log[10], "(observed P)"))) +
        theme_bw()
}    

qqPlot(assoc$Wald.pval)

QQplot multiple groups

I modified the codes from https://genome.sph.umich.edu/wiki/Code_Sample:_Generating_QQ_Plots_in_R

so that the points won’t be along the y-axis when we observe many small p values

library(lattice)
qqunif.plot<-function(pvalues, 
	should.thin=T, thin.obs.places=2, thin.exp.places=2, 
	xlab=expression(paste("Expected (",-log[10], " p-value)")),
	ylab=expression(paste("Observed (",-log[10], " p-value)")), 
	draw.conf=TRUE, conf.points=1000, conf.col="lightgray", conf.alpha=.05,
	already.transformed=FALSE, pch=20, 
	aspect="iso", 
	prepanel=prepanel.qqunif,
	par.settings=list(superpose.symbol=list(pch=pch)), ...) {
	
	
	#error checking
	if (length(pvalues)==0) stop("pvalue vector is empty, can't draw plot")
	if(!(class(pvalues)=="numeric" || 
		(class(pvalues)=="list" && all(sapply(pvalues, class)=="numeric"))))
		stop("pvalue vector is not numeric, can't draw plot")
	if (any(is.na(unlist(pvalues)))) stop("pvalue vector contains NA values, can't draw plot")
	if (already.transformed==FALSE) {
		if (any(unlist(pvalues)==0)) stop("pvalue vector contains zeros, can't draw plot")
	} else {
		if (any(unlist(pvalues)<0)) stop("-log10 pvalue vector contains negative values, can't draw plot")
	}
	
	
	grp<-NULL
	n<-1
	exp.x<-c()
	if(is.list(pvalues)) {
		nn<-sapply(pvalues, length)
		rs<-cumsum(nn)
		re<-rs-nn+1
		n<-min(nn)
		if (!is.null(names(pvalues))) {
			grp=factor(rep(names(pvalues), nn), levels=names(pvalues))
			names(pvalues)<-NULL
		} else {
			grp=factor(rep(1:length(pvalues), nn))
		}
		pvo<-pvalues
		pvalues<-numeric(sum(nn))
		exp.x<-numeric(sum(nn))
		for(i in 1:length(pvo)) {
			if (!already.transformed) {
				pvalues[rs[i]:re[i]] <- -log10(pvo[[i]])
				exp.x[rs[i]:re[i]] <- -log10((rank(pvo[[i]], ties.method="first")-.5)/nn[i])
			} else {
				pvalues[rs[i]:re[i]] <- pvo[[i]]
				exp.x[rs[i]:re[i]] <- -log10((nn[i]+1-rank(pvo[[i]], ties.method="first")-.5)/(nn[i]+1))
			}
		}
	} else {
		n <- length(pvalues)+1
		if (!already.transformed) {
			exp.x <- -log10((rank(pvalues, ties.method="first")-.5)/n)
			pvalues <- -log10(pvalues)
		} else {
			exp.x <- -log10((n-rank(pvalues, ties.method="first")-.5)/n)
		}
	}


	#this is a helper function to draw the confidence interval
	panel.qqconf<-function(n, conf.points=1000, conf.col="gray", conf.alpha=.05, ...) {
		require(grid)
		conf.points = min(conf.points, n-1);
		mpts<-matrix(nrow=conf.points*2, ncol=2)
        	for(i in seq(from=1, to=conf.points)) {
            		mpts[i,1]<- -log10((i-.5)/n)
            		mpts[i,2]<- -log10(qbeta(1-conf.alpha/2, i, n-i))
            		mpts[conf.points*2+1-i,1]<- -log10((i-.5)/n)
            		mpts[conf.points*2+1-i,2]<- -log10(qbeta(conf.alpha/2, i, n-i))
        	}
        	grid.polygon(x=mpts[,1],y=mpts[,2], gp=gpar(fill=conf.col, lty=0), default.units="native")
    	}

	#reduce number of points to plot
	if (should.thin==T) {
		if (!is.null(grp)) {
			thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
				exp.x = round(exp.x, thin.exp.places),
				grp=grp))
			grp = thin$grp
		} else {
			thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
				exp.x = round(exp.x, thin.exp.places)))
		}
		pvalues <- thin$pvalues
		exp.x <- thin$exp.x
	}
	gc()
	
	prepanel.qqunif= function(x,y,...) {
		A = list()
		A$xlim = range(x)*1.02
		A$xlim[1]=0
		A$ylim = range(y)*1.02
		A$ylim[1]=0
		return(A)
	}

	#draw the plot
	xyplot(pvalues~exp.x, groups=grp, xlab=xlab, ylab=ylab, 
		#aspect=aspect,
		prepanel=prepanel, 
		scales=list(axs="i"), 
		pch=pch,
		panel = function(x, y, ...) {
			if (draw.conf) {
				panel.qqconf(n, conf.points=conf.points, 
					conf.col=conf.col, conf.alpha=conf.alpha)
			};
			panel.xyplot(x,y, ...);
			panel.abline(0,1);
		}, par.settings=par.settings, ...
	)
}

my.pvalue.list<-list("AA"=mydat[mydat$Population=="AA",1], "EA"=mydat[mydat$Population=="EA",1],
"AFA"=mydat[mydat$Population=="AFA",1],"CAU"=mydat[mydat$Population=="CAU",1],
"HIS"=mydat[mydat$Population=="HIS",1])

pdf(paste0("QQ_trait",traits[traitnum],"_test.pdf") ,width = 6, height = 6)
qqunif.plot(my.pvalue.list, auto.key=list(corner=c(.95,.05)))
dev.off()

categorical value visualization on 2d

plot_cluster = function (location, clusterlabel, pointsize = 3, text_size = 15, shape=16,title_in, color_in, legend = "none"){
    cluster = clusterlabel
    loc_x = location[, 1]
    loc_y = location[, 2]
    datt = data.frame(cluster, loc_x, loc_y)
    p = ggplot(datt, aes(x = location[, 1], y = location[, 2], 
        color = cluster)) + geom_point(alpha = 1, size = pointsize,shape=shape) + 
        scale_color_manual(values = color_in) + ggtitle(paste0(title_in)) + 
        theme_void() + theme(plot.title = element_text(size = text_size, 
        face = "bold"), text = element_text(size = text_size), 
        legend.position = legend)
    p
}

continuous value visualization on 2d

library(ggplot2)
# location is n by 2 matrix of location coordinates
# feature is a vector of gene expression
plot_factor_value= function(location, feature, textmethod="Your figure", pointsize = 2, textsize = 15,shape=15) 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
			      scale_color_gradient2( low="#05a8aa",mid="#edede9", high="#bc412b")+
            ggtitle(paste0(textmethod)) + 
            theme_void() + 
            theme(plot.title = element_text(size = textsize), 
            text = element_text(size = textsize), legend.position = "right")
  
    return(p)
}


plot_factor_value= function (location, feature, textmethod, pointsize = 2, textsize = 15,shape=15,lowcolor="#05a8aa",midcolor="#edede9",highcolor="#bc412b") 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
      scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
            ggtitle(paste0(textmethod)) + 
            theme_void() + 
            theme(plot.title = element_text(size = textsize), 
            text = element_text(size = textsize), legend.position = "right")
  
    return(p)
}



plot_factor_value_darkmode =function (location, feature, textmethod, pointsize = 2, textsize = 15,shape=15,lowcolor="#ffffff",midcolor="#118ab2",highcolor="#284b63",legend="right") 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
      scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
            ggtitle(paste0(textmethod)) + 
            theme_dark() + 
            #theme_void() +
            theme(axis.line=element_blank(),axis.text.x=element_blank(),
            axis.text = element_text(size = textsize),
            plot.title = element_text(size = textsize), 
          axis.text.y=element_blank(),axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          legend.position=legend,
          #panel.background=element_blank(),
          panel.border=element_blank(),panel.grid.major=element_blank(),
          panel.grid.minor=element_blank(),plot.background=element_blank())

    return(p)
}

calculate Moran I

get_moranI = function(count_in,location){
    # count_in: gene by spot

    count_in = as.matrix(count_in)
    if(length(which(rowSums(count_in)==0))>0){
        count_in = count_in[-which(rowSums(count_in)==0),]
    }
    library(moranfast)
    output <- apply(count_in, 1, function(x) moranfast(x, location[,1], location[,2]))
    out =  sapply(output, '[[', 1)
    return(out)
}   



# spatialpc_moranI = get_moranI(SpatialPCA_result$SpatialPCs[1:5,], SpatialPCA_result$location)
# pc_moranI = get_moranI(PCA_result$PCs[1:5, ], SpatialPCA_result$location)


get_moranI_normalize = function(expr,location){
    # count_in: gene by spot
    
    library(moranfast)
    output <- apply(expr, 1, function(x) moranfast(x, location[,1], location[,2]))
    out =  sapply(output, '[[', 1)
    return(out)
}   

string split code

strsplit_func = function(input){
  strsplit(input, split = "-")[[1]][1]
}


unlist(lapply(gostres$result$term_id,strsplit_func))

testEnrichment

testEnrichment = function(Goi, gsets, background,  mindiffexp=2) {
  scores = do.call(rbind.data.frame, lapply(names(gsets), function(gsetid){
    gset = gsets[[gsetid]]

    # much faster than using table
    tp = length(intersect(Goi, gset))
    fn = length(Goi) - tp
    fp = length(gset) - tp
    tn = length(background) - tp - fp - fn

    contingency_table = matrix(c(tp, fp, fn, tn), 2, 2)

    if (contingency_table[1,1] < mindiffexp){
      return(NULL)
    }

    fisher_result = stats::fisher.test(contingency_table, alternative="two.sided",conf.int = TRUE)
    list(pval=fisher_result$p.value, odds=fisher_result$estimate, odds_conf_low = fisher_result$conf.int[1],odds_conf_high = fisher_result$conf.int[2],found=contingency_table[1,1], gsetid=gsetid)
  }))
  if (nrow(scores) > 0) {
    scores$qval = stats::p.adjust(scores$pval, method="fdr")
  }
  scores
}

gprofiler2, GSEA

library(ActivePathways)

gmt <- read.GMT("/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.gmt")
gmt_filter = gmt[c(1,2)]
count = 0
for(item in 1:length(gmt)){
  itemid = gmt[[item]]$id 
  if(length(grep("CANCER MODULE",itemid))!=0  | length(grep("GOBP",itemid))!=0 | length(grep("GOCC",itemid))!=0 | length(grep("GOMF",itemid))!=0){
    count = count + 1
    print(count)
    gmt_filter[[count]]=gmt[[item]] 
  }
}
write.GMT(gmt_filter, "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")
upload_GMT_file(gmtfile = "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")



library(MAST)
library(fdrtool)
library(qvalue)

library(gprofiler2)
library(ggplot2)
library(tidyverse)


h: hallmark gene sets hallmark gene sets as Gene Symbols  h.all.v7.4.symbols.gmt
C1: positional gene sets Gene sets corresponding to each human chromosome and each cytogenetic band. 
c2: curated gene sets KEGG gene sets as Gene Symbols  c2.cp.kegg.v7.4.symbols.gmt
C3: regulatory target gene sets Gene sets representing potential targets of regulation by transcription factors or microRNAs. 
c4: computational gene sets cancer modules as Gene Symbols  c4.cm.v7.4.symbols.gmt
c5: Ontology gene sets all GO gene sets as Gene Symbols  c5.go.v7.4.symbols.gmt
c5: Ontology gene sets Human Phenotype Ontology as Gene Symbols  c5.hpo.v7.4.symbols.gmt
C6: oncogenic signature gene sets Gene sets that represent signatures of cellular pathways which are often dis-regulated in cancer. 
c7: immunologic signature gene sets all immunologic signature gene sets as Gene Symbols c7.all.v7.4.symbols.gmt
C8: cell type signature gene sets Gene sets that contain curated cluster markers for cell types identified in single-cell sequencing studies of human tissue.

# GMT downloaded in : /net/mulan/disk2/shanglu/Projects/datasets/msigdbr


upload_GMT_file(gmtfile = "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")
# gp__WcnH_hRul_jhQ


bg = rownames(SpatialPCA_result$rawcount)
gostres <- gost(query = unlist(ST_pseudotime_genelist), 
                organism = "gp__WcnH_hRul_jhQ", ordered_query = FALSE, 
                multi_query = FALSE, significant = FALSE, exclude_iea = FALSE, 
                measure_underrepresentation = FALSE, evcodes = FALSE, 
                user_threshold = 0.05, correction_method = "g_SCS", 
                domain_scope = "custom", custom_bg = bg, 
                numeric_ns = "", 
                #sources = c("GO:MF","GO:BP", "GO:CC","KEGG"), 
                as_short_link = FALSE)
gostres$result = gostres$result[order(gostres$result$p_value),]
gostres$result$Source = unlist(lapply(gostres$result$term_id,strsplit_func))
gostres$result$Source[which(gostres$result$Source=="MODULE")]="CANCER MODULE"
datt = gostres$result[1:10,]
datt$log10p = -log10(datt$p_value)




p=ggplot(data=datt, aes(x=fct_reorder(tolower(term_id),log10p), y=log10p, fill=Source,label = ifelse(significant ==TRUE, "*",""))) +
  geom_bar(stat="identity", position=position_dodge(),color="black",width=0.8)+
  #scale_fill_continuous(low='#F0E442', high='red', guide=guide_colorbar(reverse=TRUE)) + 
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7"))+
  labs(title=paste0("Pseudotime related genes GSEA"),x="Biological terms", y = "-log10(p value)")+
  coord_flip()+
  theme_classic()+
  #geom_text(vjust = 0.5) +
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color = "red",size=2)+
  geom_text(vjust = 1, nudge_y = 0.5)+
  #ylim(0,1)+
    theme(plot.title = element_text(size = 30,color="black",face="bold"),
              text = element_text(size = 30,color="black",face="bold"),
              #axis.title = element_text(size = 25,color="black",face="bold"),
              axis.text.x=element_text(size = 30,color="black",face="bold") ,
              legend.position = "right")# +


pdf(paste0("ST_GSEA_pseudotime_genes.pdf"),width=40,height=5)
  print(p)
dev.off()

tryCatch skip errors

tryCatch({
    celltype_in = as.matrix(celltype_proportion_in[,num_celltype])
    sparkXw <- SPARK::sparkx(count_in, location_in,X_in=celltype_in,numCores = 5,verbose=FALSE)
    dat = data.frame("gene"=rownames(sparkXw$res_mtest),"combinedPval"=sparkXw$res_mtest$combinedPval,"sparkXw_adj_p"=sparkXw$res_mtest$adjustedPval,"celltype"=colnames(celltype_proportion_in)[num_celltype])
    save(dat, file = paste0("/net/mulan/disk2/shanglu/Projects/explore/celltype_SVG/10X_visium_mouse_kidney/SPARK/SPARKX_result_control_celltype_",num_celltype,"_permu_",permu,".RData"))
    }, error=function(e){cat("num_celltype",num_celltype," :",conditionMessage(e), "\n")})

ZIP and unzip

tar xvzf test.tar.gz
# -x --extract = extract files from an archive
# -v, --verbose = verbosely list files processed
# -z, --gzip = gzipped files eg. for tar.gz packages
# -f, --file ARCHIVE = use archive file or device ARCHIVE

tar -xvf file_name.tar
gzip -d file.gz

Plot figures for network

http://blog.schochastics.net/post/ggraph-tricks-for-common-problems/

read in h5ad file

library(reticulate)
library(Seurat)
use_python("/rsrch5/home/biostatistics/lshang/anaconda3/envs/scETM/bin/python3.7")
sc <- import("scanpy")

visium_data = sc$read_h5ad(paste0(path_data,"/visium_data_upload.h5ad"))
visium_data$X = as.matrix(visium_data$X)
data_use = list()
data_use$meta = visium_data$obs
data_use$location = data_use$meta[,c("array_row","array_col")]
data_use$count = t(visium_data$X)
colnames(data_use$count) = rownames(data_use$meta)
rownames(data_use$count) = as.character(visium_data$var$SYMBOL)

plot 2d values with clusters

plot_factor_value_color_by_cluster <- function(location, feature_fill, feature_color, alpha_in,textmethod, stroke=2,pointsize = 2, textsize = 15, alpha = 0.9, legend_title_fill = "Fill", legend_title_color = "Color") {
    # Ensure location is a data frame
    location <- as.data.frame(location)
    
    # Extract coordinates
    locc1 <- location[, 1]
    locc2 <- location[, 2]
    
    # Combine data
    datt <- data.frame(feature_fill, feature_color, locc1, locc2,alpha_in)
    
    # Create ggplot
    p <- ggplot(datt, aes(x = locc1, y = locc2, fill = feature_fill, color = feature_color, alpha=alpha_in)) +
        geom_point(shape = 21, size = pointsize, alpha = alpha_in,stroke = stroke) +
        scale_fill_viridis() +   # continuous fill color scale
        scale_color_manual(values=D3) + # Discrete border color scale
        ggtitle(textmethod) +
        theme_void() +
        theme(plot.title = element_text(size = textsize),
              text = element_text(size = textsize), 
              legend.position = "right") +
        labs(fill = legend_title_fill, color = legend_title_color)
    
    # Return plot
    return(p)
}


scaler <- function(x, q = 0.95) {x[x > quantile(x, q)] <- max(x); scales::rescale(x)}
pdf(paste0("Lung","_celltype_genes_in_",celltype_focus,"_region_as_env.pdf"),width=6,height=5)
region_labels = data_use$region_label
region_labels[which(region_labels!="Tumor")]="other"
for(genename in names(results[order(results)[1:20]])){
  print(genename)
  #genename = "Foxf1"
  expr = scaler(normalized_counts[match(genename,rownames(normalized_counts)),])
  size = expr*3
  alpha_in = rep(0.9,length(expr))
  alpha_in[which(region_labels!="Tumor")] = 0.3
  p = plot_factor_value_color_by_cluster(data_use$location_use, feature_fill=expr,alpha_in =alpha_in ,feature_color = as.factor(data_use$region_label),textmethod=paste0(genename," expression in ",celltype_focus),  stroke=0.7, pointsize = size, textsize = 20)+ guides(colour = guide_legend(override.aes = list(size=10)))
  print(p)

}

dev.off()

Figures This is The Tagline

Table of Contents