Three machines

master
comp1
comp2

SSH between machines:

not only authorized_keys need to be consistent, but also known_hosts

Mount hard disk and build NFS system

root@master:~# mount  /dev/mapper/fileserver-xzlab /home

root@master:/# cat /etc/exports
/home xxx.xxx.9.xxx2(rw,sync,no_root_squash,no_subtree_check)
/home xxx.xxx.11.xxx3(rw,sync,no_root_squash,no_subtree_check)

root@master:/# service portmap restart
root@master:/# systemctl enable rpcbind
root@comp1:/# systemctl enable rpcbind
root@comp2:/# systemctl enable rpcbind

root@master:/# /etc/init.d/nfs-kernel-server start
root@comp1:~# /etc/init.d/nfs-kernel-server start
root@comp2:~# /etc/init.d/nfs-kernel-server start

root@master:/# showmount -e
Export list for master:
/home xxx.xxx.11.xxx3,xxx.xxx.9.xxx2

root@comp1:~# mount xxx.xxx.10.xxx1:/home/ /home
root@comp2:~# mount xxx.xxx.10.xxx1:/home/ /home

start munge

/etc/init.d/munge start

test munge

# test
munge -n
# Check if a credential can be locally decoded:
munge -n |unmunge
# Check if a credential can be remotely decoded:
# on slave
munge -n | ssh comp1 unmunge
# on master
munge -n | ssh comp2 unmunge
# Run a quick benchmark:
remunge

when problems show up in munge:

root@comp1:~# munge -n
munge: Error: Failed to access "/var/run/munge/munge.socket.2": No such file or directory
# or
/usr/sbin/munged --foreground
munged: Error: Found pid 25660 bound to socket "/var/run/munge/munge.socket.2"

root@master:~# rm /var/run/munge/munge.socket.2
munged --force --syslog

# “/etc/init.d/munge start“ is very fragile, it can cause many problems.
# after using “munged --force --syslog”, no more problems in “munge -n | ssh comp1 unmunge”.

# only when three nodes are set up, they can recognize  each other

start MYSQL

/etc/init.d/mysql restart

slurmdbd.conf

AuthType=auth/munge
AuthInfo=/var/run/munge/munge.socket.2
DbdHost=localhost
StorageHost=localhost
StorageLoc=slurm_acct_db
StoragePass=password
StorageType=accounting_storage/mysql
StorageUser=slurm
LogFile=/var/log/slurm-llnl/slurmdbd.log
PidFile=/var/run/slurm-llnl/slurmdbd.pid
SlurmUser=slurm

slurm.conf

ControlMachine=master
ControlAddr=xxx.xxx.10.xxx1
#BackupController=
#BackupAddr=
#
AuthType=auth/munge
CacheGroups=0
#CheckpointType=checkpoint/none
CryptoType=crypto/munge
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=999999
#GresTypes=
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobCheckpointDir=/var/slurm/checkpoint
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=1
#KillOnBadExit=0
#Licenses=foo*4,bar
#MailProg=/bin/mail
#MaxJobCount=5000
#MaxStepCount=40000
#MaxTasksPerNode=128
MpiDefault=none
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
ProctrackType=proctrack/pgid
#Prolog=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
ReturnToService=1
#SallocDefaultCommand=
SlurmctldPidFile=/var/run/slurm-llnl/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurm-llnl/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/tmp/slurmd
SlurmUser=root
#SlurmdUser=root
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/tmp
SwitchType=switch/none
#TaskEpilog=
TaskPlugin=task/none
#TaskPluginParam=
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFs=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#
#
# TIMERS
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
#MessageTimeout=10
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
SlurmctldTimeout=120
SlurmdTimeout=300
#UnkillableStepTimeout=60
#VSizeFactor=0
Waittime=0
#
#
# SCHEDULING
DefMemPerCPU=1000
FastSchedule=1
#MaxMemPerCPU=0
MaxMemPerNode=54000
#SchedulerRootFilter=1
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SchedulerPort=7321
SelectType=select/cons_res
SelectTypeParameters=CR_CPU
#
#
# JOB PRIORITY
#PriorityType=priority/basic
#PriorityDecayHalfLife=
#PriorityCalcPeriod=
#PriorityFavorSmall=
#PriorityMaxAge=
#PriorityUsageResetPeriod=
#PriorityWeightAge=
#PriorityWeightFairshare=
#PriorityWeightJobSize=
#PriorityWeightPartition=
#PriorityWeightQOS=
#
#
# LOGGING AND ACCOUNTING
#AccountingStorageEnforce=0
#AccountingStorageHost=
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStoragePort=
AccountingStorageType=accounting_storage/slurmdbd
#AccountingStorageUser=
AccountingStoreJobComment=YES
ClusterName=cluster
#DebugFlags=
#JobCompHost=
#JobCompLoc=
#JobCompPass=
#JobCompPort=
JobCompType=jobcomp/none
#JobCompUser=
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/linux
SlurmctldDebug=3
#SlurmctldLogFile=
SlurmdDebug=3
#SlurmdLogFile=
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
#
#
# COMPUTE NODES


NodeName=master CPUs=24 RealMemory=64349  State=UNKNOWN
NodeName=comp1 CPUs=48 RealMemory=95367  State=UNKNOWN
NodeName=comp2 CPUs=48 RealMemory=95367 State=UNKNOWN
PartitionName=control Nodes=master Default=NO MaxTime=INFINITE State=UP
PartitionName=compute Nodes=comp1 Default=YES MaxTime=INFINITE State=UP
PartitionName=debug Nodes=comp2 Default=YES MaxTime=INFINITE State=UP

#######

check real memory:

free -m

#######

on all three nodes start slurmd

systemctl start slurmd
systemctl status slurmd
systemctl enable slurmd

root@master:~# systemctl restart slurmctld
root@master:~# systemctl restart slurmd
root@master:~# scp /etc/slurm-llnl/slurm.conf xzlab@comp1:/etc/slurm-llnl/slurm.conf

When nodes are drained:

Example: when comp1 is drained, use root, copy the following lines on master machine

scontrol update NodeName=comp1 State=DOWN Reason="undraining"
scontrol update NodeName=comp1 State=RESUME
scontrol show node comp1
sinfo

scontrol update NodeName=comp2 State=DOWN Reason="undraining"
scontrol update NodeName=comp2 State=RESUME
scontrol show node comp2
sinfo

After restarting, if NFS doesn’t work well,

or clients show “mount.nfs: Stale file handle” error:

root@comp1:~# mount xxx.xxx.10.xxx1:/home/ /home
root@comp2:~# mount xxx.xxx.10.xxx1:/home/ /home

Or edit /etc/fstab by adding one line:

xxx.xxx.10.xxx1:/home       /home   nfs4    _netdev,defaults,nosuid,proto=tcp,auto  0       0

Then restart NFS

/etc/init.d/nfs-kernel-server restart
(first on master then on two clients)

After restarting the machine:

First restart munge on master, comp1, and comp2 respectively:

/etc/init.d/munge start

Then restart mysql on each of the three machines:

/etc/init.d/mysql restart

Next restart slurmctld on master:

systemctl restart slurmctld

Finally restart slurmd on each of the three machines:

systemctl restart slurmd

when the firewall ufw is blocking traffic from the compute node and slurmctld/slurmd are not active

on master:

systemctl start slurmctld
sudo ufw allow from xxx.xxx.9.xx
sudo ufw allow from xxx.xxx.11.xxx

Just figured out how to send message across the server to another user

lulushang@master:~# who
lulushang pts/4        2019-06-06 15:15 (xx.x.xxx.xxx)

then you could type like

lulushang@master:~# write lulushang pts/4
write: write: you have write permission turned off.

hello

some colors
some packages
increase point size in legends
violin plot
Venn plot
Volcano plot
histogram
density plot
barplot
error bar plot
pie plot
scatter plot
line plot
voronoi plot
heatmap
heatmap change margin size and color
QQ plot
QQplot multiple groups
categorical value visualization on 2d
continuous value visualization on 2d
calculate Moran I
string split code
testEnrichment
gprofiler2, GSEA
tryCatch skip errors
ZIP and unzip
read in h5ad file
plot 2d values with clusters

some colors

D3=c("#f4f1de","#e07a5f","#3d405b","#81b29a","#f2cc8f","#a8dadc","#f1faee","#f08080",
"#F94144", "#dda15e", "#F8961E" ,"#bc6c25", "#F9C74F" ,"#90BE6D", "#43AA8B",
 "#4D908E", "#577590", "#277DA1", "#AEC7E8" ,"#FFBB78" ,"#98DF8A", "#FF9896",
"#C5B0D5" ,"#C49C94", "#F7B6D2", "#C7C7C7" ,"#DBDB8D" ,"#9EDAE5", "#393B79",
"#637939", "#8C6D31", "#843C39", "#7B4173" ,"#5254A3" ,"#8CA252", "#BD9E39",
"#AD494A", "#A55194", "#6B6ECF", "#B5CF6B", "#E7BA52" ,"#D6616B", "#CE6DBD",
"#9C9EDE", "#CEDB9C" ,"#E7CB94", "#E7969C", "#DE9ED6" ,"#3182BD", "#E6550D",
"#31A354", "#756BB1" ,"#636363", "#6BAED6" ,"#FD8D3C" ,"#74C476", "#9E9AC8",
"#969696", "#9ECAE1" ,"#FDAE6B", "#A1D99B" ,"#BCBDDC" ,"#BDBDBD", "#C6DBEF",
"#FDD0A2" ,"#C7E9C0" ,"#DADAEB", "#D9D9D9")

some packages

library(ggplot2)
library(ggpubr)


library(reticulate)
use_python("/net/mulan/home/shanglu/anaconda3/envs/SVGPVAE/bin/python", required=TRUE)
sc <- import("scanpy")

suppressMessages(require(SPARK))
library(Seurat)
library(peakRAM)
library(ggplot2)
library(ggpubr)
library(mclust) # ARI
library(aricode)# NMI
library(SpatialPCA)# NMI
library(lisi) # lisi score
library(reticulate) 
library(tidyverse)

increase point size in legends

+ guides(colour = guide_legend(override.aes = list(size=10)))

violin plot

dp <- ggplot(dat, aes(x=num, y=pve)) + 
  geom_violin(trim=FALSE)+
  geom_boxplot(width=0.1, fill="white")+
  labs(title="xxx",x="xxx", y = "xxx")+
  scale_fill_brewer(palette="RdBu") + 
  theme_minimal(base_size = 22)
dp

boxplot

# example 1
p <- ggplot(dat, aes(x = num, y = pve )) +
        geom_boxplot(alpha = 0.8,fill = "cornflowerblue") + 
        scale_fill_manual(values=c("chocolate1","seagreen3"))+
        scale_y_continuous(name = "xxx",
                           #breaks = seq(0, 9, 1),
                           limits=c(0, 1)) +
        scale_x_discrete(name = "xxx") +
        ggtitle("xxx") +
        theme_minimal() +
        theme(plot.title = element_text(size = 18),
              text = element_text(size = 18),
              #axis.title = element_text(face="bold"),
              axis.text.x=element_text(size = 18) ,
              legend.position = "bottom")# +
        #facet_grid(. ~ Method)
p


# example 2

dat = data.frame(Method, ranking, Tissue, Tissue_types)

p10 <- ggplot(dat, aes(x = Tissue_types, y = ranking, fill = Tissue_types)) +
        geom_boxplot(alpha = 0.7) +scale_fill_manual(values=c("skyblue2","seagreen3"))+
        scale_y_continuous(name = "xxx",
                           breaks = seq(0, 40, 5),
                           limits=c(0, 40)) +
        scale_x_discrete(name = " ") +
        ggtitle(paste0(disease_name[i])) +
        theme_bw() +
        theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(face="bold"),
              axis.text.x=element_text(size = 15) ,
              legend.position = "bottom") +
        facet_grid(. ~ Method)
p10


# example 3

# order_dat$method_ratio = factor(order_datface$method_ratio, levels=rev(taba$method_ratio),order=T)
p2=ggplot(order_dat, aes(x=method_ratio, y=order)) +
  #geom_boxplot(alpha = 0.6,fill = "lightgreen")+
  geom_boxplot(alpha = 0.6,fill = cbp[1:7])+
  #stat_summary(fun=mean, geom="point", shape=20, size=10, color="red", fill="red") +
  # geom_abline(intercept = coefs[1], slope = coefs[2],color="orange",size=2)+
  #geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
  #geom_hline(yintercept=median(SpatialPCA_ratio), linetype="dashed", color = "red",size=1)+
  #geom_jitter(position = position_jitter(w = 0.1, h = 0),fill="#D55E00",color="#D55E00", size=1)+
  theme_bw(base_size=20)+
  #ylim(0,1)+
  #theme(axis.text.x = element_text(angle = 60,  hjust=1))+
  labs(title=paste0(""),
       x="", y = "Rank order")+
  coord_flip()
print(p2)

Venn plot

library(Vennerable)

Vcommon <- Venn(list("ePeak Genes"= ePeaks$GENE,"eGenes"= eGenes$Gene ) )
plot(Vcommon,doWeights=TRUE)


make_venn = function(plist , namelist, title){
  library(VennDiagram)
  library(RColorBrewer)
  myCol <- brewer.pal(length(plist), "Pastel2")

  venn.diagram(
        x = plist,
        category.names = namelist,
        filename = paste0("#",title,".png"),
        output=TRUE,
        # Output features
        imagetype="png" ,
        height = 480 , 
        width = 480 , 
        resolution = 300,
        compression = "lzw",
        # Circles
        lwd = 2,
        lty = 'blank',
        fill = myCol,
        # Numbers
        cex = .6,
        fontface = "bold",
        fontfamily = "sans",
        # Set names
        cat.cex = 0.6,
        cat.fontface = "bold",
        cat.default.pos = "outer",
        cat.pos = c(-27, 27, 135),
        cat.dist = c(0.055, 0.055, 0.085),
        cat.fontfamily = "sans",
        rotation = 1)


}

Volcano plot

VolcData <- allChr.est[,c("beta","pvalue","qvalue")]
VolcData$sig <- ifelse(paste(allChr.est$PEAK,allChr.est$SNP) %in% paste(reported_m6AQTL$PEAK,reported_m6AQTL$SNP),"FDR<0.1","Non-sig")

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ggplot(VolcData, aes(x = beta, y = -log10(pvalue)) )+ geom_point(size = 0.5, color =  "gray")+geom_point(data = VolcData[VolcData$sig == "FDR<0.1",],aes(x = beta, y = -log10(pvalue)), color = "#0072B2",size = 0.6 )+theme_bw()+theme(axis.title =  element_text(size = 20, face = "bold"),axis.text = element_text(size = 20, face = "bold", color = "black"),  axis.line = element_line(color = "black",size = 0.8), axis.ticks = element_line(color = "black",size = 0.8))+xlab("Effect sizes") + scale_x_continuous(limits = c(-2.5,2.5))

histogram

# example 1

ggplot(df, aes(x=df[,k])) +
  geom_histogram(bins = 150,color="darkblue", fill="lightblue")+
  geom_vline(aes(xintercept=median(df[,k])),linetype="dashed")+ 
  theme_minimal(base_size = 22)+
  geom_density(alpha=0.6)+
   labs(title=paste0(xxx),x=paste0(xxx), y = "xxx")

# example 2

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ggplot(data = reported_m6AQTL, aes(x = beta) )+
geom_histogram(col=I("black"),bins = 100)+
theme_bw()+
theme(axis.title =  element_text(size = 20, face = "bold"),
      axis.text = element_text(size = 15, face = "bold", color = "black"),  
      axis.line = element_line(color = "black",size = 0.8), 
      axis.ticks = element_line(color = "black",size = 0.8))+
xlab("Effect sizes") 

density plot

# example 1

  ggplot(eqtl_table_EU2, aes(x = eqtl_table_EU_rs_dist_tss)) + 
  geom_density(aes(fill = eQTL_order), alpha = 0.4) + 
  geom_vline(data = mu, aes(xintercept = grp.median, color = eQTL_order), linetype = "dashed") + 
  labs(title="xxx",x ="xxx") 

# example 2

p1 <-
        ggplot(ct1, aes(x=pos, group=Feature,colour=factor(Feature), weight=3*weight))  +
        ggtitle("Distribution on mRNA") +
        theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
        xlab("") +
        ylab("Frequency") +
        geom_density(adjust=adjust,aes(fill=factor(Feature)),alpha=0.2,position="fill") +
        annotate("text", x = 0.5, y = -0.2, label = "5'UTR") +
        annotate("text", x = 1.5, y = -0.2, label = "CDS") +
        annotate("text", x = 2.5, y = -0.2, label = "3'UTR") +
        theme_bw() + theme(axis.ticks = element_blank(), axis.text.x = element_blank(),panel.border = element_blank(), panel.grid.major = element_blank(),
                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
        annotate("rect", xmin = 0, xmax = 1, ymin = -0.12, ymax = -0.08, alpha = .99, colour = "black")+
        annotate("rect", xmin = 2, xmax = 3, ymin = -0.12, ymax = -0.08, alpha = .99, colour = "black")+
        annotate("rect", xmin = 1, xmax = 2, ymin = -0.16, ymax = -0.04, alpha = .2, colour = "black")

barplot

# example 1
p1 =ggplot(dat, aes(x=methods, y=value,fill=methods)) +
	  geom_bar(stat="identity", color="black",width=0.8)+
  #scale_fill_brewer(palette="Paired")+
  #scale_fill_brewer(palette="Set3")+
  #scale_fill_distiller(palette = "Oranges")+
  scale_fill_manual(values = D3)+
  labs(title=paste0("Sample ",name[sample]))+
  theme(legend.position="right") +
  theme_classic()+
  	  #geom_abline(intercept = coefs[1], slope = coefs[2],color="orange",size=2)+
	  #geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
	  #geom_jitter(position = position_jitter(w = 0.1, h = 0),fill="#D55E00",color="#D55E00", size=1)+
	  #ylim(0,1)+
  #geom_hline(yintercept=median(dat[1,metrics+1]), linetype="dashed", color = "red",size=1)+
  #scale_fill_manual(values = method_color)+
  theme(axis.text.x = element_text(angle = 60,  hjust=1))+
  theme(plot.title = element_text(size = 30),
              text = element_text(size = 30),
              #axis.title = element_text(face="bold"),
              #axis.text.x=element_text(size = 20) ,
              legend.position = "right")+
  facet_wrap(~variable)+
  labs(title=paste0(""),
   x="Methods", y = paste0("ARI"))

# example 2
p=ggplot(data=datt, aes(x=fct_reorder(tolower(term_id),log10p), y=log10p, fill=Source,label = ifelse(significant ==TRUE, "*",""))) +
  geom_bar(stat="identity", position=position_dodge(),color="black",width=0.8)+
  #scale_fill_continuous(low='#F0E442', high='red', guide=guide_colorbar(reverse=TRUE)) + 
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7"))+
  labs(title=paste0("Pseudotime related genes GSEA"),x="Biological terms", y = "-log10(p value)")+
  coord_flip()+
  theme_classic()+
  #geom_text(vjust = 0.5) +
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color = "red",size=2)+
  geom_text(vjust = 1, nudge_y = 0.5)+
  #ylim(0,1)+
    theme(plot.title = element_text(size = 30,color="black",face="bold"),
              text = element_text(size = 30,color="black",face="bold"),
              #axis.title = element_text(size = 25,color="black",face="bold"),
              axis.text.x=element_text(size = 30,color="black",face="bold") ,
              legend.position = "right")# +


# example 3
ggplot(data=dat, aes(x=topcount, y=count, fill=method)) +
  geom_bar(stat="identity", position=position_dodge())+
  scale_fill_brewer(palette="Paired")+
  labs(title="xxx",x="xxx", y = "xxx")+
  theme_minimal(base_size = 22)+
  theme(legend.position="bottom") 

error bar plot

# reference: m6A nature genetics paper
torus.enrich <- read.table("~/m6AQTL/m6A_QTL_results/linear_model/annotation/m6AQTL.AnnotationEnrich.est", sep = "\t")[-c(1),]
torus.enrich <- gsub("\\s+", "\t", sapply(torus.enrich,trimws))
torus.enrich <- do.call(rbind.data.frame, strsplit(torus.enrich, split = "\t") )
torus.enrich[,-c(1)] <- apply(torus.enrich[,-c(1)],2, function(x) as.numeric(as.character(x)) )

colnames(torus.enrich) <- c("feature","LogOddRatio","CI05","CI95")
torus.enrich$feature <- factor( c( "5' UTR","3' UTR", "CDS", "Intron", "Intergenic\nrepressive" ))
torus.enrich$feature <- factor( c( "5' UTR","3' UTR", "CDS", "Intron" , "Intergenic\nrepressive"), levels = torus.enrich[order(torus.enrich$LogOddRatio),"feature"] )

ggplot(torus.enrich,aes(x = LogOddRatio/log(2), y = feature ))+geom_point(size = 2)+ xlab("Log2 enrichment")+
  geom_errorbarh(aes(xmin = CI05/log(2), xmax = CI95/log(2), height = 0.3),size = 0.8)+ 
  theme_bw()  + ylab("Features")+
  geom_vline(xintercept = 0,linetype="dotted", colour = "red")+theme(axis.ticks = element_blank(),  
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), axis.text = element_text(face="bold",size = 23, colour = "black"),axis.text.y = element_text(angle = 10),axis.title.x = element_text(face="bold",size = 20) , axis.title.y = element_blank())+coord_cartesian(xlim=c(-9,6))



# Use 80% confidence intervals
p <- ggplot(pi1_phenotypes.df, 
            aes(x=phenotypes, y=pi1, fill=thresh_nlogP)) + 
  geom_bar(position=position_dodge(), stat="identity") +
  geom_errorbar(aes(ymin=ci10, ymax=ci90),
                width=.2,                    # Width of the error bars
                position=position_dodge(.9)) + 
  labs(title = "QTL sharing (ascertained on m6A-QTLs)",
       x = "\nAscertainment cutoff", 
       y = expression(paste("Sharing (", pi["1"], ")")), 
       fill = expression(paste(-log[10], " p-value"))) + 
  coord_cartesian(ylim=c(0,0.8)) +
  scale_fill_manual(values=cbbPalette) +
  theme_minimal() +
  theme(legend.position="bottom")

pie plot

df = data.frame("State" = statenames,"Percentage" = statenum)
library(ggplot2)

pie = ggplot(df, aes(x="", y=Percentage, fill=State)) + geom_bar(stat="identity", width=1)
pie = pie + coord_polar("y", start=0) + geom_text(aes(label = paste0(round(Percentage*100), "%")), position = position_stack(vjust = 0.5))
pie = pie + scale_fill_brewer(palette = "Set3") 
pie = pie + labs(x = NULL, y = NULL, fill = NULL, title = "State")
pie = pie + theme_classic() + theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))
pie
     

scatter plot

# example 1

ggplot(mydata, aes(x=originalranking, y=subs)) +
  geom_point(shape=19, fill="#56B4E9", color="#56B4E9", size=3)+
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE,color="tomato1")+
  theme_bw(base_size=25)+
  labs(title=paste0("xxx"),
       x="xxx", y = "xxx")
dev.off()

# example 2

figure =ggplot(datt, aes(x = x, y = -y, color = region)) +
        geom_point(size=pointsize[sample], alpha = 1) +
        scale_color_viridis(option=coloroption)+
        #ggtitle(paste0("sample ",name[sample],": ",regionID))+
        ggtitle(paste0(regionID))+
        theme_void()+
        theme(plot.title = element_text(size = textsize),
            text = element_text(size = textsize),
            legend.position = "bottom")

# example 3

ggplot(effectSize_melt,aes(x = m6A, y = value))+
geom_point( size = 1)+
ylab("QTL effect sizes")+
xlab("m6A effect size")+
stat_smooth(aes(x = m6A, y = value),method = 'lm')+ 
facet_wrap(facets = "variable", nrow = 2 ) +
  geom_hline(yintercept = 0, lty= 2,colour="#990000")+
  theme_bw() + 
  theme(panel.grid = element_blank(),
        axis.line = element_line(colour = "black",size = I(0.5)),
        axis.title = element_text(family = "arial", face = "bold", size = 15),
        axis.text = element_text(family = "arial", face = "bold", size = 14,colour = "black"), 
        plot.title = element_text(family = "arial", face = "bold", size = 14, hjust = 0.5), 
        legend.position = "none", strip.text = element_text(face = "bold", size = 14), 
        panel.grid.minor = element_blank() )+
  stat_poly_eq(aes(label = paste(..rr.label..)), 
      label.x.npc = 0.33, label.y.npc = 0.9, formula = y~x, parse = TRUE, size = 4.6,coef.digits = 2) +
  xlim(-5,5) + ylim(-5,5) + ## remove outliers, abs(effect size) > 5
  coord_cartesian(xlim = c(-2.2,2.2), ylim = c(-2.2,2.2)) ## zoom in range: c(-2.2,2.2)

line plot

ggplot(topss, aes(x=tis, y=reproducibility)) +
  geom_point(shape=19, fill="hotpink1", color="hotpink1", size=3)+
  geom_line() +
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE,color="tomato1")+
  theme_bw(base_size=25)+
  ylim(0,1)+
  labs(title=paste0("xxx"),
       x="xxx", y = "xxx")
dev.off()


#  y: #eQTLs, x: #PCs
PC = c(0,  5, 10, 15, 20)
nG = PC_eQTL
nPC = length(PC)
pdf("xxx.pdf", 4,3)
plot(0, 0, xlim=c(0, 20), ylim=c(0, 600000), type="n", xlab="Number of Genotype PCs", ylab="Number of eSNPs", main="",cex.lab=1.2)
abline(v=seq(0, 20, 5), lty=2, col="lightgrey")
abline(h=seq(0, 600000, 50000), lty=2, col="lightgrey")
points(PC, nG, type="o", col=COL[2], pch=20, lwd=2)
#legend("bottomright", legend=c("eQTLs vs PCs at 5% FDR in African American"), fill=c(COL[1], "grey"), bg="white", cex=1.2)

voronoi plot

library(ggforce)


dat = data.frame(data_use$location_use, "region"=SpatialPCA_region,"expression"=data_use$SCTscaled[which(rownames(data_use$SCTscaled)=="FETUB"),])

dat_random = dat[sample(1:3813,200,replace=F),]

pdf("test.pdf",width=10,height=10)
color_in = c("#e07a5f","#a8dadc","#F9C74F")
p=ggplot(dat_random, aes(x, y)) +
  geom_voronoi_tile(aes(fill = region,group = -1L), colour = 'white',
                    expand = unit(-.5, 'mm'), radius = unit(2, 'mm'))+
  # geom_voronoi_segment(aes(fill = region,group = -1L)) +
  theme_void() +
   scale_color_manual(values = color_in)+
   theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(),
              axis.text.x=element_text() ,
              legend.position = "right") +
    guides(colour = guide_legend(override.aes = list(size=20)))
print(p)

lowcolor="#05a8aa"
midcolor="#edede9"
highcolor="#bc412b"
dat_random$expression[which(dat_random$region !="Tumor")]=0
p=ggplot(dat_random, aes(x, y)) +
  geom_voronoi_tile(aes(fill = expression,group = -1L), colour = 'white',
                    expand = unit(-.5, 'mm'), radius = unit(2, 'mm'))+
  geom_voronoi_segment(aes(fill = region,group = -1L)) +
  scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
   theme_void() +
   theme(plot.title = element_text(size = 20,  face = "bold"),
              text = element_text(size = 20),
              axis.title = element_text(),
              axis.text.x=element_text() ,
              legend.position = "right") +
    guides(colour = guide_legend(override.aes = list(size=20)))
print(p)
dev.off()

heatmap

mapDrugToColor<-function(annotations){
    colorsVector = ifelse(annotations["category"]=="Others", 
        "blue", ifelse(annotations["category"]=="Brain related", 
        "green", "red"))
    return(colorsVector)
}
   
testHeatmap3<-function(logCPM, annotations) {    
    sampleColors = mapDrugToColor(annotations)
    
    # Assign just column annotations
    heatmap3(logCPM, margins=c(16,16), ColSideColors=sampleColors,scale="none") 
    # Assign column annotations and make a custom legend for them
    heatmap3(logCPM, margins=c(16,16), ColSideColors=sampleColors, scale="none",
        legendfun=function()showLegend(legend=c("Others", 
        "Brain related", "Colon related"), col=c("blue", "green", "red"), cex=1))
    
    # Assign column annotations as a mini-graph instead of colors,
    # and use the built-in labeling for them
    ColSideAnn<-data.frame(Drug=annotations[["category"]])
    heatmap3(logCPM,ColSideAnn=ColSideAnn,
        ColSideFun=function(x)showAnn(x),
        ColSideWidth=0.8)
}

category = c(rep("Others",6),rep("Brain related",3),rep("Others",3),"Colon related", 
"Colon related", rep("Others",16),"Colon related","Others","Colon related",rep("Others",5))

gAnnotationData = data.frame(tissue_name, category)
gLogCpmData = a

pdf("Tissue_GTEx_heatmap_unscaled.pdf",width=7, height=7)
diag(gLogCpmData)=1    
testHeatmap3(gLogCpmData, gAnnotationData)
dev.off()

heatmap change margin size and color

library(BiRewire)
library(corrplot)
library(heatmap3)

#---------------------------------
# Tissues 
#---------------------------------

load("tissue_net.RData")
load("tissue_name.RData")
Tissue_network = tissue_net

# calculate Jaccard Index between each pair of tissues

a = matrix(0,38,38)
for(i in 1:38){
	for(j in (i+1):38){
		a[i,j] = birewire.similarity( Tissue_network[[i]],Tissue_network[[j]])
		a[j,i] = a[i,j]
	}
	print(i)
	print(a[i,])
}
colnames(a) = tissue_name
rownames(a) = tissue_name

# use heatmap3
mapDrugToColor<-function(annotations){
    colorsVector = ifelse(annotations["category"]=="Others", 
        "blue", ifelse(annotations["category"]=="Brain related", 
        "green", "red"))
    return(colorsVector)
}
testHeatmap3<-function(logCPM, annotations) {    
    sampleColors = mapDrugToColor(annotations)
    
    # Assign just column annotations
    heatmap3(logCPM, margins=c(10,10), ColSideColors=sampleColors,scale="none",col = colorRampPalette(c("firebrick", "yellow", "white"))(1024)) 
    # Assign column annotations and make a custom legend for them
    heatmap3(logCPM, margins=c(10,10), ColSideColors=sampleColors, scale="none",col = colorRampPalette(c("firebrick", "yellow", "white"))(1024),
        legendfun=function()showLegend(legend=c("Others", 
        "Brain related", "Colon related"), col=c("blue", "green", "red"), cex=1))
    
    # Assign column annotations as a mini-graph instead of colors,
    # and use the built-in labeling for them
    ColSideAnn<-data.frame(Drug=annotations[["category"]])
    heatmap3(logCPM,ColSideAnn=ColSideAnn,
        ColSideFun=function(x)showAnn(x),
        margins=c(10,10),
        ColSideWidth=0.8)
}
category = c(rep("Others",6),rep("Brain related",3),rep("Others",3),"Colon related", 
"Colon related", rep("Others",16),"Colon related","Others","Colon related",rep("Others",5))
gAnnotationData = data.frame(tissue_name, category)
gLogCpmData = a
pdf("Tissue_GTEx_heatmap_unscaled.pdf",width=8, height=8)
diag(gLogCpmData)=1    
testHeatmap3(gLogCpmData, gAnnotationData)
dev.off()

QQ plot

https://uw-gac.github.io/topmed_workshop_2017/association-tests.html#association-testing-with-aggregate-units

library(ggplot2)
qqPlot <- function(pval) {
    pval <- pval[!is.na(pval)]
    n <- length(pval)
    x <- 1:n
    dat <- data.frame(obs=sort(pval),
                      exp=x/n,
                      upper=qbeta(0.025, x, rev(x)),
                      lower=qbeta(0.975, x, rev(x)))
    
    ggplot(dat, aes(-log10(exp), -log10(obs))) +
        geom_line(aes(-log10(exp), -log10(upper)), color="gray") +
        geom_line(aes(-log10(exp), -log10(lower)), color="gray") +
        geom_point() +
        geom_abline(intercept=0, slope=1, color="red") +
        xlab(expression(paste(-log[10], "(expected P)"))) +
        ylab(expression(paste(-log[10], "(observed P)"))) +
        theme_bw()
}    

qqPlot(assoc$Wald.pval)

QQplot multiple groups

I modified the codes from https://genome.sph.umich.edu/wiki/Code_Sample:_Generating_QQ_Plots_in_R

so that the points won’t be along the y-axis when we observe many small p values

library(lattice)
qqunif.plot<-function(pvalues, 
	should.thin=T, thin.obs.places=2, thin.exp.places=2, 
	xlab=expression(paste("Expected (",-log[10], " p-value)")),
	ylab=expression(paste("Observed (",-log[10], " p-value)")), 
	draw.conf=TRUE, conf.points=1000, conf.col="lightgray", conf.alpha=.05,
	already.transformed=FALSE, pch=20, 
	aspect="iso", 
	prepanel=prepanel.qqunif,
	par.settings=list(superpose.symbol=list(pch=pch)), ...) {
	
	
	#error checking
	if (length(pvalues)==0) stop("pvalue vector is empty, can't draw plot")
	if(!(class(pvalues)=="numeric" || 
		(class(pvalues)=="list" && all(sapply(pvalues, class)=="numeric"))))
		stop("pvalue vector is not numeric, can't draw plot")
	if (any(is.na(unlist(pvalues)))) stop("pvalue vector contains NA values, can't draw plot")
	if (already.transformed==FALSE) {
		if (any(unlist(pvalues)==0)) stop("pvalue vector contains zeros, can't draw plot")
	} else {
		if (any(unlist(pvalues)<0)) stop("-log10 pvalue vector contains negative values, can't draw plot")
	}
	
	
	grp<-NULL
	n<-1
	exp.x<-c()
	if(is.list(pvalues)) {
		nn<-sapply(pvalues, length)
		rs<-cumsum(nn)
		re<-rs-nn+1
		n<-min(nn)
		if (!is.null(names(pvalues))) {
			grp=factor(rep(names(pvalues), nn), levels=names(pvalues))
			names(pvalues)<-NULL
		} else {
			grp=factor(rep(1:length(pvalues), nn))
		}
		pvo<-pvalues
		pvalues<-numeric(sum(nn))
		exp.x<-numeric(sum(nn))
		for(i in 1:length(pvo)) {
			if (!already.transformed) {
				pvalues[rs[i]:re[i]] <- -log10(pvo[[i]])
				exp.x[rs[i]:re[i]] <- -log10((rank(pvo[[i]], ties.method="first")-.5)/nn[i])
			} else {
				pvalues[rs[i]:re[i]] <- pvo[[i]]
				exp.x[rs[i]:re[i]] <- -log10((nn[i]+1-rank(pvo[[i]], ties.method="first")-.5)/(nn[i]+1))
			}
		}
	} else {
		n <- length(pvalues)+1
		if (!already.transformed) {
			exp.x <- -log10((rank(pvalues, ties.method="first")-.5)/n)
			pvalues <- -log10(pvalues)
		} else {
			exp.x <- -log10((n-rank(pvalues, ties.method="first")-.5)/n)
		}
	}


	#this is a helper function to draw the confidence interval
	panel.qqconf<-function(n, conf.points=1000, conf.col="gray", conf.alpha=.05, ...) {
		require(grid)
		conf.points = min(conf.points, n-1);
		mpts<-matrix(nrow=conf.points*2, ncol=2)
        	for(i in seq(from=1, to=conf.points)) {
            		mpts[i,1]<- -log10((i-.5)/n)
            		mpts[i,2]<- -log10(qbeta(1-conf.alpha/2, i, n-i))
            		mpts[conf.points*2+1-i,1]<- -log10((i-.5)/n)
            		mpts[conf.points*2+1-i,2]<- -log10(qbeta(conf.alpha/2, i, n-i))
        	}
        	grid.polygon(x=mpts[,1],y=mpts[,2], gp=gpar(fill=conf.col, lty=0), default.units="native")
    	}

	#reduce number of points to plot
	if (should.thin==T) {
		if (!is.null(grp)) {
			thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
				exp.x = round(exp.x, thin.exp.places),
				grp=grp))
			grp = thin$grp
		} else {
			thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
				exp.x = round(exp.x, thin.exp.places)))
		}
		pvalues <- thin$pvalues
		exp.x <- thin$exp.x
	}
	gc()
	
	prepanel.qqunif= function(x,y,...) {
		A = list()
		A$xlim = range(x)*1.02
		A$xlim[1]=0
		A$ylim = range(y)*1.02
		A$ylim[1]=0
		return(A)
	}

	#draw the plot
	xyplot(pvalues~exp.x, groups=grp, xlab=xlab, ylab=ylab, 
		#aspect=aspect,
		prepanel=prepanel, 
		scales=list(axs="i"), 
		pch=pch,
		panel = function(x, y, ...) {
			if (draw.conf) {
				panel.qqconf(n, conf.points=conf.points, 
					conf.col=conf.col, conf.alpha=conf.alpha)
			};
			panel.xyplot(x,y, ...);
			panel.abline(0,1);
		}, par.settings=par.settings, ...
	)
}

my.pvalue.list<-list("AA"=mydat[mydat$Population=="AA",1], "EA"=mydat[mydat$Population=="EA",1],
"AFA"=mydat[mydat$Population=="AFA",1],"CAU"=mydat[mydat$Population=="CAU",1],
"HIS"=mydat[mydat$Population=="HIS",1])

pdf(paste0("QQ_trait",traits[traitnum],"_test.pdf") ,width = 6, height = 6)
qqunif.plot(my.pvalue.list, auto.key=list(corner=c(.95,.05)))
dev.off()

categorical value visualization on 2d

plot_cluster = function (location, clusterlabel, pointsize = 3, text_size = 15, shape=16,title_in, color_in, legend = "none"){
    cluster = clusterlabel
    loc_x = location[, 1]
    loc_y = location[, 2]
    datt = data.frame(cluster, loc_x, loc_y)
    p = ggplot(datt, aes(x = location[, 1], y = location[, 2], 
        color = cluster)) + geom_point(alpha = 1, size = pointsize,shape=shape) + 
        scale_color_manual(values = color_in) + ggtitle(paste0(title_in)) + 
        theme_void() + theme(plot.title = element_text(size = text_size, 
        face = "bold"), text = element_text(size = text_size), 
        legend.position = legend)
    p
}

continuous value visualization on 2d

library(ggplot2)
# location is n by 2 matrix of location coordinates
# feature is a vector of gene expression
plot_factor_value= function(location, feature, textmethod="Your figure", pointsize = 2, textsize = 15,shape=15) 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
			      scale_color_gradient2( low="#05a8aa",mid="#edede9", high="#bc412b")+
            ggtitle(paste0(textmethod)) + 
            theme_void() + 
            theme(plot.title = element_text(size = textsize), 
            text = element_text(size = textsize), legend.position = "right")
  
    return(p)
}


plot_factor_value= function (location, feature, textmethod, pointsize = 2, textsize = 15,shape=15,lowcolor="#05a8aa",midcolor="#edede9",highcolor="#bc412b") 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
      scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
            ggtitle(paste0(textmethod)) + 
            theme_void() + 
            theme(plot.title = element_text(size = textsize), 
            text = element_text(size = textsize), legend.position = "right")
  
    return(p)
}



plot_factor_value_darkmode =function (location, feature, textmethod, pointsize = 2, textsize = 15,shape=15,lowcolor="#ffffff",midcolor="#118ab2",highcolor="#284b63",legend="right") 
{
    location = as.data.frame(location)
        locc1 = location[, 1]
        locc2 = location[, 2]
        datt = data.frame(feature, locc1, locc2)
        p = ggplot(datt, aes(x = locc1, y = locc2, color = feature)) + 
            geom_point(size = pointsize, alpha = 1,shape=shape) + 
            #scale_color_viridis(option="magma")+
      scale_color_gradient2( low=lowcolor,mid=midcolor, high=highcolor)+
            ggtitle(paste0(textmethod)) + 
            theme_dark() + 
            #theme_void() +
            theme(axis.line=element_blank(),axis.text.x=element_blank(),
            axis.text = element_text(size = textsize),
            plot.title = element_text(size = textsize), 
          axis.text.y=element_blank(),axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          legend.position=legend,
          #panel.background=element_blank(),
          panel.border=element_blank(),panel.grid.major=element_blank(),
          panel.grid.minor=element_blank(),plot.background=element_blank())

    return(p)
}

calculate Moran I

get_moranI = function(count_in,location){
    # count_in: gene by spot

    count_in = as.matrix(count_in)
    if(length(which(rowSums(count_in)==0))>0){
        count_in = count_in[-which(rowSums(count_in)==0),]
    }
    library(moranfast)
    output <- apply(count_in, 1, function(x) moranfast(x, location[,1], location[,2]))
    out =  sapply(output, '[[', 1)
    return(out)
}   



# spatialpc_moranI = get_moranI(SpatialPCA_result$SpatialPCs[1:5,], SpatialPCA_result$location)
# pc_moranI = get_moranI(PCA_result$PCs[1:5, ], SpatialPCA_result$location)


get_moranI_normalize = function(expr,location){
    # count_in: gene by spot
    
    library(moranfast)
    output <- apply(expr, 1, function(x) moranfast(x, location[,1], location[,2]))
    out =  sapply(output, '[[', 1)
    return(out)
}   

string split code

strsplit_func = function(input){
  strsplit(input, split = "-")[[1]][1]
}


unlist(lapply(gostres$result$term_id,strsplit_func))

testEnrichment

testEnrichment = function(Goi, gsets, background,  mindiffexp=2) {
  scores = do.call(rbind.data.frame, lapply(names(gsets), function(gsetid){
    gset = gsets[[gsetid]]

    # much faster than using table
    tp = length(intersect(Goi, gset))
    fn = length(Goi) - tp
    fp = length(gset) - tp
    tn = length(background) - tp - fp - fn

    contingency_table = matrix(c(tp, fp, fn, tn), 2, 2)

    if (contingency_table[1,1] < mindiffexp){
      return(NULL)
    }

    fisher_result = stats::fisher.test(contingency_table, alternative="two.sided",conf.int = TRUE)
    list(pval=fisher_result$p.value, odds=fisher_result$estimate, odds_conf_low = fisher_result$conf.int[1],odds_conf_high = fisher_result$conf.int[2],found=contingency_table[1,1], gsetid=gsetid)
  }))
  if (nrow(scores) > 0) {
    scores$qval = stats::p.adjust(scores$pval, method="fdr")
  }
  scores
}

gprofiler2, GSEA

library(ActivePathways)

gmt <- read.GMT("/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.gmt")
gmt_filter = gmt[c(1,2)]
count = 0
for(item in 1:length(gmt)){
  itemid = gmt[[item]]$id 
  if(length(grep("CANCER MODULE",itemid))!=0  | length(grep("GOBP",itemid))!=0 | length(grep("GOCC",itemid))!=0 | length(grep("GOMF",itemid))!=0){
    count = count + 1
    print(count)
    gmt_filter[[count]]=gmt[[item]] 
  }
}
write.GMT(gmt_filter, "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")
upload_GMT_file(gmtfile = "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")



library(MAST)
library(fdrtool)
library(qvalue)

library(gprofiler2)
library(ggplot2)
library(tidyverse)


h: hallmark gene sets hallmark gene sets as Gene Symbols  h.all.v7.4.symbols.gmt
C1: positional gene sets Gene sets corresponding to each human chromosome and each cytogenetic band. 
c2: curated gene sets KEGG gene sets as Gene Symbols  c2.cp.kegg.v7.4.symbols.gmt
C3: regulatory target gene sets Gene sets representing potential targets of regulation by transcription factors or microRNAs. 
c4: computational gene sets cancer modules as Gene Symbols  c4.cm.v7.4.symbols.gmt
c5: Ontology gene sets all GO gene sets as Gene Symbols  c5.go.v7.4.symbols.gmt
c5: Ontology gene sets Human Phenotype Ontology as Gene Symbols  c5.hpo.v7.4.symbols.gmt
C6: oncogenic signature gene sets Gene sets that represent signatures of cellular pathways which are often dis-regulated in cancer. 
c7: immunologic signature gene sets all immunologic signature gene sets as Gene Symbols c7.all.v7.4.symbols.gmt
C8: cell type signature gene sets Gene sets that contain curated cluster markers for cell types identified in single-cell sequencing studies of human tissue.

# GMT downloaded in : /net/mulan/disk2/shanglu/Projects/datasets/msigdbr


upload_GMT_file(gmtfile = "/net/mulan/disk2/shanglu/Projects/spatialPCA/manuscript_v5/h.c24557all.v7.4.symbols.filter.update.gmt")
# gp__WcnH_hRul_jhQ


bg = rownames(SpatialPCA_result$rawcount)
gostres <- gost(query = unlist(ST_pseudotime_genelist), 
                organism = "gp__WcnH_hRul_jhQ", ordered_query = FALSE, 
                multi_query = FALSE, significant = FALSE, exclude_iea = FALSE, 
                measure_underrepresentation = FALSE, evcodes = FALSE, 
                user_threshold = 0.05, correction_method = "g_SCS", 
                domain_scope = "custom", custom_bg = bg, 
                numeric_ns = "", 
                #sources = c("GO:MF","GO:BP", "GO:CC","KEGG"), 
                as_short_link = FALSE)
gostres$result = gostres$result[order(gostres$result$p_value),]
gostres$result$Source = unlist(lapply(gostres$result$term_id,strsplit_func))
gostres$result$Source[which(gostres$result$Source=="MODULE")]="CANCER MODULE"
datt = gostres$result[1:10,]
datt$log10p = -log10(datt$p_value)




p=ggplot(data=datt, aes(x=fct_reorder(tolower(term_id),log10p), y=log10p, fill=Source,label = ifelse(significant ==TRUE, "*",""))) +
  geom_bar(stat="identity", position=position_dodge(),color="black",width=0.8)+
  #scale_fill_continuous(low='#F0E442', high='red', guide=guide_colorbar(reverse=TRUE)) + 
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7"))+
  labs(title=paste0("Pseudotime related genes GSEA"),x="Biological terms", y = "-log10(p value)")+
  coord_flip()+
  theme_classic()+
  #geom_text(vjust = 0.5) +
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color = "red",size=2)+
  geom_text(vjust = 1, nudge_y = 0.5)+
  #ylim(0,1)+
    theme(plot.title = element_text(size = 30,color="black",face="bold"),
              text = element_text(size = 30,color="black",face="bold"),
              #axis.title = element_text(size = 25,color="black",face="bold"),
              axis.text.x=element_text(size = 30,color="black",face="bold") ,
              legend.position = "right")# +


pdf(paste0("ST_GSEA_pseudotime_genes.pdf"),width=40,height=5)
  print(p)
dev.off()

tryCatch skip errors

tryCatch({
    celltype_in = as.matrix(celltype_proportion_in[,num_celltype])
    sparkXw <- SPARK::sparkx(count_in, location_in,X_in=celltype_in,numCores = 5,verbose=FALSE)
    dat = data.frame("gene"=rownames(sparkXw$res_mtest),"combinedPval"=sparkXw$res_mtest$combinedPval,"sparkXw_adj_p"=sparkXw$res_mtest$adjustedPval,"celltype"=colnames(celltype_proportion_in)[num_celltype])
    save(dat, file = paste0("/net/mulan/disk2/shanglu/Projects/explore/celltype_SVG/10X_visium_mouse_kidney/SPARK/SPARKX_result_control_celltype_",num_celltype,"_permu_",permu,".RData"))
    }, error=function(e){cat("num_celltype",num_celltype," :",conditionMessage(e), "\n")})

ZIP and unzip

tar xvzf test.tar.gz
# -x --extract = extract files from an archive
# -v, --verbose = verbosely list files processed
# -z, --gzip = gzipped files eg. for tar.gz packages
# -f, --file ARCHIVE = use archive file or device ARCHIVE

tar -xvf file_name.tar
gzip -d file.gz

Plot figures for network

http://blog.schochastics.net/post/ggraph-tricks-for-common-problems/

read in h5ad file

library(reticulate)
library(Seurat)
use_python("/rsrch5/home/biostatistics/lshang/anaconda3/envs/scETM/bin/python3.7")
sc <- import("scanpy")

visium_data = sc$read_h5ad(paste0(path_data,"/visium_data_upload.h5ad"))
visium_data$X = as.matrix(visium_data$X)
data_use = list()
data_use$meta = visium_data$obs
data_use$location = data_use$meta[,c("array_row","array_col")]
data_use$count = t(visium_data$X)
colnames(data_use$count) = rownames(data_use$meta)
rownames(data_use$count) = as.character(visium_data$var$SYMBOL)

plot 2d values with clusters

plot_factor_value_color_by_cluster <- function(location, feature_fill, feature_color, alpha_in,textmethod, stroke=2,pointsize = 2, textsize = 15, alpha = 0.9, legend_title_fill = "Fill", legend_title_color = "Color") {
    # Ensure location is a data frame
    location <- as.data.frame(location)
    
    # Extract coordinates
    locc1 <- location[, 1]
    locc2 <- location[, 2]
    
    # Combine data
    datt <- data.frame(feature_fill, feature_color, locc1, locc2,alpha_in)
    
    # Create ggplot
    p <- ggplot(datt, aes(x = locc1, y = locc2, fill = feature_fill, color = feature_color, alpha=alpha_in)) +
        geom_point(shape = 21, size = pointsize, alpha = alpha_in,stroke = stroke) +
        scale_fill_viridis() +   # continuous fill color scale
        scale_color_manual(values=D3) + # Discrete border color scale
        ggtitle(textmethod) +
        theme_void() +
        theme(plot.title = element_text(size = textsize),
              text = element_text(size = textsize), 
              legend.position = "right") +
        labs(fill = legend_title_fill, color = legend_title_color)
    
    # Return plot
    return(p)
}


scaler <- function(x, q = 0.95) {x[x > quantile(x, q)] <- max(x); scales::rescale(x)}
pdf(paste0("Lung","_celltype_genes_in_",celltype_focus,"_region_as_env.pdf"),width=6,height=5)
region_labels = data_use$region_label
region_labels[which(region_labels!="Tumor")]="other"
for(genename in names(results[order(results)[1:20]])){
  print(genename)
  #genename = "Foxf1"
  expr = scaler(normalized_counts[match(genename,rownames(normalized_counts)),])
  size = expr*3
  alpha_in = rep(0.9,length(expr))
  alpha_in[which(region_labels!="Tumor")] = 0.3
  p = plot_factor_value_color_by_cluster(data_use$location_use, feature_fill=expr,alpha_in =alpha_in ,feature_color = as.factor(data_use$region_label),textmethod=paste0(genename," expression in ",celltype_focus),  stroke=0.7, pointsize = size, textsize = 20)+ guides(colour = guide_legend(override.aes = list(size=10)))
  print(p)

}

dev.off()

Table of Contents

Coding

Reading

Writing

Meeting

General stuff