Simulation

Prepare simulation datasets
Generate single cell data
Main functions in simulation
Simulate data, 10000 cells
Simulate data, spot level
Simulate data, spot level with stripe pattern
Simulate data, spot level with arbitrary spatial correlation
High resolution spatial map reconstruction

Prepare simulation datasets

The locations are generated from pixels in a image (LIBDsimu.jpg), which can be downloaded from here.

library("grid")
library("gridExtra")

library(jpeg)
mandrill = readJPEG("LIBDsimu.jpg", native = FALSE)
# > dim(mandrill)
# [1] 1100  984    3
# copy the image three times
mandrill.R = mandrill
mandrill.G = mandrill
mandrill.B = mandrill
# zero out the non-contributing channels for each image copy
mandrill.R[,,2:3] = 0
mandrill.G[,,1]=0
mandrill.G[,,3]=0
mandrill.B[,,1:2]=0

df = data.frame(
  red = matrix(mandrill[,,1], ncol=1),
  green = matrix(mandrill[,,2], ncol=1),
  blue = matrix(mandrill[,,3], ncol=1)
)

### compute the k-means clustering, to obtain regions from image by color
K = kmeans(df,5)

df$label = K$cluster

table(df$label)

### Replace the color of each pixel in the image with the mean 
### R,G, and B values of the cluster in which the pixel resides:
# get the coloring
colors = data.frame(
  label = 1:nrow(K$centers), 
  R = K$centers[,"red"],
  G = K$centers[,"green"],
  B = K$centers[,"blue"]
)
# merge color codes on to df
# IMPORTANT: we must maintain the original order of the df after the merge!
df$order = 1:nrow(df)
df = merge(df, colors)
df = df[order(df$order),]
df$order = NULL

# get mean color channel values for each row of the df.
R = matrix(df$R, nrow=dim(mandrill)[1])
G = matrix(df$G, nrow=dim(mandrill)[1])
B = matrix(df$B, nrow=dim(mandrill)[1])
  
# reconstitute the segmented image in the same shape as the input image
mandrill.segmented = array(dim=dim(mandrill))
mandrill.segmented[,,1] = R
mandrill.segmented[,,2] = G
mandrill.segmented[,,3] = B

RGB_label = R*B*G

# View the result
pdf("segmented.pdf")
grid.raster(mandrill.segmented)
dev.off()

#save(df, mandrill.segmented, R,G,B, file = "From_image_region.RData")

#> df[1:4,]
#       label red green blue         R         G         B
#200066     2   1     1    1 0.9997333 0.9994392 0.9992982
#200067     2   1     1    1 0.9997333 0.9994392 0.9992982
#200068     2   1     1    1 0.9997333 0.9994392 0.9992982
#200069     2   1     1    1 0.9997333 0.9994392 0.9992982

pixel_ind = c(1:dim(df)[1])
x_coor = rep(1:dim(mandrill.segmented)[2], each=dim(mandrill.segmented)[1])
y_coor = -rep(1:dim(mandrill.segmented)[1], times=dim(mandrill.segmented)[2])

data_groundtruth = data.frame(pixel_ind, x_coor,y_coor )
data_groundtruth$label = as.integer(as.factor(c(RGB_label)))

# remove background 
data_groundtruth_use = data_groundtruth[-which(data_groundtruth$label==5),]

set.seed(1234)
subsample = data_groundtruth_use[sample(1:dim(data_groundtruth_use)[1],10000,replace=F),]
# > min(subsample$y_coor)
# [1] -1591
subsample$y_coor = subsample$y_coor+1591 # make coordinates >=0
save(data_groundtruth, data_groundtruth_use,subsample, file = "simulate_spatial_cell_label.RData")

save(subsample, file = "LIBDsubsample.RData") 

Generate single cell data

Single cell data are downloaded from GSE104276.

library(Matrix)
library(readr)
library(openxlsx)

PFC_ref = read.table("GSE104276_all_pfc_2394_UMI_count_NOERCC.txt")
PFC_ref = as.matrix(PFC_ref)
celltype_ref = read.xlsx("GSE104276_readme_sample_barcode.xlsx",5)
PFC_ref_use = PFC_ref[,match(as.character(celltype_ref[,1]), colnames(PFC_ref))]
raw.data = PFC_ref_use
cell_types <- celltype_ref$cell_types
names(cell_types) <- as.character(celltype_ref[,1]) # create cell_types named list
cell_types <- as.factor(cell_types) # convert to factor data type
nUMI <- colSums(PFC_ref_use); names(nUMI) <- as.character(celltype_ref[,1]) # create nUMI named list
library(RCTD)
reference <- Reference(raw.data, cell_types, nUMI)
print(dim(reference@counts))
table(reference@cell_types)
#> table(reference@cell_types)
#       Astrocytes GABAergic neurons         Microglia           Neurons 
#               76               701                68              1057 
#              OPC        Stem cells 
#              117               290 
# > head(subsample)
#        pixel_ind x_coor y_coor label
# 858114    858114    781   1477     3
# 723450    723450    658    841     1
# 380568    380568    346    523     2
# 371366    371366    338    925     2
# 169923    169923    155   1068     1
# 466557    466557    425   1434     3

library(splatter)
cnts=as.matrix(reference@counts) 
# [1] 24153  2309
celltype=reference@cell_types
init_params <- splatEstimate(cnts[,which(reference@cell_types=="Neurons")])
save(init_params, file = "init_params_LIBD.RData")

Main functions in simulation

code_utility.R

"%&%" <- function(x, y) paste0(x, y)

BayesSpace_read <- function(count_in, location_in,platform="ST") {
    spot = rownames(location_in)
    in_tissue = rep(1,length(spot))
    row = location_in[,1]
    col = location_in[,2]
    imagerow = location_in[,1]
    imagecol = location_in[,2]
    colData=data.frame(spot,in_tissue, row, col, imagerow, imagecol)
    colnames(colData) <- c("spot", "in_tissue", "row", "col", "imagerow", "imagecol")
    rownames(colData) <- colData$spot
    colData <- colData[colData$in_tissue > 0, ]
    gene_id = gene_name = rownames(count_in)
    feature_type = rep("",length(gene_id))
    rowData = data.frame(gene_id, gene_name, feature_type)
    colnames(rowData) <- c("gene_id", "gene_name", "feature_type")
    rowData <- rowData[, c("gene_id", "gene_name")]
    rownames(rowData) <- scater::uniquifyFeatureNames(rowData$gene_id, rowData$gene_name)
    sce <- SingleCellExperiment(assays=list(counts=count_in),
                                rowData=rowData,
                                colData=colData)
    sce <- scater::logNormCounts(sce)
    metadata(sce)$BayesSpace.data <- list()
    print("BayesSpace platform")
    print(platform)
    metadata(sce)$BayesSpace.data$platform <- platform
    metadata(sce)$BayesSpace.data$is.enhanced <- FALSE
    
    return(sce)
}


BayesSpace_func = function(count_in, location_in, clusternum,nrep=50000,ifLIBD = FALSE,LIBD_sampleid=NULL,platform = "Visium"){
	
	if(ifLIBD==TRUE){

		library(igraph)
		library(peakRAM)
		library(SingleCellExperiment)
		library(ggplot2)
		library(BayesSpace)
		library('spatialLIBD')
		# library(BiocFileCache)

		clusternums = c(7,7,7,7,5,5,5,5,7,7,7,7)
		# sce <- fetch_data(type = 'sce')
		sce <- spatialLIBD::fetch_data(type = 'sce',destdir="/net/mulan/disk2/shanglu/Projects/SpatialPCA/LIBD_dat")
		metaData = SingleCellExperiment::colData(sce)
		sample_names <- paste0( unique(colData(sce)$sample_name))

		dlpfc <- getRDS("2020_maynard_prefrontal-cortex", sample_names[i],cache=FALSE)
		load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/LIBD/LIBD_sample",LIBD_sampleid,".RData") )
		ground_truth = KRM_manual_layers_sub$layer_guess_reordered

		# use top 2000 genes
		set.seed(101)
		dec <- scran::modelGeneVar(dlpfc)
		top <- scran::getTopHVGs(dec, n = 2000)
		set.seed(102)
		dlpfc <- scater::runPCA(dlpfc, subset_row=top)
		## Add BayesSpace metadata
		dlpfc <- spatialPreprocess(dlpfc, platform="Visium", skip.PCA=TRUE)
		#q <- length(table(ground_truth))  # Number of clusters
		q=clusternums[LIBD_sampleid]
		d <- 15  # Number of PCs
		## Run BayesSpace clustering
		set.seed(104)
		dlpfc <- spatialCluster(dlpfc, q=q, d=d, platform='Visium',
		                        nrep=50000, gamma=3, save.chain=TRUE) 

		clusterlabel = dlpfc$spatial.cluster
		loc1 = unlist(dlpfc@colData@listData$imagecol)
		loc2 = unlist(dlpfc@colData@listData$imagerow)
		location = data.frame(loc1, loc2)

		return(list("clusterlabel"=clusterlabel,"location"=location))

	}else if(ifLIBD==FALSE){

		library(peakRAM)
		library(SingleCellExperiment)
		library(ggplot2)
		library(BayesSpace)
		library(Matrix)
    print("platform")
    print(platform)
    platform_in = platform
		sce = BayesSpace_read(count_in,location_in,platform_in)
		set.seed(1234)
		top <- scran::getTopHVGs(sce, n = 2000)
		sce <- scater::runPCA(sce, subset_row=top)
		## Add BayesSpace metadata
		sce <- spatialPreprocess(sce, platform=platform_in, skip.PCA=TRUE)
		q <- clusternum # Number of clusters
		d <- 15  # Number of PCs
		## Run BayesSpace clustering
		sce <- spatialCluster(sce, q=q, d=d, platform=platform_in,
		                        nrep=nrep, gamma=3, save.chain=FALSE)  # slow
		clusterlabel = sce$spatial.cluster
		loc1 = unlist(sce@colData@listData$imagecol)
		loc2 = unlist(sce@colData@listData$imagerow)
		location = data.frame(loc1, loc2)

		return(list("clusterlabel"=clusterlabel,"location"=location))
	}

}


HMRF_func = function(count_in, location_in, clusternum,path_out,betas=c(28,2,2)){
	
	# HMRF, giotto
	library(Giotto)
	library(peakRAM)
	library(tidyverse)
	my_python_path = "/net/mulan/home/shanglu/anaconda3/envs/SpaGCN/bin/python3.7"
	instrs <- createGiottoInstructions(python_path = my_python_path)
	hmrf <- createGiottoObject(raw_exprs = as.data.frame(t(count_in)), 
	spatial_locs = location_in, instructions = instrs)
	hmrf <- filterGiotto(gobject = hmrf, expression_threshold = 0.5, 
	gene_det_in_min_cells = 20, min_det_genes_per_cell = 0)
	hmrf <- normalizeGiotto(hmrf) 
	hmrf <- addStatistics(hmrf)
	hmrf <- createSpatialNetwork(hmrf, minimum_k = 2)
	# select SE genes using
	genes <- binSpect(hmrf, bin_method = 'kmeans')$genes
	genes <- genes[1:100]
	outfile <- file.path(path_out)
	R=clusternum
	verbose <- doHMRF(
	gobject = hmrf, 
	spatial_genes = genes, 
	expression_values = "scaled", 
	k = clusternum, 
	betas=betas,
	#betas = c(0, 2, 26), # betas = [start, step, number]
	output_folder = outfile,
	python_path = my_python_path, 
	overwrite_output = T)
	files <- list.files(file.path(outfile, "result.spatial.zscore", 
	"k_" %&% R), pattern = "\\.prob\\.txt", full.names = T)
	labels <- lapply(files, function(f){
	res.i <- read.table(f, row.names = 1)
	apply(res.i, 1, which.max)
	})
	labels <- do.call(cbind, labels)
	betas <- sapply(strsplit(files, "beta\\.|\\.0\\.prob"), `[`, 2)
	colnames(labels) <- paste0("beta", betas)
	labels <- labels[, order(as.numeric(betas))]

	return(labels)

}


get_NMF = function(count, PCnum){
  suppressMessages(require(scater))
  suppressMessages(require(NMF))
  suppressMessages(require(SingleCellExperiment))
  suppressMessages(require(RcppML))
  #expr = log(count+1) # non negative
  sce <- SingleCellExperiment(list(counts=count))
  sce <- logNormCounts(sce)
  #sce <- runNMF(sce,ncomponents = PCnum)
  res <- scater::calculateNMF(sce, ncomponents = PCnum)
  Z_NMF = t(res)
  return(Z_NMF)
}



NMF_cluster_func = function(count_in, genelist,PCnum,cluster_method="walktrap",clusternum){
# install.packages("RcppML")
# devtools::install_github("zdebruine/RcppML")
	library(RcppML)
	library(SpatialPCA)
	count_use = count_in[match(genelist,rownames(count_in)),]
	#res <- RcppML::nmf(count_use,  k = PCnum, nonneg = TRUE)
	# NMF_pcs = res$h

  NMF_pcs = get_NMF(count_use,PCnum)

	if(cluster_method=="walktrap"){
		clusterlabel = walktrap_clustering(clusternum=clusternum,latent_dat=as.matrix(NMF_pcs),knearest=round(sqrt(dim(NMF_pcs)[2])) ) 
	}else if(cluster_method=="louvain"){
		clusterlabel = louvain_clustering(clusternum=clusternum,latent_dat=as.matrix(NMF_pcs),knearest=round(sqrt(dim(NMF_pcs)[2])) ) 
	}

	return(list("PC"=NMF_pcs,"clusterlabel"=clusterlabel))

}



PCA_cluster_func = function(expr, PCnum,cluster_method="walktrap",clusternum){
	output=expr
	d=PCnum
	n=dim(expr)[2]
	k = dim(output)[1]
	output_sub_mean=matrix(0,k,n)
	for(i_k in 1:k){
  		output_sub_mean[i_k,]=output[i_k,]-mean(output[i_k,])
	}
	svd_output_sub_mean=svd(output_sub_mean)
	A_ini=svd_output_sub_mean$u[,1:d] 
	Z_pca = t(A_ini) %*% output_sub_mean

	if(cluster_method=="walktrap"){
		clusterlabel = walktrap_clustering(clusternum=clusternum,latent_dat=as.matrix(Z_pca),knearest=round(sqrt(dim(Z_pca)[2])) ) 
	}else if(cluster_method=="louvain"){
		clusterlabel = louvain_clustering(clusternum=clusternum,latent_dat=as.matrix(Z_pca),knearest=round(sqrt(dim(Z_pca)[2])) ) 
	}

	return(list("PC"=Z_pca,"clusterlabel"=clusterlabel))
}


fx_lisi = function(clusterlabel_in, location){
	library(lisi)
	dat = data.frame("clusterlabel"=clusterlabel_in)
	lisi=compute_lisi(location, dat, c("clusterlabel"))
	return("lisi"=lisi$clusterlabel)
}


	library(BayesSpace)
	library(assertthat)
	library(RCurl)

getRDS <- function(dataset, sample, cache=TRUE) {


    url <- "https://fh-pi-gottardo-r-eco-public.s3.amazonaws.com/SpatialTranscriptomes/%s/%s.rds"
    url <- sprintf(url, dataset, sample)
    assert_that(url.exists(url), msg="Dataset/sample not available")
    
    if (cache) {
        bfc <- BiocFileCache()
        local.path <- bfcrpath(bfc, url)
    } else {
        local.path <- tempfile(fileext=".rds")
        download.file(url, local.path, quiet=TRUE, mode="wb")
    }
    
    readRDS(local.path)
}

simu_stripe = function(
  location,
  label,
  init_params,
  scenario,
  J,
  batch_facLoc,
  de_prop,
  de_facLoc,
  de_facScale,
  sim_seed,
  debug = FALSE
  )
{ 

  N <- nrow(location)
  set.seed(sim_seed)
  # 1.simulate count data
  noBatch <- ifelse(batch_facLoc == 0, TRUE, FALSE)
  params <- setParams(
    init_params,
    batchCells = rep(3 * N, 1),
    batch.rmEffect = noBatch,
    batch.facLoc = batch_facLoc,
    nGenes = J,
    group.prob = c(0.25, 0.25, 0.25,0.25),
    out.prob = 0,
    de.prob = de_prop,
    de.facLoc = de_facLoc,
    de.facScale = de_facScale,
    seed = sim_seed)

  sim_groups <- splatSimulate(
    params = params,
    method = "groups",
    verbose = FALSE)

  # remove cells having no expressed genes
    idx_zerosize <- apply(counts(sim_groups), MARGIN = 2, sum) == 0
    sim_groups <- sim_groups[, !idx_zerosize]

   if(scenario == 1){
      prop <- c(0.6, 0.3, 0.05,0.05)
    }else if(scenario == 2){
      prop <- c(0.45, 0.45, 0.05,0.05)
    }else if(scenario == 3){
      prop <- c(0.35, 0.30, 0.30,0.05)
    }

  # 3.generate cell types
  print("generate cell types")

    ztrue <- label
    c_simu <- rep(NA, length(ztrue))
    if(scenario == 1){ # scenario 1: 4 cell types
      print("scenario == 1")
      c_simu <- ztrue # cell type assignment same as region assignment
    }else if(scenario != 1){
      print("scenario != 1")
      for(z in unique(label)){
        zi_idx <- ztrue == z # zi_idx is index for region z
        c_simu[zi_idx] <- sample(map_z2c(z), sum(zi_idx), prob = prop, replace = T)
      }
    }

    # 4.assign count data
    groups <- as.data.frame(colData(sim_groups)) %>% filter(Batch == "Batch" %&% 1)
    sim_cnt <- array(NA, c(J, N))
    for(c in 1:4){
      c_size <- sum(c_simu == c)  # true number of cells in cell type c
      c_cells <- groups$Cell[grepl(c, groups$Group)] # generated cells in group c
      cells_select <- sample(as.character(c_cells), c_size, replace = F)
      # sample same number of group c cells in real data from generated cells
      sim_cnt[, c_simu == c] <- as.matrix(counts(sim_groups)[, cells_select])
      # for positions of original cell type c cells, assign generated counts of group c
    }
    colnames(sim_cnt) <- "Cell" %&% 1:N
    rownames(sim_cnt) <- "Gene" %&% 1:J

  return(list(sim_cnt, c_simu, sim_seed))
}




map_z2c = function(z)
{
  case_when(
    z == 1 ~ c(1, 2, 3, 4),
    z == 2 ~ c(2, 3, 4, 1),
    z == 3 ~ c(3, 4, 1, 2),
    z == 4 ~ c(4, 1, 2, 3)
    )
}


louvain_clustering_new=function(clusternum, latent_dat,knearest=10){
set.seed(1234)
suppressMessages(require(FNN))
suppressMessages(require(igraph))
suppressMessages(require(bluster))
 
  PCvalues = latent_dat
info.spatial = as.data.frame(t(PCvalues))
colnames(info.spatial) =  paste0("factor", 1:nrow(PCvalues))

  knn.norm = get.knn(as.matrix(t(PCvalues)), k = knearest)
  knn.norm = data.frame(from = rep(1:nrow(knn.norm$nn.index), 
  k=10), to = as.vector(knn.norm$nn.index), weight = 1/(1 + as.vector(knn.norm$nn.dist)))
  nw.norm = graph_from_data_frame(knn.norm, directed = FALSE)
  nw.norm = simplify(nw.norm)
  lc.norm = cluster_louvain(nw.norm)
  merged <- mergeCommunities(nw.norm, lc.norm$membership, number=clusternum)
  clusterlabel = as.character(as.integer(as.factor(paste0("cluster",merged))))
return("cluster_label"=clusterlabel)
}


refine_v2 = function(spotlist, pred_cluster, dist, shape="square"){
  # refined_pred = c(NA,length(pred_cluster))
  #pred = data.frame("spot"=spotlist, "cluster"=pred_cluster)
  dis_df = as.matrix(dist)
  if(shape=="square"){
    num_obs = 4
  }else if(shape == "hexagon"){
    num_nbs = 6
  }else{
    print("Shape is not recongized, shape='hexagon' for Visium data, 'square' for ST data.")
  }
  refined_pred = pred_cluster
  for(i in 1:length(pred_cluster)){
    nearby_spot_ind = order(dist[i,])[1:(num_obs+1)]
    labels_nearby_spot_ind = refined_pred[nearby_spot_ind]  # use updated cluster
    spot_of_interest = refined_pred[i]
    labels_table = table(labels_nearby_spot_ind)
    if( labels_table[spot_of_interest]<num_obs/2 & max(labels_table)>num_obs/2){
      refined_pred[i] = names(labels_table)[which.max(labels_table)]
    }else{
      refined_pred[i] = spot_of_interest
    }

  }

  return(refined_pred)

}


library(tidyverse)
library(mclust)
library(CVXR)
permLabel <- function(labels, perm, zeroidx = FALSE)
{
  if(zeroidx){
    labels <- labels + 1
  }
  K <- length(perm)
  idx <- list()
  for(i in 1:K)
  {
    idx[[i]] <- labels == i
  }
  for(i in 1:K)
  {
    labels[idx[[i]]] <- perm[i]
  }
  labels
}

matchLabel <- function(est_labels, true_labels)
{
  C <- length(unique(true_labels))
  est_labels <- factor(est_labels, 1:C)
  A <- matrix(table(est_labels, true_labels), C, C)
  P <- Variable(C, C, boolean = T) # row permutation matrix
  Y <- Variable(C, C)
  problem <- suppressMessages(
    Problem(Maximize(matrix_trace(Y)), list(Y == P %*% A, 
      sum_entries(P, axis = 1) == 1, sum_entries(P, axis = 2) == 1))
  ) 
  result <- suppressMessages(solve(problem))
  P <- result$getValue(P)
  perm <- apply(P, MARGIN = 2, function(x) which(x==1))
  est_labels <- permLabel(est_labels, perm)
}


simu = function(
  location,
  label,
  init_params,
  scenario,
  J,
  batch_facLoc,
  de_prop,
  de_facLoc,
  de_facScale,
  sim_seed,
  debug = FALSE
  )
{ 

  N <- nrow(location)
  set.seed(sim_seed)
  # 1.simulate count data
  noBatch <- ifelse(batch_facLoc == 0, TRUE, FALSE)
  params <- setParams(
    init_params,
    batchCells = rep(3 * N, 1),
    batch.rmEffect = noBatch,
    batch.facLoc = batch_facLoc,
    nGenes = J,
    group.prob = c(0.25, 0.25, 0.25,0.25),
    out.prob = 0,
    de.prob = de_prop,
    de.facLoc = de_facLoc,
    de.facScale = de_facScale,
    seed = sim_seed)

  sim_groups <- splatSimulate(
    params = params,
    method = "groups",
    verbose = FALSE)

  # remove cells having no expressed genes
    idx_zerosize <- apply(counts(sim_groups), MARGIN = 2, sum) == 0
    sim_groups <- sim_groups[, !idx_zerosize]


   if(scenario == 1){
      prop <- c(0.7, 0.1, 0.1,0.1)
    }else if(scenario == 2){
      prop <- c(0.6, 0.3, 0.05,0.05)
    }else if(scenario == 3){
      prop <- c(0.45, 0.45, 0.05,0.05)
    }else if(scenario == 4){
      prop <- c(0.35, 0.30, 0.30,0.05)
    }

  # 3.generate cell types
  print("generate cell types")

    ztrue <- label
    c_simu <- rep(NA, length(ztrue))
    if(scenario == 1){ # scenario 1: 4 cell types
      print("scenario == 1")
      c_simu <- ztrue # cell type assignment same as region assignment
    }else if(scenario != 1){
      print("scenario != 1")
      for(z in unique(label)){
        zi_idx <- ztrue == z # zi_idx is index for region z
        c_simu[zi_idx] <- sample(map_z2c(z), sum(zi_idx), prob = prop, replace = T)
      }
    }

    # 4.assign count data
    groups <- as.data.frame(colData(sim_groups)) %>% filter(Batch == "Batch" %&% 1)
    sim_cnt <- array(NA, c(J, N))
    for(c in 1:4){
      c_size <- sum(c_simu == c)  # true number of cells in cell type c
      c_cells <- groups$Cell[grepl(c, groups$Group)] # generated cells in group c
      cells_select <- sample(as.character(c_cells), c_size, replace = F)
      # sample same number of group c cells in real data from generated cells
      sim_cnt[, c_simu == c] <- as.matrix(counts(sim_groups)[, cells_select])
      # for positions of original cell type c cells, assign generated counts of group c
    }
    colnames(sim_cnt) <- "Cell" %&% 1:N
    rownames(sim_cnt) <- "Gene" %&% 1:J

  return(list(sim_cnt, c_simu, sim_seed))
}



make_grid = function(square_size, location){
  x=location[,1]
  y=location[,2]
  max_x = max(x)
  min_x = min(x)
  max_y = max(y)
  min_y = min(y)
  grid_num_x = round((max_x-min_x)/square_size)+1
  grid_num_y = round((max_y-min_y)/square_size)+1



  grid = list()
  grid_ind = 0
  for(grid_x_id in 1:grid_num_x){
    for(grid_y_id in 1:grid_num_y){
      grid_ind = grid_ind + 1
      x_left = min_x + (grid_x_id-1)*square_size
      x_right = min_x + (grid_x_id)*square_size
      y_low = min_y + (grid_y_id-1)*square_size
      y_up = min_y + (grid_y_id)*square_size
      cell_ids = which(x>=x_left & x<=x_right & y>=y_low & y<=y_up)
      center_x = (x_left+x_right)/2
      center_y = (y_low+y_up)/2
      grid[[grid_ind]] = list()
      grid[[grid_ind]]$cell_ids = cell_ids
      grid[[grid_ind]]$cell_num = length(cell_ids)
      grid[[grid_ind]]$center_x = center_x
      grid[[grid_ind]]$center_y = center_y
      grid[[grid_ind]]$pseudo_x = grid_x_id
      grid[[grid_ind]]$pseudo_y = grid_y_id
    }
  }
  return(grid)
}


make_spot = function(grid_obj, count_mat,celltypes, celltruths){
  gene_num = dim(count_mat)[1]
  spot_num = length(grid_obj)
  count_spot = matrix(0,gene_num,spot_num)
  location_spot = matrix(NA,spot_num,2)
  pseudo_location_spot = matrix(NA,spot_num,2)

  celltype = c()
  subspottruth = c()
  for(spot in 1:spot_num){
    if(length(grid_obj[[spot]]$cell_ids)==1){
      count_spot[,spot] = count_mat[,grid_obj[[spot]]$cell_ids]
      location_spot[spot,1] = grid_obj[[spot]]$center_x
      location_spot[spot,2] = grid_obj[[spot]]$center_y
      pseudo_location_spot[spot,1] = grid_obj[[spot]]$pseudo_x
      pseudo_location_spot[spot,2] = grid_obj[[spot]]$pseudo_y
      celltype[spot] = names(table(celltypes[grid_obj[[spot]]$cell_ids]))[which.max(table(celltypes[grid_obj[[spot]]$cell_ids]))]
      subspottruth[spot] = names(table(celltruths[grid_obj[[spot]]$cell_ids]))[which.max(table(celltruths[grid_obj[[spot]]$cell_ids]))]

    }else if(length(grid_obj[[spot]]$cell_ids)>=2){
      count_spot[,spot] = rowSums(count_mat[,grid_obj[[spot]]$cell_ids])
      location_spot[spot,1] = grid_obj[[spot]]$center_x
      location_spot[spot,2] = grid_obj[[spot]]$center_y
      pseudo_location_spot[spot,1] = grid_obj[[spot]]$pseudo_x
      pseudo_location_spot[spot,2] = grid_obj[[spot]]$pseudo_y
      celltype[spot] = names(table(celltypes[grid_obj[[spot]]$cell_ids]))[which.max(table(celltypes[grid_obj[[spot]]$cell_ids]))]
      subspottruth[spot] = names(table(celltruths[grid_obj[[spot]]$cell_ids]))[which.max(table(celltruths[grid_obj[[spot]]$cell_ids]))]
    }else{
      count_spot[,spot] = 0
      location_spot[spot,1] = grid_obj[[spot]]$center_x
      location_spot[spot,2] = grid_obj[[spot]]$center_y
      pseudo_location_spot[spot,1] = grid_obj[[spot]]$pseudo_x
      pseudo_location_spot[spot,2] = grid_obj[[spot]]$pseudo_y
      celltype[spot] = "empty"
      subspottruth[spot] = "empty"

    }
  }


  return(list("count_spot"=count_spot,"location_spot"=location_spot,"pseudo_location_spot"=pseudo_location_spot,"subspottruth"=subspottruth,"celltype"=celltype))
}


make_spot_from_subspot = function(count_location_subspot){
  count_subspot = count_location_subspot[[1]]
  location_subspot = count_location_subspot[[3]]/3
  truth_subspot = count_location_subspot[[4]]

  spot_x_max = floor(max(location_subspot[,1]))
  spot_y_max = floor(max(location_subspot[,2]))

  spot_num = spot_x_max * spot_y_max
  used_subspots = c()
  truth_spot = c()
  count_spot = matrix(0, dim(count_subspot)[1], spot_num)
  location_spot = matrix(NA, spot_num, 2)
  count = 0
  for(spot_id_x in 1:spot_x_max){
    for(spot_id_y in 1:spot_y_max){
      count = count + 1
      subspot_id = which(location_subspot[,1]>=spot_id_x-0.5 & location_subspot[,1]<=spot_id_x+0.5 & location_subspot[,2]>=spot_id_y-0.5 & location_subspot[,2]<=spot_id_y+0.5)
      used_subspots = c(used_subspots, subspot_id)
      count_spot[, count] = rowSums(count_subspot[,subspot_id])
      location_spot[count,1] = mean(location_subspot[subspot_id,1])
      location_spot[count,2] = mean(location_subspot[subspot_id,2])
      truth_spot[count] = names(table(count_location_subspot$subspottruth[subspot_id]))
    }
  }

  return(list("count_spot"=count_spot, "location_spot" = location_spot, "truth_spot"=truth_spot,"used_subspots" = used_subspots))

}

code_utility_python.py

import anndata as ad
import SpaGCN as spg
import pandas as pd
import numpy as np
import scanpy as sc
from scanpy import read_10x_h5
import random
import torch
import cv2
import os

def run_SpaGCN_py(sim_cnt, info, R, p = 0.5):
  adata = ad.AnnData(sim_cnt, obs = info)
  # calculate adjacency matrix
  x_array = adata.obs["x"].tolist()
  y_array = adata.obs["y"].tolist()
  adj = spg.calculate_adj_matrix(x = x_array, y = y_array, histology = False)
  
  # Expression data preprocessing
  adata.var_names_make_unique()
  spg.prefilter_genes(adata)
  spg.prefilter_specialgenes(adata)
  sc.pp.normalize_per_cell(adata)
  sc.pp.log1p(adata)

  # set hyper-parameters
  l = spg.search_l(p, adj, start = 0.01, end = 1000, tol = 0.01, max_run = 100)
  n_clusters = R
  r_seed = t_seed = n_seed = 0
  res = spg.search_res(adata, adj, l, n_clusters, 
    start = 0.7, step = 0.1, tol = 5e-3, lr = 0.05, 
    max_epochs = 20, r_seed = r_seed, t_seed = t_seed, 
    n_seed = n_seed)

  # run spaGCN
  clf = spg.SpaGCN()
  clf.set_l(l)
  random.seed(r_seed)
  torch.manual_seed(t_seed)
  np.random.seed(n_seed)
  clf.train(adata, adj, init_spa = True, init = "louvain", res = res, 
    tol = 5e-3, lr = 0.05, max_epochs = 200)
  y_pred, prob = clf.predict()
  adata.obs["pred"]= y_pred
  adata.obs["pred"] = adata.obs["pred"].astype('category')

  # cluster refinement
  adj_2d = spg.calculate_adj_matrix(x = x_array, y = y_array, histology = False)
  refined_pred = spg.refine(sample_id = adata.obs.index.tolist(), 
    pred = adata.obs["pred"].tolist(), dis = adj_2d, shape = "hexagon")
  adata.obs["refined_pred"] = refined_pred
  adata.obs["refined_pred"] = adata.obs["refined_pred"].astype('category')

  return(adata.obs)

Simulate data, 10000 cells

args <- as.numeric(commandArgs(TRUE))
i = args[1] # scenario
j = args[2] # repeat
print(i)

  library(tidyverse)
  library(Giotto)
  library(scater)
  library(Seurat)
  library(mclust)
  library(SC3)
  library(BayesSpace)
  library(gtools)
  library(splatter)
  library(reticulate)
  library(mclust )
  library(igraph)
  library(assertthat)
  library(SpatialPCA)
  library(ggplot2)
  
source("code_utility.R")  
# can be found in the above sections
load("LIBDsubsample.RData")
load("init_params_LIBD.RData")
# These two R object can be downloaded from https://drive.google.com/drive/folders/18rwQjB3-g86A-M9xYPPJlHz60bfMABdE?usp=sharing.

res = simu(location=subsample[,2:3],label = subsample$label,init_params,
    scenario=i,J=5000, batch_facLoc=0, de_prop=0.5, de_facLoc=0.5, de_facScale=0.5,sim_seed=j, debug = FALSE)

save(res, file = paste0("LIBD_res_scenairo_",i,"_rep_",j,".RData"))


count_mat = res[[1]]
truth = subsample$label
location=as.matrix(subsample[,2:3])
# truth = LIBDsimu_pseudo$info$z
location=as.matrix(location)
celltypes = res[[2]]
count_spot = count_mat
location_spot = location
rownames(location_spot) = colnames(count_spot) = paste0("spot",1:dim(count_spot)[2])
rownames(count_spot) = paste0("gene",1:dim(count_spot)[1])
dim(count_spot)


###############
# Run SpatialPCA
###############

LIBDsimu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="sparkx", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
LIBDsimu = SpatialPCA_buildKernel(LIBDsimu, kerneltype="gaussian", bandwidthtype="Silverman")
LIBDsimu = SpatialPCA_EstimateLoading(LIBDsimu,fast=TRUE,SpatialPCnum=20)
LIBDsimu = SpatialPCA_SpatialPCs(LIBDsimu, fast=TRUE)


# Collect results
SpatialPCA_result = list()
SpatialPCA_result$SpatialPCs  = LIBDsimu@SpatialPCs
SpatialPCA_result$normalized_expr  = LIBDsimu@normalized_expr
SpatialPCA_result$location = LIBDsimu@location
pred_cluster= louvain_clustering_new(4,SpatialPCA_result$SpatialPCs,500 )
SpatialPCA_result$clusterlabel = pred_cluster
SpatialPCA_result$truth = truth[match(rownames(LIBDsimu@location),rownames(location_spot))]
SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$truth)
SpatialPCA_result$NMI = compare(as.factor(SpatialPCA_result$clusterlabel),as.factor(SpatialPCA_result$truth), method = "nmi")
SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)

save(SpatialPCA_result, file = paste0("celllevel_SpatialPCA_spatialgene_result_scenario_",i,"_rep_",j,".RData"))

###############
# BayesSpace
###############

print("run BayesSpace, ST, hvg genes")

# filter out spots with 0 counts
ind_keep=which(colSums(count_spot) > 0)
location_spot_bayesSpace=location_spot[ind_keep,]
count_spot_BayesSpace=count_spot[,ind_keep]
colnames(location_spot_bayesSpace) <- c("row", "col")

sce_LIBDsimu_ST <- SingleCellExperiment(assays = list(counts = count_spot_BayesSpace), colData = location_spot_bayesSpace)
sce_LIBDsimu_ST <- spatialPreprocess(sce_LIBDsimu_ST, platform="ST",n.PCs = 15, n.HVGs = 2000, log.normalize = T)
sce_LIBDsimu_ST <- spatialCluster(sce_LIBDsimu_ST, q=4, d=15, platform='ST',nrep=10000, gamma=3, save.chain=FALSE) 
sce_labels=sce_LIBDsimu_ST$spatial.cluster

BayesSpace_ST_result = list()
BayesSpace_ST_result$sce_LIBDsimu_ST = sce_LIBDsimu_ST
BayesSpace_ST_result$clusterlabel = sce_labels
BayesSpace_ST_result$location = location_spot_bayesSpace
BayesSpace_ST_result$truth = truth[ind_keep]
BayesSpace_ST_result$ARI = adjustedRandIndex(BayesSpace_ST_result$clusterlabel,BayesSpace_ST_result$truth)
BayesSpace_ST_result$NMI = compare(BayesSpace_ST_result$clusterlabel,BayesSpace_ST_result$truth, method = "nmi")
BayesSpace_ST_result$CHAOS = fx_CHAOS(BayesSpace_ST_result$clusterlabel, BayesSpace_ST_result$location)
BayesSpace_ST_result$PAS = fx_PAS(BayesSpace_ST_result$clusterlabel, BayesSpace_ST_result$location)

print(BayesSpace_ST_result$ARI)

save(BayesSpace_ST_result,  file = paste0("celllevel_BayesSpace_hvggene_result_scenario_",i,"_rep_",j,".RData"))

###############
# prepare SpaGCN data
# and then run in python
###############

print("SpaGCN")
# SpaGCN uses all genes

library(DropletUtils)
write10xCounts(
  paste0("celllevel_SpaGCN_scenairo_",i,"_rep_",j,"_count_allgene.h5"),
  count_spot,
  barcodes = colnames(count_spot),
  gene.id = rownames(count_spot),
  gene.symbol = rownames(count_spot),
  gene.type = "Gene Expression",
  overwrite = FALSE,
  type = c( "HDF5"),
  genome = "unknown",
  #version = c("2", "3"),
  #chemistry = "Single Cell 3' v3",
  original.gem.groups = 1L,
  library.ids = "custom"
)

Simulate data, spot level

args <- as.numeric(commandArgs(TRUE))
i = args[1] # scenario
j = args[2] # repeatment
k = args[3] # method
num = args[4] # spot level dataset
print(i)
print(j)
print(k)
print(num)

if(num == 1){
	spotnum=5077 # 90um
}else if(num==2){
	spotnum=3602 # 107um
}else{
	spotnum=1948 # 145um
}


setwd("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation")

library(tidyverse)
library(Giotto)
library(scater)
library(SC3)
library(BayesSpace)
library(gtools)
library(splatter)
library(igraph)
library(assertthat)
library(SPARK)
library(Seurat)
library(peakRAM)
library(ggplot2)
library(ggpubr)
library(mclust) # ARI
library(aricode)# NMI
library(SpatialPCA)# NMI
library(lisi) # lisi score
library(reticulate) 
library(FNN)

source("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/code_utility.R")
load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/LIBDsubsample.RData")
load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/init_params_LIBD.RData")

load(paste0("LIBD_res_scenairo_",i,"_rep_",j,".RData"))


D3=c("#f4f1de","#e07a5f","#3d405b","#81b29a","#f2cc8f","#a8dadc","#f1faee","#f08080",
	"#F94144", "#dda15e", "#F8961E" ,"#bc6c25", "#F9C74F" ,"#90BE6D", "#43AA8B")


# subspot level
count_mat = res[[1]]
truth = subsample$label
location=as.matrix(subsample[,2:3])
location=as.matrix(location)
celltypes = res[[2]]
# first generate subspot level data
if(spotnum == 5077){
	grid_subspot = make_grid(square_size = 4,location)
}else if(spotnum == 3602){
	grid_subspot = make_grid(square_size = 5,location)
}else if(spotnum == 1948){
	grid_subspot = make_grid(square_size = 7,location)
}
count_location_subspot = make_spot(grid_subspot,count_mat,celltypes,subsample$label)
count_subspot = count_location_subspot$count_spot
location_subspot = count_location_subspot$pseudo_location_spot/3
truth_subspot = count_location_subspot$subspottruth
spot_level_data = make_spot_from_subspot(count_location_subspot)
# truth_subspot[spot_level_data$used_subspots]
# spot level
truth=spot_level_data$truth_spot
truth_empty = which(truth=="empty")
count_spot = spot_level_data$count_spot[,-truth_empty]
location_spot = spot_level_data$location_spot[-truth_empty,]
truth=truth[-truth_empty]
rownames(location_spot) = colnames(count_spot) = paste0("spot",1:dim(count_spot)[2])
rownames(count_spot) = paste0("gene",1:dim(count_spot)[1])

save(count_spot,location_spot, truth,celltypes,file=paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/data/res_scenairo_",i,"_rep_",j,"_dataset_",num,"_count_location.RData"))



if(k ==1){

	if(num==1){

		simu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="spark", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
		simu = SpatialPCA_buildKernel(simu, kerneltype="gaussian", bandwidthtype="Silverman")
		simu = SpatialPCA_EstimateLoading(simu,fast=FALSE,SpatialPCnum=20)
		simu = SpatialPCA_SpatialPCs(simu, fast=FALSE)
		SpatialPCA_result = list()
		SpatialPCA_result$SpatialPCs  = simu@SpatialPCs
		SpatialPCA_result$location = simu@location
		SpatialPCA_result$celltypes = paste0("celltype",celltypes)
		SpatialPCA_result$Truth = truth[match(rownames(simu@location),rownames(location_spot))]
		SpatialPCA_result$clusterlabel = louvain_clustering(4,SpatialPCA_result$SpatialPCs,500) # original:500
		SpatialPCA_result$clusterlabel_refine = refine_cluster_10x(SpatialPCA_result$clusterlabel, SpatialPCA_result$location,shape="square") 
		SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$Truth)
		SpatialPCA_result$ARI_refine = adjustedRandIndex(SpatialPCA_result$clusterlabel_refine,SpatialPCA_result$Truth)
		SpatialPCA_result$NMI = NMI(as.character(SpatialPCA_result$clusterlabel),as.character(SpatialPCA_result$Truth))
		SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$LISI = fx_lisi(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$count_spot = count_spot
		SpatialPCA_result$location_spot = location_spot
		SpatialPCA_result$normalized_expr = simu@normalized_expr
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		save(SpatialPCA_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

		#load(paste0("cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/SpatialPCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
		p1=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
		p2=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpatialPCA"),color_in=D3[1:clusterNum])
		print(ggarrange(p1, p2, ncol = 2, nrow = 1))
		dev.off()

	}else if (num==2){

		simu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="spark", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
		simu = SpatialPCA_buildKernel(simu, kerneltype="gaussian", bandwidthtype="Silverman")
		simu = SpatialPCA_EstimateLoading(simu,fast=FALSE,SpatialPCnum=20)
		simu = SpatialPCA_SpatialPCs(simu, fast=FALSE)
		SpatialPCA_result = list()
		SpatialPCA_result$SpatialPCs  = simu@SpatialPCs
		SpatialPCA_result$location = simu@location
		SpatialPCA_result$celltypes = paste0("celltype",celltypes)
		SpatialPCA_result$Truth = truth[match(rownames(simu@location),rownames(location_spot))]
		SpatialPCA_result$clusterlabel = louvain_clustering(4,SpatialPCA_result$SpatialPCs,500) # original:500
		SpatialPCA_result$clusterlabel_refine = refine_cluster_10x(SpatialPCA_result$clusterlabel, SpatialPCA_result$location,shape="square") 
		SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$Truth)
		SpatialPCA_result$ARI_refine = adjustedRandIndex(SpatialPCA_result$clusterlabel_refine,SpatialPCA_result$Truth)
		SpatialPCA_result$NMI = NMI(as.character(SpatialPCA_result$clusterlabel),as.character(SpatialPCA_result$Truth))
		SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$LISI = fx_lisi(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$count_spot = count_spot
		SpatialPCA_result$location_spot = location_spot
		SpatialPCA_result$normalized_expr = simu@normalized_expr
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		save(SpatialPCA_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	#load(paste0("cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/SpatialPCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
		p1=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
		p2=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpatialPCA"),color_in=D3[1:clusterNum])
		print(ggarrange(p1, p2, ncol = 2, nrow = 1))
		dev.off()

	}else if (num==3){

		simu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="spark", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
		simu = SpatialPCA_buildKernel(simu, kerneltype="gaussian", bandwidthtype="Silverman")
		simu = SpatialPCA_EstimateLoading(simu,fast=FALSE,SpatialPCnum=20)
		simu = SpatialPCA_SpatialPCs(simu, fast=FALSE)
		SpatialPCA_result = list()
		SpatialPCA_result$SpatialPCs  = simu@SpatialPCs
		SpatialPCA_result$location = simu@location
		SpatialPCA_result$celltypes = paste0("celltype",celltypes)
		SpatialPCA_result$Truth = truth[match(rownames(simu@location),rownames(location_spot))]
		SpatialPCA_result$clusterlabel = walktrap_clustering(4,SpatialPCA_result$SpatialPCs,100) 
		SpatialPCA_result$clusterlabel_refine = refine_cluster_10x(SpatialPCA_result$clusterlabel, SpatialPCA_result$location,shape="square") 
		SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$Truth)
		SpatialPCA_result$ARI_refine = adjustedRandIndex(SpatialPCA_result$clusterlabel_refine,SpatialPCA_result$Truth)
		SpatialPCA_result$NMI = NMI(as.character(SpatialPCA_result$clusterlabel),as.character(SpatialPCA_result$Truth))
		SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$LISI = fx_lisi(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$count_spot = count_spot
		SpatialPCA_result$location_spot = location_spot
		SpatialPCA_result$normalized_expr = simu@normalized_expr
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		save(SpatialPCA_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	#load(paste0("cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/SpatialPCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
		p1=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
		p2=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpatialPCA"),color_in=D3[1:clusterNum])
		print(ggarrange(p1, p2, ncol = 2, nrow = 1))
		dev.off()

	}



}else if (k==2){ # BayesSpace

	load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])
	clusterNum = length(unique(SpatialPCA_result$Truth))
	colnames(loc_in)=c("x","y") # important

	res = BayesSpace_func(count_in, loc_in, clusternum=clusterNum,nrep=50000,ifLIBD=FALSE,platform="ST")
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	save(res, file = paste0("cells_BayesSpace_result_scenario_",i,"_rep_",j,".RData"))

	BayesSpace_result = list()
	BayesSpace_result$clusterlabel = as.character(res$clusterlabel)
	BayesSpace_result$location = SpatialPCA_result$location
	BayesSpace_result$Truth = SpatialPCA_result$Truth
	BayesSpace_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	BayesSpace_result$NMI = NMI(tabb[,1],tabb[,2])
	BayesSpace_result$CHAOS = fx_CHAOS(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$PAS = fx_PAS(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$LISI = fx_lisi(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$count_spot = SpatialPCA_result$count_spot
	BayesSpace_result$location_spot = SpatialPCA_result$location_spot
	BayesSpace_result$res = res

	save(BayesSpace_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_BayesSpace_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	clusterNum=length(unique(BayesSpace_result$clusterlabel))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/BayesSpace_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=BayesSpace_result$location,clusterlabel=BayesSpace_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=BayesSpace_result$location,clusterlabel=BayesSpace_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("BayesSpace"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==3){ # SpaGCN

	library(reticulate)
	library(tidyverse)
	use_python("/net/mulan/home/shanglu/anaconda3/envs/SpaGCN/bin/python3.7", required=TRUE)
	source_python("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/code_utility_python.py")

	load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])
	clusterNum = length(unique(SpatialPCA_result$Truth))
	colnames(loc_in)=c("x","y") # important
	res = run_SpaGCN_py(t(count_in), loc_in , clusterNum) # cell by gene 
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$refined_pred))

	SpaGCN_result = list()
	SpaGCN_result$clusterlabel = res$refined_pred
	SpaGCN_result$location = res[,c("x","y")]
	SpaGCN_result$Truth = SpatialPCA_result$Truth
	SpaGCN_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	SpaGCN_result$NMI = NMI(tabb[,1],tabb[,2])
	SpaGCN_result$CHAOS = fx_CHAOS(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$PAS = fx_PAS(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$LISI = fx_lisi(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$count_spot = SpatialPCA_result$count_spot
	SpaGCN_result$location_spot = SpatialPCA_result$location_spot

	save(SpaGCN_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpaGCN_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/SpaGCN_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=SpaGCN_result$location,clusterlabel=SpaGCN_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=SpaGCN_result$location,clusterlabel=SpaGCN_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpaGCN"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()


}else if(k==4){ # HMRF
	
	library(reticulate)
	library(tidyverse)
	library(peakRAM)	
	load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])
	clusterNum = length(unique(SpatialPCA_result$Truth))
	colnames(loc_in)=c("x","y") # important

	path_out = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/HMRF/scenario_",i,"_rep_",j,"_dataset_",num)
	res = HMRF_func(count_in=t(count_in), location_in=loc_in, clusternum=clusterNum,path_out=path_out,betas = c(0,2,6)) # betas = [start, step, number]
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res[,6]))

	HMRF_result = list()
	HMRF_result$clusterlabel = as.character(res[,6])
	HMRF_result$location = SpatialPCA_result$location
	HMRF_result$Truth = SpatialPCA_result$Truth
	HMRF_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	HMRF_result$NMI = NMI(tabb[,1],tabb[,2])
	HMRF_result$CHAOS = fx_CHAOS(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$PAS = fx_PAS(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$LISI = fx_lisi(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$res = res

	save(HMRF_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_HMRF_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/HMRF_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=HMRF_result$location,clusterlabel=HMRF_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=HMRF_result$location,clusterlabel=HMRF_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("HMRF"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==5){ # NMF

	library(peakRAM)

	load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum = length(unique(na.omit(SpatialPCA_result$Truth)))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])

	res = NMF_cluster_func(count_in=count_in, genelist=rownames(count_in),PCnum=20,cluster_method="louvain",clusternum=clusterNum)
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	NMF_result = list()
	NMF_result$clusterlabel = res$clusterlabel
	NMF_result$location = SpatialPCA_result$location
	NMF_result$Truth = SpatialPCA_result$Truth
	NMF_result$ARI =  adjustedRandIndex(tabb[,1], tabb[,2])
	NMF_result$NMI = NMI(tabb[,1],tabb[,2])
	NMF_result$CHAOS = fx_CHAOS(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$PAS = fx_PAS(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$LISI = fx_lisi(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$PCs = res$PC

	save(NMF_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_NMF_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/NMF_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=NMF_result$location,clusterlabel=NMF_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=NMF_result$location,clusterlabel=NMF_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("NMF"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==6){ # PCA

	
	load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum = length(unique(na.omit(SpatialPCA_result$Truth)))
	match_id = match(colnames(SpatialPCA_result$normalized_expr),colnames(SpatialPCA_result$count_spot))
	loc_in = as.data.frame(SpatialPCA_result$location_spot[match_id,])
	count_in = as.matrix(SpatialPCA_result$count_spot[,match_id])

	res = PCA_cluster_func(expr=SpatialPCA_result$normalized_expr,PCnum=20,cluster_method="louvain",clusternum=clusterNum)
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	PCA_result = list()
	PCA_result$clusterlabel = res$clusterlabel
	PCA_result$location = SpatialPCA_result$location
	PCA_result$Truth = SpatialPCA_result$Truth
	PCA_result$ARI =  adjustedRandIndex(tabb[,1], tabb[,2])
	PCA_result$NMI = NMI(tabb[,1],tabb[,2])
	PCA_result$CHAOS = fx_CHAOS(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$PAS = fx_PAS(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$LISI = fx_lisi(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$PCs = res$PC
	PCA_result$count_spot = SpatialPCA_result$count_spot
	PCA_result$location_spot = SpatialPCA_result$location_spot


	save(PCA_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/result/cells_PCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/simulation/figure/PCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=PCA_result$location,clusterlabel=PCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=PCA_result$location,clusterlabel=PCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("PCA"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()


}

Simulate data, spot level with stripe pattern

args <- as.numeric(commandArgs(TRUE))
i = args[1] # scenario
j = args[2] # repeatment
k = args[3] # method
print(i)
print(j)
print(k)

setwd("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2")

library(tidyverse)
library(Giotto)
library(scater)
library(SC3)
library(BayesSpace)
library(gtools)
library(splatter)
library(igraph)
library(assertthat)
library(SPARK)
library(Seurat)
library(peakRAM)
library(ggplot2)
library(ggpubr)
library(mclust) # ARI
library(aricode)# NMI
library(SpatialPCA)# NMI
library(lisi) # lisi score
library(reticulate) 


source("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/code_utility.R")


D3=c("#f4f1de","#e07a5f","#3d405b","#81b29a","#f2cc8f","#a8dadc","#f1faee","#f08080",
	"#F94144", "#dda15e", "#F8961E" ,"#bc6c25", "#F9C74F" ,"#90BE6D", "#43AA8B",
 "#4D908E", "#577590", "#277DA1", "#AEC7E8" ,"#FFBB78" ,"#98DF8A", "#FF9896",
"#C5B0D5" ,"#C49C94", "#F7B6D2", "#C7C7C7" ,"#DBDB8D" ,"#9EDAE5", "#393B79",
"#637939", "#8C6D31", "#843C39", "#7B4173" ,"#5254A3" ,"#8CA252", "#BD9E39",
"#AD494A", "#A55194", "#6B6ECF", "#B5CF6B", "#E7BA52" ,"#D6616B", "#CE6DBD",
"#9C9EDE", "#CEDB9C" ,"#E7CB94", "#E7969C", "#DE9ED6" ,"#3182BD", "#E6550D",
"#31A354", "#756BB1" ,"#636363", "#6BAED6" ,"#FD8D3C" ,"#74C476", "#9E9AC8",
"#969696", "#9ECAE1" ,"#FDAE6B", "#A1D99B" ,"#BCBDDC" ,"#BDBDBD", "#C6DBEF",
"#FDD0A2" ,"#C7E9C0" ,"#DADAEB", "#D9D9D9")

load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/LIBDsubsample.RData")
load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/init_params_LIBD.RData")

# 1,2,1,2,1,2 regions
# still 10000 cells
a = t(combn(1:1200,2))
b=a
b[,1] = a[,2]
b[,2] = a[,1]
c=rbind(a,b)
# x: 1:1000
# y: 1:200
a1=c[which(c[,2]<=200),]
a1 = as.data.frame(a1)
a1$label = 1
a1$label[which(a1[,1]>200 & a1[,1]<=400)] =2
a1$label[which(a1[,1]>600 & a1[,1]<=800)] =2
a1$label[which(a1[,1]>1000 & a1[,1]<=1200)] =2
a1$label = as.character(a1$label)

set.seed(06112022)
subsample = a1[sample(c(1:dim(a1)[1]),10000),]

save(subsample, file = "subsample_stripe.RData")


load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/subsample_stripe.RData")


seed=06112022+j

res = simu_stripe(location=subsample[,1:2],label = subsample[,3],init_params,
    scenario=i,J=5000, batch_facLoc=0, de_prop=0.95, de_facLoc=0.95, de_facScale=0.05,sim_seed=seed, debug = FALSE)

save(res, file=paste0("stripe_res_scenairo_",i,"_rep_",j,".RData"))

# subspot level
count_mat = res[[1]]
truth = subsample[,3]
location=as.matrix(subsample[,1:2])
# truth = stripesimu_pseudo$info$z
location=as.matrix(location)
celltypes = res[[2]]

count_spot = count_mat
location_spot = location
rownames(location_spot) = colnames(count_spot) = paste0("spot",1:dim(count_spot)[2])
rownames(count_spot) = paste0("gene",1:dim(count_spot)[1])
dim(count_spot)

save(count_spot,location_spot, truth,celltypes,file=paste0("stripe_res_scenairo_",i,"_rep_",j,"_count_location.RData"))


print("data saved!")

print("data loading!")

load(paste0("stripe_res_scenairo_",i,"_rep_",j,"_count_location.RData"))

if(k ==1){

stripesimu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="sparkx", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
stripesimu = SpatialPCA_buildKernel(stripesimu, kerneltype="gaussian", bandwidthtype="Silverman")
stripesimu = SpatialPCA_EstimateLoading(stripesimu,fast=TRUE,SpatialPCnum=20)
stripesimu = SpatialPCA_SpatialPCs(stripesimu, fast=TRUE)

SpatialPCA_result = list()
SpatialPCA_result$SpatialPCs  = stripesimu@SpatialPCs
SpatialPCA_result$location = stripesimu@location
SpatialPCA_result$clusterlabel = louvain_clustering(2,SpatialPCA_result$SpatialPCs,100) # original:500
SpatialPCA_result$celltypes = paste0("celltype",celltypes)
SpatialPCA_result$Truth = truth[match(rownames(stripesimu@location),rownames(location_spot))]
SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$Truth)
SpatialPCA_result$NMI = NMI(as.character(SpatialPCA_result$clusterlabel),as.character(SpatialPCA_result$Truth))
SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
SpatialPCA_result$LISI = fx_lisi(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
SpatialPCA_result$count_spot = count_spot
SpatialPCA_result$location_spot = location_spot
SpatialPCA_result$normalized_expr = stripesimu@normalized_expr

clusterNum=length(unique(SpatialPCA_result$clusterlabel))

save(SpatialPCA_result, file = paste0("stripe_pattern_SpatialPCA_result_scenario_",i,"_rep_",j,".RData"))
	
	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/figure/SpatialPCA_scenario_",i,"_rep_",j,".pdf"),width=20,height=5)
	p1=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpatialPCA"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()

}else if (k==2){ # BayesSpace


	load(paste0("stripe_pattern_SpatialPCA_result_scenario_",i,"_rep_",j,".RData"))
	count_in = SpatialPCA_result$count_spot
	loc_in=SpatialPCA_result$location_spot
	clusterNum = length(unique(SpatialPCA_result$Truth))
	res = BayesSpace_func(count_in, loc_in, clusternum=clusterNum,nrep=50000,ifLIBD=FALSE)
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	save(res, file = paste0("stripe_pattern_BayesSpace_result_scenario_",i,"_rep_",j,".RData"))

	BayesSpace_result = list()
	BayesSpace_result$clusterlabel = as.character(res$clusterlabel)
	BayesSpace_result$location = SpatialPCA_result$location
	BayesSpace_result$Truth = SpatialPCA_result$Truth
	BayesSpace_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	BayesSpace_result$NMI = NMI(tabb[,1],tabb[,2])
	BayesSpace_result$CHAOS = fx_CHAOS(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$PAS = fx_PAS(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$LISI = fx_lisi(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$count_spot = SpatialPCA_result$count_spot
	BayesSpace_result$location_spot = SpatialPCA_result$location_spot
	BayesSpace_result$res = res

	save(BayesSpace_result, file = paste0("stripe_pattern_BayesSpace_result_scenario_",i,"_rep_",j,".RData"))

	clusterNum=length(unique(BayesSpace_result$clusterlabel))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/figure/BayesSpace_scenario_",i,"_rep_",j,".pdf"),width=20,height=5)
	p1=plot_cluster(location=BayesSpace_result$location,clusterlabel=BayesSpace_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=BayesSpace_result$location,clusterlabel=BayesSpace_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("BayesSpace"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==3){ # SpaGCN

	library(reticulate)
	library(tidyverse)
	use_python("/net/mulan/home/shanglu/anaconda3/envs/SpaGCN/bin/python3.7", required=TRUE)
	source_python("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/code_utility_python.py")

	load(paste0("stripe_pattern_SpatialPCA_result_scenario_",i,"_rep_",j,".RData"))
	count_in = SpatialPCA_result$count_spot
	loc_in=as.data.frame( SpatialPCA_result$location_spot)
	clusterNum = length(unique(SpatialPCA_result$Truth))
	colnames(loc_in)=c("x","y") # important
	res = run_SpaGCN_py(t(count_in), loc_in , clusterNum) # cell by gene 
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$refined_pred))

	SpaGCN_result = list()
	SpaGCN_result$clusterlabel = res$refined_pred
	SpaGCN_result$location = res[,c("x","y")]
	SpaGCN_result$Truth = SpatialPCA_result$Truth
	SpaGCN_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	SpaGCN_result$NMI = NMI(tabb[,1],tabb[,2])
	SpaGCN_result$CHAOS = fx_CHAOS(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$PAS = fx_PAS(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$LISI = fx_lisi(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$count_spot = SpatialPCA_result$count_spot
	SpaGCN_result$location_spot = SpatialPCA_result$location_spot

	save(SpaGCN_result, file = paste0("stripe_pattern_SpaGCN_result_scenario_",i,"_rep_",j,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/figure/SpaGCN_scenario_",i,"_rep_",j,".pdf"),width=20,height=5)
	p1=plot_cluster(location=SpaGCN_result$location,clusterlabel=SpaGCN_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=SpaGCN_result$location,clusterlabel=SpaGCN_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpaGCN"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()




}else if(k==4){ # HMRF
	
	library(reticulate)
	library(tidyverse)
	library(peakRAM)

	load(paste0("stripe_pattern_SpatialPCA_result_scenario_",i,"_rep_",j,".RData"))
	clusterNum = length(unique(na.omit(SpatialPCA_result$Truth)))
	loc_in=as.data.frame( SpatialPCA_result$location_spot)
	count_in =  SpatialPCA_result$count_spot

	path_out = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/HMRF/scenario_",i,"_rep_",j)
	res = HMRF_func(count_in=t(count_in), location_in=loc_in, clusternum=clusterNum,path_out=path_out,betas = c(0,2,6)) # betas = [start, step, number]
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res[,6]))

	HMRF_result = list()
	HMRF_result$clusterlabel = as.character(res[,6])
	HMRF_result$location = SpatialPCA_result$location
	HMRF_result$Truth = SpatialPCA_result$Truth
	HMRF_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	HMRF_result$NMI = NMI(tabb[,1],tabb[,2])
	HMRF_result$CHAOS = fx_CHAOS(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$PAS = fx_PAS(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$LISI = fx_lisi(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$res = res

	save(HMRF_result, file = paste0("stripe_pattern_HMRF_result_scenario_",i,"_rep_",j,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/figure/HMRF_scenario_",i,"_rep_",j,".pdf"),width=20,height=5)
	p1=plot_cluster(location=HMRF_result$location,clusterlabel=HMRF_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=HMRF_result$location,clusterlabel=HMRF_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("HMRF"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==5){ # NMF

	library(peakRAM)

	load(paste0("stripe_pattern_SpatialPCA_result_scenario_",i,"_rep_",j,".RData"))
	clusterNum = length(unique(na.omit(SpatialPCA_result$Truth)))
	loc_in=as.data.frame( SpatialPCA_result$location_spot)
	count_in =  SpatialPCA_result$count_spot

	res = NMF_cluster_func(count_in=count_in, genelist=rownames(count_in),PCnum=20,cluster_method="louvain",clusternum=clusterNum)
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	NMF_result = list()
	NMF_result$clusterlabel = res$clusterlabel
	NMF_result$location = SpatialPCA_result$location
	NMF_result$Truth = SpatialPCA_result$Truth
	NMF_result$ARI =  adjustedRandIndex(tabb[,1], tabb[,2])
	NMF_result$NMI = NMI(tabb[,1],tabb[,2])
	NMF_result$CHAOS = fx_CHAOS(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$PAS = fx_PAS(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$LISI = fx_lisi(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$PCs = res$PC

save(NMF_result, file = paste0("stripe_pattern_NMF_result_scenario_",i,"_rep_",j,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/figure/NMF_scenario_",i,"_rep_",j,".pdf"),width=20,height=5)
	p1=plot_cluster(location=NMF_result$location,clusterlabel=NMF_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=NMF_result$location,clusterlabel=NMF_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("NMF"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==6){ # PCA

	
	load(paste0("stripe_pattern_SpatialPCA_result_scenario_",i,"_rep_",j,".RData"))
	clusterNum = length(unique(na.omit(SpatialPCA_result$Truth)))
	match_id = match(colnames(SpatialPCA_result$normalized_expr),colnames(SpatialPCA_result$count_spot))
	loc_in = as.data.frame(SpatialPCA_result$location_spot[match_id,])
	count_in = as.matrix(SpatialPCA_result$count_spot[,match_id])

	res = PCA_cluster_func(expr=SpatialPCA_result$normalized_expr,PCnum=20,cluster_method="louvain",clusternum=clusterNum)
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	PCA_result = list()
	PCA_result$clusterlabel = res$clusterlabel
	PCA_result$location = SpatialPCA_result$location
	PCA_result$Truth = SpatialPCA_result$Truth
	PCA_result$ARI =  adjustedRandIndex(tabb[,1], tabb[,2])
	PCA_result$NMI = NMI(tabb[,1],tabb[,2])
	PCA_result$CHAOS = fx_CHAOS(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$PAS = fx_PAS(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$LISI = fx_lisi(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$PCs = res$PC
	PCA_result$count_spot = SpatialPCA_result$count_spot
	PCA_result$location_spot = SpatialPCA_result$location_spot


	save(PCA_result, file = paste0("stripe_pattern_PCA_result_scenario_",i,"_rep_",j,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q2/figure/PCA_scenario_",i,"_rep_",j,".pdf"),width=20,height=5)
	p1=plot_cluster(location=PCA_result$location,clusterlabel=PCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=PCA_result$location,clusterlabel=PCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("PCA"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()


}

Simulate data, spot level with arbitrary spatial correlation

args <- as.numeric(commandArgs(TRUE))
i = args[1] # scenario
j = args[2] # repeatment
k = args[3] # method
num = args[4] # spot level dataset
print(i)
print(j)
print(k)
print(num)

if(num == 1){
	spotnum=5077
}else if(num==2){
	spotnum=3602
}else{
	spotnum=1948
}

# cd /net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3

#--------------------------------------------------------------------


setwd("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29")



library(tidyverse)
library(Giotto)
library(scater)
library(SC3)
library(BayesSpace)
library(gtools)
library(splatter)
library(igraph)
library(assertthat)
library(SPARK)
library(Seurat)
library(peakRAM)
library(ggplot2)
library(ggpubr)
library(mclust) # ARI
library(aricode)# NMI
library(SpatialPCA)# NMI
library(lisi) # lisi score
library(reticulate) 
library(FNN)

source("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/code_utility.R")
load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/LIBDsubsample.RData")
load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/init_params_LIBD.RData")

load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/results_5077_spark/LIBD_res_scenairo_",i,"_rep_",j,".RData"))


D3=c("#f4f1de","#e07a5f","#3d405b","#81b29a","#f2cc8f","#a8dadc","#f1faee","#f08080",
	"#F94144", "#dda15e", "#F8961E" ,"#bc6c25", "#F9C74F" ,"#90BE6D", "#43AA8B")


# subspot level
count_mat = res[[1]]
truth = subsample$label
location=as.matrix(subsample[,2:3])
knearest = 4
indNN = FNN::get.knn(location,k=knearest)[[1]] 
# randomly select 2500 cells to fuse with its nearest 4 cells
set.seed(j+07132022)
tofuse_cells = sample(1:10000,2500)
count_mat_use = count_mat
for(cell_id in tofuse_cells){
	set.seed(cell_id+07292022)
	sample_tmp = sample(1:10,knearest)
	prop = matrix(sample_tmp/sum(sample_tmp),1,knearest) 
	# for each nearest cell, randomly split the current cell to 4 components, and fuse to the 4 nearest cells
	count_mat_use[,indNN[cell_id,]] = count_mat[,indNN[cell_id,]]+ count_mat[,cell_id] %*% prop
	count_mat_use[,cell_id]=0
}
count_mat= count_mat_use
location=as.matrix(location)
celltypes = res[[2]]
# first generate subspot level data
if(spotnum == 5077){
	grid_subspot = make_grid(square_size = 4,location)
}else if(spotnum == 3602){
	grid_subspot = make_grid(square_size = 5,location)
}else if(spotnum == 1948){
	grid_subspot = make_grid(square_size = 7,location)
}
count_location_subspot = make_spot(grid_subspot,count_mat,celltypes,subsample$label)
count_subspot = count_location_subspot$count_spot
location_subspot = count_location_subspot$pseudo_location_spot/3
truth_subspot = count_location_subspot$subspottruth
spot_level_data = make_spot_from_subspot(count_location_subspot)
# truth_subspot[spot_level_data$used_subspots]
# spot level
truth=spot_level_data$truth_spot
truth_empty = which(truth=="empty")
count_spot = spot_level_data$count_spot[,-truth_empty]
location_spot = spot_level_data$location_spot[-truth_empty,]
truth=truth[-truth_empty]
rownames(location_spot) = colnames(count_spot) = paste0("spot",1:dim(count_spot)[2])
rownames(count_spot) = paste0("gene",1:dim(count_spot)[1])

save(count_spot,location_spot, truth,celltypes,file=paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/fuse_res_scenairo_",i,"_rep_",j,"_dataset_",num,"_count_location.RData"))



load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/fuse_res_scenairo_",i,"_rep_",j,"_dataset_",num,"_count_location.RData"))


if(k ==1){

	if(num==1){

		fusesimu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="spark", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
		fusesimu = SpatialPCA_buildKernel(fusesimu, kerneltype="gaussian", bandwidthtype="Silverman")
		fusesimu = SpatialPCA_EstimateLoading(fusesimu,fast=FALSE,SpatialPCnum=20)
		fusesimu = SpatialPCA_SpatialPCs(fusesimu, fast=FALSE)
		SpatialPCA_result = list()
		SpatialPCA_result$SpatialPCs  = fusesimu@SpatialPCs
		SpatialPCA_result$location = fusesimu@location
		SpatialPCA_result$celltypes = paste0("celltype",celltypes)
		SpatialPCA_result$Truth = truth[match(rownames(fusesimu@location),rownames(location_spot))]
		SpatialPCA_result$clusterlabel = louvain_clustering(4,SpatialPCA_result$SpatialPCs,500) # original:500
		SpatialPCA_result$clusterlabel_refine = refine_cluster_10x(SpatialPCA_result$clusterlabel, SpatialPCA_result$location,shape="square") 
		SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$Truth)
		SpatialPCA_result$ARI_refine = adjustedRandIndex(SpatialPCA_result$clusterlabel_refine,SpatialPCA_result$Truth)
		SpatialPCA_result$NMI = NMI(as.character(SpatialPCA_result$clusterlabel),as.character(SpatialPCA_result$Truth))
		SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$LISI = fx_lisi(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$count_spot = count_spot
		SpatialPCA_result$location_spot = location_spot
		SpatialPCA_result$normalized_expr = fusesimu@normalized_expr
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		save(SpatialPCA_result, file = paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

		#load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/SpatialPCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
		p1=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
		p2=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpatialPCA"),color_in=D3[1:clusterNum])
		print(ggarrange(p1, p2, ncol = 2, nrow = 1))
		dev.off()

	}else if (num==2){

		fusesimu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="spark", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
		fusesimu = SpatialPCA_buildKernel(fusesimu, kerneltype="gaussian", bandwidthtype="Silverman")
		fusesimu = SpatialPCA_EstimateLoading(fusesimu,fast=FALSE,SpatialPCnum=20)
		fusesimu = SpatialPCA_SpatialPCs(fusesimu, fast=FALSE)
		SpatialPCA_result = list()
		SpatialPCA_result$SpatialPCs  = fusesimu@SpatialPCs
		SpatialPCA_result$location = fusesimu@location
		SpatialPCA_result$celltypes = paste0("celltype",celltypes)
		SpatialPCA_result$Truth = truth[match(rownames(fusesimu@location),rownames(location_spot))]
		SpatialPCA_result$clusterlabel = louvain_clustering(4,SpatialPCA_result$SpatialPCs,500) # original:500
		SpatialPCA_result$clusterlabel_refine = refine_cluster_10x(SpatialPCA_result$clusterlabel, SpatialPCA_result$location,shape="square") 
		SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$Truth)
		SpatialPCA_result$ARI_refine = adjustedRandIndex(SpatialPCA_result$clusterlabel_refine,SpatialPCA_result$Truth)
		SpatialPCA_result$NMI = NMI(as.character(SpatialPCA_result$clusterlabel),as.character(SpatialPCA_result$Truth))
		SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$LISI = fx_lisi(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$count_spot = count_spot
		SpatialPCA_result$location_spot = location_spot
		SpatialPCA_result$normalized_expr = fusesimu@normalized_expr
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		save(SpatialPCA_result, file = paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	#load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/SpatialPCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
		p1=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
		p2=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpatialPCA"),color_in=D3[1:clusterNum])
		print(ggarrange(p1, p2, ncol = 2, nrow = 1))
		dev.off()

	}else if (num==3){

		fusesimu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="spark", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
		fusesimu = SpatialPCA_buildKernel(fusesimu, kerneltype="gaussian", bandwidthtype="Silverman")
		fusesimu = SpatialPCA_EstimateLoading(fusesimu,fast=FALSE,SpatialPCnum=20)
		fusesimu = SpatialPCA_SpatialPCs(fusesimu, fast=FALSE)
		SpatialPCA_result = list()
		SpatialPCA_result$SpatialPCs  = fusesimu@SpatialPCs
		SpatialPCA_result$location = fusesimu@location
		SpatialPCA_result$celltypes = paste0("celltype",celltypes)
		SpatialPCA_result$Truth = truth[match(rownames(fusesimu@location),rownames(location_spot))]
		SpatialPCA_result$clusterlabel = walktrap_clustering(4,SpatialPCA_result$SpatialPCs,100) 
		SpatialPCA_result$clusterlabel_refine = refine_cluster_10x(SpatialPCA_result$clusterlabel, SpatialPCA_result$location,shape="square") 
		SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$Truth)
		SpatialPCA_result$ARI_refine = adjustedRandIndex(SpatialPCA_result$clusterlabel_refine,SpatialPCA_result$Truth)
		SpatialPCA_result$NMI = NMI(as.character(SpatialPCA_result$clusterlabel),as.character(SpatialPCA_result$Truth))
		SpatialPCA_result$CHAOS = fx_CHAOS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$PAS = fx_PAS(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$LISI = fx_lisi(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
		SpatialPCA_result$count_spot = count_spot
		SpatialPCA_result$location_spot = location_spot
		SpatialPCA_result$normalized_expr = fusesimu@normalized_expr
		clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		save(SpatialPCA_result, file = paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	#load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum=length(unique(SpatialPCA_result$clusterlabel))
		pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/SpatialPCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
		p1=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
		p2=plot_cluster(location=SpatialPCA_result$location,clusterlabel=SpatialPCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpatialPCA"),color_in=D3[1:clusterNum])
		print(ggarrange(p1, p2, ncol = 2, nrow = 1))
		dev.off()

	}



}else if (k==2){ # BayesSpace

	load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])
	clusterNum = length(unique(SpatialPCA_result$Truth))
	colnames(loc_in)=c("x","y") # important

	res = BayesSpace_func(count_in, loc_in, clusternum=clusterNum,nrep=50000,ifLIBD=FALSE,platform="ST")
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	save(res, file = paste0("fused_cells_BayesSpace_result_scenario_",i,"_rep_",j,".RData"))

	BayesSpace_result = list()
	BayesSpace_result$clusterlabel = as.character(res$clusterlabel)
	BayesSpace_result$location = SpatialPCA_result$location
	BayesSpace_result$Truth = SpatialPCA_result$Truth
	BayesSpace_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	BayesSpace_result$NMI = NMI(tabb[,1],tabb[,2])
	BayesSpace_result$CHAOS = fx_CHAOS(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$PAS = fx_PAS(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$LISI = fx_lisi(res$clusterlabel, SpatialPCA_result$location)
	BayesSpace_result$count_spot = SpatialPCA_result$count_spot
	BayesSpace_result$location_spot = SpatialPCA_result$location_spot
	BayesSpace_result$res = res

	save(BayesSpace_result, file = paste0("fused_cells_BayesSpace_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	clusterNum=length(unique(BayesSpace_result$clusterlabel))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/BayesSpace_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=BayesSpace_result$location,clusterlabel=BayesSpace_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=BayesSpace_result$location,clusterlabel=BayesSpace_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("BayesSpace"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==3){ # SpaGCN

	library(reticulate)
	library(tidyverse)
	use_python("/net/mulan/home/shanglu/anaconda3/envs/SpaGCN/bin/python3.7", required=TRUE)
	source_python("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/code_utility_python.py")

	load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])
	clusterNum = length(unique(SpatialPCA_result$Truth))
	colnames(loc_in)=c("x","y") # important
	res = run_SpaGCN_py(t(count_in), loc_in , clusterNum) # cell by gene 
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$refined_pred))

	SpaGCN_result = list()
	SpaGCN_result$clusterlabel = res$refined_pred
	SpaGCN_result$location = res[,c("x","y")]
	SpaGCN_result$Truth = SpatialPCA_result$Truth
	SpaGCN_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	SpaGCN_result$NMI = NMI(tabb[,1],tabb[,2])
	SpaGCN_result$CHAOS = fx_CHAOS(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$PAS = fx_PAS(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$LISI = fx_lisi(res$refined_pred, SpaGCN_result$location)
	SpaGCN_result$count_spot = SpatialPCA_result$count_spot
	SpaGCN_result$location_spot = SpatialPCA_result$location_spot

	save(SpaGCN_result, file = paste0("fused_cells_SpaGCN_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/SpaGCN_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=SpaGCN_result$location,clusterlabel=SpaGCN_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=SpaGCN_result$location,clusterlabel=SpaGCN_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("SpaGCN"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()


}else if(k==4){ # HMRF
	
	library(reticulate)
	library(tidyverse)
	library(peakRAM)	
	load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])
	clusterNum = length(unique(SpatialPCA_result$Truth))
	colnames(loc_in)=c("x","y") # important

	path_out = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/HMRF/scenario_",i,"_rep_",j,"_dataset_",num)
	res = HMRF_func(count_in=t(count_in), location_in=loc_in, clusternum=clusterNum,path_out=path_out,betas = c(0,2,6)) # betas = [start, step, number]
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res[,6]))

	HMRF_result = list()
	HMRF_result$clusterlabel = as.character(res[,6])
	HMRF_result$location = SpatialPCA_result$location
	HMRF_result$Truth = SpatialPCA_result$Truth
	HMRF_result$ARI = adjustedRandIndex(tabb[,1], tabb[,2])
	HMRF_result$NMI = NMI(tabb[,1],tabb[,2])
	HMRF_result$CHAOS = fx_CHAOS(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$PAS = fx_PAS(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$LISI = fx_lisi(HMRF_result$clusterlabel, HMRF_result$location)
	HMRF_result$res = res

	save(HMRF_result, file = paste0("fused_cells_HMRF_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/HMRF_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=HMRF_result$location,clusterlabel=HMRF_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=HMRF_result$location,clusterlabel=HMRF_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("HMRF"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==5){ # NMF

	library(peakRAM)

	load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum = length(unique(na.omit(SpatialPCA_result$Truth)))
	match_id=match(colnames(SpatialPCA_result$normalized_expr), colnames(SpatialPCA_result$count_spot))
	count_in = SpatialPCA_result$count_spot[,match_id]
	loc_in=as.data.frame( SpatialPCA_result$location_spot[match_id,])

	res = NMF_cluster_func(count_in=count_in, genelist=rownames(count_in),PCnum=20,cluster_method="louvain",clusternum=clusterNum)
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	NMF_result = list()
	NMF_result$clusterlabel = res$clusterlabel
	NMF_result$location = SpatialPCA_result$location
	NMF_result$Truth = SpatialPCA_result$Truth
	NMF_result$ARI =  adjustedRandIndex(tabb[,1], tabb[,2])
	NMF_result$NMI = NMI(tabb[,1],tabb[,2])
	NMF_result$CHAOS = fx_CHAOS(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$PAS = fx_PAS(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$LISI = fx_lisi(NMF_result$clusterlabel, SpatialPCA_result$location)
	NMF_result$PCs = res$PC

	save(NMF_result, file = paste0("fused_cells_NMF_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/NMF_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=NMF_result$location,clusterlabel=NMF_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=NMF_result$location,clusterlabel=NMF_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("NMF"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()



}else if(k==6){ # PCA

	
	load(paste0("fused_cells_SpatialPCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))
	clusterNum = length(unique(na.omit(SpatialPCA_result$Truth)))
	match_id = match(colnames(SpatialPCA_result$normalized_expr),colnames(SpatialPCA_result$count_spot))
	loc_in = as.data.frame(SpatialPCA_result$location_spot[match_id,])
	count_in = as.matrix(SpatialPCA_result$count_spot[,match_id])

	res = PCA_cluster_func(expr=SpatialPCA_result$normalized_expr,PCnum=20,cluster_method="louvain",clusternum=clusterNum)
	tabb = na.omit(data.frame("Truth"=SpatialPCA_result$Truth,"clusterlabel"=res$clusterlabel))

	PCA_result = list()
	PCA_result$clusterlabel = res$clusterlabel
	PCA_result$location = SpatialPCA_result$location
	PCA_result$Truth = SpatialPCA_result$Truth
	PCA_result$ARI =  adjustedRandIndex(tabb[,1], tabb[,2])
	PCA_result$NMI = NMI(tabb[,1],tabb[,2])
	PCA_result$CHAOS = fx_CHAOS(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$PAS = fx_PAS(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$LISI = fx_lisi(PCA_result$clusterlabel, PCA_result$location)
	PCA_result$PCs = res$PC
	PCA_result$count_spot = SpatialPCA_result$count_spot
	PCA_result$location_spot = SpatialPCA_result$location_spot


	save(PCA_result, file = paste0("fused_cells_PCA_result_scenario_",i,"_rep_",j,"_dataset_",num,".RData"))

	pdf(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer2/Q3/July29/figure/PCA_scenario_",i,"_rep_",j,"_dataset_",num,".pdf"),width=10,height=5)
	p1=plot_cluster(location=PCA_result$location,clusterlabel=PCA_result$Truth,pointsize=2,text_size=20 ,title_in=paste0("Truth"),color_in=D3[1:clusterNum])
	p2=plot_cluster(location=PCA_result$location,clusterlabel=PCA_result$clusterlabel,pointsize=2,text_size=20 ,title_in=paste0("PCA"),color_in=D3[1:clusterNum])
	print(ggarrange(p1, p2, ncol = 2, nrow = 1))
	dev.off()


}

High resolution spatial map reconstruction

args <- as.numeric(commandArgs(TRUE))
i = args[1] # scenario
j = args[2] # repeat
print(i)

  library(tidyverse)
  library(Giotto)
  library(scater)
  library(Seurat)
  library(mclust)
  library(SC3)
  library(BayesSpace)
  library(gtools)
  library(splatter)
  library(reticulate)
  library(mclust )
  library(igraph)
  library(assertthat)
  library(SpatialPCA)
  library(ggplot2)
  
 source("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/code_utility.R")
load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/LIBDsubsample.RData")
load("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/init_params_LIBD.RData")
load(paste0("LIBD_res_scenairo_",i,"_rep_",j,".RData"))


# subspot level
count_mat = res[[1]]
truth = subsample$label
location=as.matrix(subsample[,2:3])
# truth = LIBDsimu_pseudo$info$z
location=as.matrix(location)
celltypes = res[[2]]
# first generate subspot level data
grid_subspot = make_grid(square_size = 7,location)
count_location_subspot = make_spot(grid_subspot,count_mat,celltypes,subsample$label)
count_subspot = count_location_subspot$count_spot
location_subspot = count_location_subspot$pseudo_location_spot/3
truth_subspot = count_location_subspot$subspottruth
spot_level_data = make_spot_from_subspot(count_location_subspot)
truth_subspot[spot_level_data$used_subspots]
# spot level
truth=spot_level_data$truth_spot
truth_empty = which(truth=="empty")
count_spot = spot_level_data$count_spot[,-truth_empty]
location_spot = spot_level_data$location_spot[-truth_empty,]
truth=truth[-truth_empty]
rownames(location_spot) = colnames(count_spot) = paste0("spot",1:dim(count_spot)[2])
rownames(count_spot) = paste0("gene",1:dim(count_spot)[1])

dim(count_spot)

save(count_spot, location_spot, truth, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/Q3/simulation/prediction/data/Data_count_location_truth_scenario_",i,"_rep_",j,".RData"))


#----------------------------------  
# Run SpatialPCA
#----------------------------------  
print("run SpatialPCA")

LIBDsimu = CreateSpatialPCAObject(counts=count_spot, location=location_spot, project = "SpatialPCA",gene.type="spatial",sparkversion="spark", gene.number=3000,customGenelist=NULL,min.loctions = 20, min.features=20)
LIBDsimu = SpatialPCA_buildKernel(LIBDsimu, kerneltype="gaussian", bandwidthtype="SJ")
LIBDsimu = SpatialPCA_EstimateLoading(LIBDsimu,fast=FALSE,SpatialPCnum=20)
LIBDsimu = SpatialPCA_SpatialPCs(LIBDsimu, fast=FALSE)
SpatialPCA_result = list()
SpatialPCA_result$SpatialPCs  = LIBDsimu@SpatialPCs
SpatialPCA_result$normalized_expr  = LIBDsimu@normalized_expr
SpatialPCA_result$location = LIBDsimu@location
pred_cluster= walktrap_clustering_new(4,SpatialPCA_result$SpatialPCs,100 )
spotlist=rownames(SpatialPCA_result$location)
dist = as.matrix(dist(SpatialPCA_result$location))
pred_refine = refine_v2(spotlist, pred_cluster, dist,shape="square") 
SpatialPCA_result$pred_cluster = pred_cluster
SpatialPCA_result$clusterlabel = pred_refine
SpatialPCA_result$truth = truth[match(rownames(LIBDsimu@location),rownames(location_spot))]
SpatialPCA_result$ARI = adjustedRandIndex(SpatialPCA_result$clusterlabel,SpatialPCA_result$truth)
SpatialPCA_result$NMI = compare(as.factor(SpatialPCA_result$clusterlabel),as.factor(SpatialPCA_result$truth), method = "nmi")
SpatialPCA_result$CHAOS = fx_chaos(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
SpatialPCA_result$PAS = fx_PAEP(SpatialPCA_result$clusterlabel, SpatialPCA_result$location)
print(SpatialPCA_result$ARI )

save(SpatialPCA_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/Q3/simulation/prediction/result/spotlevel_SpatialPCA_spatialgene_result_scenario_",i,"_rep_",j,".RData"))

# load(paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/reviewer/simulation/LIBDsimu/results_1948/spotlevel_SpatialPCA_spatialgene_result_scenario_",i,"_rep_",j,".RData"))

#----------------------------------  
# run BayesSpace, ST, hvg genes
#----------------------------------  

print("run BayesSpace, ST, hvg genes")
# filter out spots with 0 counts
ind_keep=which(colSums(count_spot) > 0)
location_spot_bayesSpace=location_spot[ind_keep,]
count_spot_BayesSpace=count_spot[,ind_keep]
colnames(location_spot_bayesSpace) <- c("row", "col")

sce_LIBDsimu_ST <- SingleCellExperiment(assays = list(counts = count_spot_BayesSpace), colData = location_spot_bayesSpace)
sce_LIBDsimu_ST <- spatialPreprocess(sce_LIBDsimu_ST, platform="ST",n.PCs = 15, n.HVGs = 2000, log.normalize = T)
sce_LIBDsimu_ST <- spatialCluster(sce_LIBDsimu_ST, q=4, d=15, platform='ST',nrep=10000, gamma=3, save.chain=FALSE) 
sce_labels=sce_LIBDsimu_ST$spatial.cluster

save(sce_LIBDsimu_ST,  file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/Q3/simulation/prediction/result/BayesSpace_sce_LIBDsimu_ST_scenario_",i,"_rep_",j,".RData"))


#-------------------------------------
# run BayesSpace, high resolution, ST
#-------------------------------------

print("BayesSpace, high resolution, ST")
sce_LIBDsimu_ST_enhanced <- spatialEnhance(sce_LIBDsimu_ST, q=4, platform="ST", d=15,
                                    model="t", gamma=2,
                                    jitter_prior=0.3, jitter_scale=3.5,
                                    nrep=10000, 
                                    burn.in=1000,
                                    save.chain=FALSE)
sce_LIBDsimu_ST_enhanced=BayesSpace_high_ST_result$sce_LIBDsimu_ST_enhanced
sce_LIBDsimu_ST_enhanced_labels = sce_LIBDsimu_ST_enhanced$spatial.cluster
vertices_ST <- unique(.make_vertices(sce_LIBDsimu_ST_enhanced, sce_LIBDsimu_ST_enhanced_labels, "ST", is.enhanced="TRUE"))
vertices_loc = unique(vertices_ST[,c(1,2,4)])
vertices_location = vertices_loc[,1:2]
BayesSpace_high_ST_result = list()
BayesSpace_high_ST_result$sce_LIBDsimu_ST_enhanced = sce_LIBDsimu_ST_enhanced
BayesSpace_high_ST_result$sce_cluster = sce_LIBDsimu_ST_enhanced_labels
BayesSpace_high_ST_result$location = unique(data.frame("x"=vertices_location$y.pos,"y"=vertices_location$x.pos))
coordinate_allsubspot = c()
for(k in 1:dim(location_subspot)[1]){
  x = round(location_subspot[k,1],3)
  y = round(location_subspot[k,2],3)
  coordinate_allsubspot[k] = paste0(x,"_",y)
}
coordinate_BayesSpacesubspot = c()
for(k in 1:dim(BayesSpace_high_ST_result$location)[1]){
  x = round(BayesSpace_high_ST_result$location[k,1],3)
  y = round(BayesSpace_high_ST_result$location[k,2],3)
  coordinate_BayesSpacesubspot[k] = paste0(x,"_",y)
}
id_empty = which(BayesSpace_high_ST_result$truth=="empty")
ind_match_totalsubspots = match(coordinate_BayesSpacesubspot, coordinate_allsubspot)
BayesSpace_high_ST_result$truth =truth_subspot[ind_match_totalsubspots]
BayesSpace_high_ST_result$ARI = adjustedRandIndex(BayesSpace_high_ST_result$sce_cluster[-id_empty],BayesSpace_high_ST_result$truth[-id_empty])
BayesSpace_high_ST_result$NMI = compare(as.factor(BayesSpace_high_ST_result$sce_cluster[-id_empty]),as.factor(BayesSpace_high_ST_result$truth[-id_empty]), method = "nmi")
BayesSpace_high_ST_result$CHAOS = fx_chaos(BayesSpace_high_ST_result$sce_cluster[-id_empty], BayesSpace_high_ST_result$location[-id_empty,])
BayesSpace_high_ST_result$PAS = fx_PAEP(BayesSpace_high_ST_result$sce_cluster[-id_empty], BayesSpace_high_ST_result$location[-id_empty,])
print(BayesSpace_high_ST_result$ARI )

save(BayesSpace_high_ST_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/Q3/simulation/prediction/result/BayesSpace_sce_LIBDsimu_ST_enhanced_scenario_",i,"_rep_",j,".RData"))

#-------------------------------------- 
# run SpatialPCA, high resolution, ST
#-------------------------------------- 

print("run SpatialPCA, high resolution, ST")
newloc_ST = BayesSpace_high_ST_result$location[-id_empty,] # note, here x and y switched in BayesSpace vertex
LIBDsimu_high_ST = SpatialPCA_highresolution(LIBDsimu,newloc_ST)
expr_high = LIBDsimu@W %*% LIBDsimu_high_ST@highPCs
rownames(expr_high) = rownames(LIBDsimu@normalized_expr)
colnames(expr_high) = vertices_loc$spot[-id_empty]
SpatialPCA_high_ST_result = list()
SpatialPCA_high_ST_result$LIBDsimu_high_ST = LIBDsimu_high_ST
pred_cluster= louvain_clustering_new(4,LIBDsimu_high_ST@highPCs,1000 )
SpatialPCA_high_ST_result$clusterlabel = pred_cluster
SpatialPCA_high_ST_result$highPCs = LIBDsimu_high_ST@highPCs
SpatialPCA_high_ST_result$location = LIBDsimu_high_ST@highPos
SpatialPCA_high_ST_result$expr_high = expr_high
SpatialPCA_high_ST_result$truth =BayesSpace_high_ST_result$truth[-id_empty]
SpatialPCA_high_ST_result$ARI = adjustedRandIndex(SpatialPCA_high_ST_result$clusterlabel,SpatialPCA_high_ST_result$truth)
SpatialPCA_high_ST_result$NMI = compare(as.factor(SpatialPCA_high_ST_result$clusterlabel),as.factor(SpatialPCA_high_ST_result$truth), method = "nmi")
SpatialPCA_high_ST_result$CHAOS = fx_chaos(SpatialPCA_high_ST_result$clusterlabel, SpatialPCA_high_ST_result$location)
SpatialPCA_high_ST_result$PAS = fx_PAEP(SpatialPCA_high_ST_result$clusterlabel, SpatialPCA_high_ST_result$location)
print(SpatialPCA_high_ST_result$ARI)
save(SpatialPCA_high_ST_result,  file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/Q3/simulation/prediction/result/spotlevel_SpatialPCA_highST_result_scenario_",i,"_rep_",j,".RData"))

#-------------------------------------- 
# run BayesSpace, prediction gene expression
#-------------------------------------- 

print("run BayesSpace, prediction gene expression")
gene_input = rownames(SpatialPCA_result$normalized_expr)
sce_pred = function(index){
  sce_enhanced_each <- enhanceFeatures(sce_LIBDsimu_ST_enhanced, sce_LIBDsimu_ST,
                                model="xgboost",
                                feature_names=gene_input[index],
                                nrounds=0)
  logcounts = logcounts(sce_enhanced_each)
  return(logcounts[which(rownames(logcounts) %in% gene_input[index]),])
}
expr_high_BayesSpace = matrix(NA, dim(expr_high)[1], dim(expr_high)[2])
for(index in 1:length(gene_input)){
  print(index)
  pred_expr =c(sce_pred(index))
  expr_high_BayesSpace[index,] = pred_expr[-id_empty]
}
rownames(expr_high_BayesSpace) = gene_input

save(expr_high_BayesSpace, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/Q3/simulation/prediction/result/expr_high_BayesSpace_result_scenairo_",i,"_rep_",j,".RData"))

coordinate_allsubspot = c()
for(k in 1:dim(location_subspot)[1]){
  x = round(location_subspot[k,1],3)
  y = round(location_subspot[k,2],3)
  coordinate_allsubspot[k] = paste0(x,"_",y)
}
coordinate_BayesSpacesubspot = c()
for(k in 1:dim(BayesSpace_high_ST_result$location)[1]){
  x = round(BayesSpace_high_ST_result$location[k,1],3)
  y = round(BayesSpace_high_ST_result$location[k,2],3)
  coordinate_BayesSpacesubspot[k] = paste0(x,"_",y)
}
ind_match_totalsubspots = match(coordinate_BayesSpacesubspot, coordinate_allsubspot)
real_subspot_count = count_location_subspot$count_spot[,ind_match_totalsubspots]
real_subspot_count = real_subspot_count[,-id_empty]
rownames(real_subspot_count) = paste0("gene",1:5000)

real_subspot_count_sub = real_subspot_count[match(rownames(expr_high),rownames(real_subspot_count)),]
ind_remove = which(is.na(real_subspot_count_sub[1,]))
dat_result = list()
dat_result$real_subspot_count=real_subspot_count
dat_result$real_subspot_count_sub=real_subspot_count_sub[,-ind_remove]
dat_result$expr_high=expr_high[,-ind_remove]
dat_result$expr_high_BayesSpace=expr_high_BayesSpace[,-ind_remove]

save(dat_result, file = paste0("/net/mulan/disk2/shanglu/Projects/SpatialPCA/NC/Reviewer1/Q3/simulation/prediction/result/Compare_pred_expr_",i,"_rep_",j,".RData"))

Table of Contents

Prepare simulation datasets

Generate single cell data

Main functions in simulation

Simulate data, 10000 cells

Simulate data, spot level

Simulate data, spot level with stripe pattern

Simulate data, spot level with arbitrary spatial correlation

High resolution spatial map reconstruction