# Clear working space and load libraries
# NOTE(review): rm(list = ls()) deletes every visible object in the current
# environment before the analysis starts; anything the rest of the script
# needs must therefore be created or loaded below. Attached packages and
# options are not affected by this call.
rm(list = ls())
library(tidydr)
library(foreach)
library(doParallel)
library(dplyr)
library(parallelDist)
library(ggplot2)
# Project-local helper script (presumably where clusteringMetric() used
# further down is defined -- confirm).
source("src/clusteringMetric.R")

# Seven-entry hex colour palette.
my_color <- c(
  "#5050FF", "#CE3D32", "#749B58", "#F0E685",
  "#00BFC4", "#B79F00", "#00BA38"
)

# Determine the optimal clustering strategy

# Fix the global RNG state for everything that follows.
set.seed(1)

# Load MAAS results
maas.res <- readRDS(file = "../7.MAAS_running/maas.res.all.rds")
# Barcode list stored next to the MAAS results (presumably the barcodes shared
# across the input modalities -- confirm against the upstream step).
barcode.list <- readRDS(file = "../7.MAAS_running/share.barcode.rds")

## Determine a reasonable clustering strategy

# Score each MAAS result (all list elements except the last) under k-means
# with k = 2..6. Rows of `clusPerformance` index the MAAS results, columns
# index k; each cell holds the value returned by clusteringMetric() for that
# combination, computed with cosine dissimilarity on the result's W matrix.
#
# The original code referred to `maas.test`, which is never created in this
# script (the workspace is cleared at the top). The MAAS results are loaded
# into `maas.res`, so that object is used here.
# NOTE(review): the "dims=2.." row labels assume maas.res[[i]] corresponds to
# dims = i + 1 -- confirm against the MAAS run that produced the file.
stopifnot(length(maas.res) >= 2)
clusPerformance <- data.frame(matrix(nrow = length(maas.res) - 1, ncol = 5),
                              row.names = paste0("dims=", 2:length(maas.res)))
colnames(clusPerformance) <- paste0("k=", 2:6)
for (i in seq_len(length(maas.res) - 1)) {
  # The input table depends only on i, so build it once per MAAS result.
  df <- as.data.frame(maas.res[[i]]$W)
  for (j in 2:6) {
    # Same seed for every run so each k-means initialisation is reproducible.
    maas.tmp.clu <- withr::with_seed(2, kmeans(df, centers = j)$cluster)
    # Column j - 1 because k starts at 2.
    clusPerformance[i, j - 1] <- clusteringMetric(maas.res[[i]]$W,
                                                  clu = maas.tmp.clu,
                                                  disMethod = "cosine")
  }
}

# Re-run clustering with the chosen configuration: first MAAS result, k = 2.
# NOTE(review): the choice is hard-coded; presumably it was read off
# `clusPerformance` above -- confirm it still matches the best-scoring cell.
df <- as.data.frame(maas.res[[1]]$W)
maas.clu <- data.frame(
  Cluster = withr::with_seed(2, kmeans(df, centers = 2)$cluster)
)
maas.clu$Cluster <- as.factor(maas.clu$Cluster)

## Visualization
# Embed the selected table (`df`) in two dimensions with UMAP (correlation
# metric, 10 neighbours); the seed makes the layout reproducible.
umap.axis <- withr::with_seed(
  2,
  uwot::umap(df, n_neighbors = 10, metric = "correlation")
)
umap.axis <- as.data.frame(umap.axis)
# Attach the k-means labels so points can be coloured by cluster.
umap.axis$Cluster <- maas.clu$Cluster
colnames(umap.axis) <- c("UMAP-1", "UMAP-2", "Cluster")

# Non-syntactic column names need backticks inside aes().
umap.plot <- ggplot(umap.axis, aes(`UMAP-1`, `UMAP-2`)) +
  geom_point(aes(color = Cluster), size = 1.75) +
  theme_dr() +
  labs(x = "UMAP-1", y = "UMAP-2") +
  theme(panel.grid = element_blank(),
        axis.title = element_text(size = 14),
        axis.text = element_blank(),
        legend.text = element_text(size = 12))
# Print explicitly: a bare ggplot expression is only rendered by top-level
# auto-printing, which does not happen when the script is run via source().
print(umap.plot)