We benchmarked several SNV denoising tools designed for single-cell data and found that CBM shows superior performance (please refer to our paper for more details). Users should install and run CBM before running MAAS.

In this tutorial, CBM has been installed in ~/App/cbm-main.

library(dplyr)
library(data.table)
library(stringr)

# Local path of the raw SNV matrix; it is handed to CBM through the -i option
# when the cbm command is built further down.
input_file <- "snv.CBMinput.txt"

The raw SNV profile used as input for CBM is a cell-by-mutation matrix (cells in rows, mutations in columns). The input file carries no cell or mutation names; they are restored after CBM has run.

# Preview the raw input matrix (cells in rows, mutations in columns).
# The file has no header line, so it must be read with header = FALSE:
# header = TRUE would consume the first cell's row as column names
# (X1, X0, X0.1, ...) and silently drop that cell from the preview.
input <- read.table(
  "http://bioinfo.szbl.ac.cn/share/MAAS_data/snv.CBMinput.txt",
  sep = "\t",
  header = FALSE
)
input[1:10, 1:10]
#>    V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
#> 1   1  0  0  0  0  0  0  0  0   0
#> 2   0  0  1  0  0  0  0  0  0   0
#> 3   0  0  0  0  0  0  0  0  0   0
#> 4   0  0  0  0  0  0  0  0  0   0
#> 5   0  0  0  0  0  0  0  0  0   0
#> 6   0  0  0  0  0  0  0  0  0   0
#> 7   0  0  0  0  0  0  0  0  0   0
#> 8   0  0  0  0  0  0  0  0  0   0
#> 9   0  0  0  0  0  0  0  0  0   0
#> 10  0  0  0  0  0  0  0  0  0   0
# Run CBM on the raw matrix and load the inferred clone genotypes.
# NOTE(review): CBM appears to treat the -o value as a prefix to which it
# appends ".clones.txt" / ".assignment.txt" (with "./" the results land in the
# working directory) -- confirm against the CBM documentation.
output_file <- "./"
cmd <- paste(
  "~/App/cbm-main/bin/cbm", # left unquoted so the shell still expands "~"
  "-i", shQuote(input_file),
  "-o", shQuote(output_file),
  "-K 10" # Set up to 10 clones
)

# system() returns the exit status of the command; stop here on failure so
# that stale or missing result files are never read below.
status <- system(cmd)
if (status != 0) {
  stop("CBM exited with status ", status, ": ", cmd, call. = FALSE)
}

pred.cluster.genotype <- as.data.frame(
  fread(paste0(output_file, ".clones.txt"))
)
# Label the clusters 0, 1, 2, ... so that rows can be looked up by the cluster
# ids found in the assignment file; seq_len() stays valid for zero rows.
rownames(pred.cluster.genotype) <- seq_len(nrow(pred.cluster.genotype)) - 1L
pred.cluster.genotype[, 1:10]

The genotype of each cluster, with clusters in rows and mutations in columns:

#>   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
#> 0  1  0  1  1  0  0  0  0  0   0
#> 1  0  0  0  0  0  0  0  0  0   0
#> 2  0  0  0  0  0  0  0  0  0   0
#> 3  0  0  0  0  0  0  0  0  0   0
#> 4  0  0  0  0  0  0  0  0  0   0
#> 5  0  0  0  0  0  0  0  0  0   0
#> 6  0  0  0  0  0  0  0  0  0   0
#> 7  0  0  0  0  0  0  0  0  1   1
#> 8  0  0  0  0  0  0  0  0  0   0
#> 9  1  0  1  1  0  0  0  0  0   1
# Load the per-cell cluster assignment written by CBM. The result file name is
# built from the same output prefix that was passed to CBM via -o.
pred.cluster.label <- fread(paste0(output_file, ".assignment.txt"))
# Transpose so that each cell becomes a row of a one-column data frame.
pred.cluster.label <- as.data.frame(t(pred.cluster.label))
colnames(pred.cluster.label) <- "Cluster" # Cells in rows, assigned cluster in the single column
pred.cluster.label[1:10, , drop = FALSE]
#>     Cluster
#> V1        0
#> V2        1
#> V3        2
#> V4        3
#> V5        4
#> V6        4
#> V7        3
#> V8        3
#> V9        4
#> V10       3

Cells from the same clone are assumed to share the same genotype. Hence, we assign the cluster-level mutations to each individual cell.

# Expand the cluster-level genotypes to one row per cell by looking up each
# cell's cluster id in the row names of the cluster genotype table.
cluster.ids <- as.character(pred.cluster.label$Cluster)

# A cluster id without a genotype row would silently produce an all-NA row,
# so fail loudly instead.
unknown.ids <- setdiff(cluster.ids, rownames(pred.cluster.genotype))
if (length(unknown.ids) > 0) {
  stop(
    "Cluster id(s) without a genotype row: ",
    paste(unknown.ids, collapse = ", "),
    call. = FALSE
  )
}

# One vectorised row lookup replaces the per-cell rbind(), which re-copies the
# growing result for every cell. The row names produced here are derived from
# the cluster ids and carry no meaning; they are overwritten when the cell
# names are restored from the raw SNV matrix.
pred <- pred.cluster.genotype[cluster.ids, , drop = FALSE]
pred[1:10, 1:10]
#>                  chr1:634045:T>G chr16:46390534:G>A chr17:22521407:G>A
#> AAACTCGCATTCTTTG               1                  0                  1
#> AAACTCGTCCATTGTT               0                  0                  0
#> AAACTCGTCGTTGTTT               0                  0                  0
#> AAACTGCAGAGTCCGA               0                  0                  0
#> AAACTGCCAGGGAGTT               0                  0                  0
#> AAACTGCTCTACATCT               0                  0                  0
#> AAAGATGCAGGTGTCC               0                  0                  0
#> AAAGATGGTCCGTGCA               0                  0                  0
#> AAAGGATGTTCCGCGA               0                  0                  0
#> AAAGGGCAGACCAATA               0                  0                  0
#>                  chr17:22521415:T>C chr17:22521426:T>C chr22:10742083:C>T
#> AAACTCGCATTCTTTG                  1                  0                  0
#> AAACTCGTCCATTGTT                  0                  0                  0
#> AAACTCGTCGTTGTTT                  0                  0                  0
#> AAACTGCAGAGTCCGA                  0                  0                  0
#> AAACTGCCAGGGAGTT                  0                  0                  0
#> AAACTGCTCTACATCT                  0                  0                  0
#> AAAGATGCAGGTGTCC                  0                  0                  0
#> AAAGATGGTCCGTGCA                  0                  0                  0
#> AAAGGATGTTCCGCGA                  0                  0                  0
#> AAAGGGCAGACCAATA                  0                  0                  0
#>                  chr22:10742088:G>T chr3:93470405:A>T chr3:93470446:G>A
#> AAACTCGCATTCTTTG                  0                 0                 0
#> AAACTCGTCCATTGTT                  0                 0                 0
#> AAACTCGTCGTTGTTT                  0                 0                 0
#> AAACTGCAGAGTCCGA                  0                 0                 0
#> AAACTGCCAGGGAGTT                  0                 0                 0
#> AAACTGCTCTACATCT                  0                 0                 0
#> AAAGATGCAGGTGTCC                  0                 0                 0
#> AAAGATGGTCCGTGCA                  0                 0                 0
#> AAAGGATGTTCCGCGA                  0                 0                 0
#> AAAGGGCAGACCAATA                  0                 0                 0
#>                  chr3:93470451:C>T
#> AAACTCGCATTCTTTG                 0
#> AAACTCGTCCATTGTT                 0
#> AAACTCGTCGTTGTTT                 0
#> AAACTGCAGAGTCCGA                 0
#> AAACTGCCAGGGAGTT                 0
#> AAACTGCTCTACATCT                 0
#> AAAGATGCAGGTGTCC                 0
#> AAAGATGGTCCGTGCA                 0
#> AAAGGATGTTCCGCGA                 0
#> AAAGGGCAGACCAATA                 0
# Restore the cell and mutation names from the original SNV matrix, which is
# stored mutation-by-cell (its columns name the rows of pred and vice versa),
# then save the denoised cell-by-mutation matrix.
raw.snv <- readRDS("snv.mat.rds")

# Guard against a mismatched or unlabeled matrix: assigning names of the wrong
# length (or NULL) would either fail obscurely or silently mislabel the data.
stopifnot(
  !is.null(rownames(raw.snv)),
  !is.null(colnames(raw.snv)),
  nrow(pred) == ncol(raw.snv),
  ncol(pred) == nrow(raw.snv)
)

rownames(pred) <- colnames(raw.snv)
colnames(pred) <- rownames(raw.snv)
saveRDS(pred, "denoised.mat.rds")