--- title: 'Benchmarking SignacX and SingleR with synovial flow cytometry data' date: Compiled `r Sys.Date()` output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Benchmarking SignacX and SingleR with flow-sorted data} %\VignetteEngine{knitr::rmarkdown_notangle} %\VignetteEncoding{UTF-8} --- In Figure 3 of the [pre-print](https://www.biorxiv.org/content/10.1101/2021.02.01.429207v3.full), we validated Signac with flow cytometry and compared Signac to SingleR. We reproduced that analysis using Seurat in this vignette, and provide interactive access to the data [here](https://kleintools.hms.harvard.edu/tools/springViewer_1_6_dev.html?client_datasets/AMP_Phase1_RA_Apr2019/FullDataset_v1). We start with raw counts. ```{r setup0, include=FALSE} all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL function(before, options) { if (before) { now <<- Sys.time() } else { res <- difftime(Sys.time(), now, units = "secs") all_times[[options$label]] <<- res } } })) knitr::opts_chunk$set( tidy = TRUE, tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, time_it = TRUE ) celltypes = readRDS(file = "fls/celltypes_amp_synovium.rds") True_labels = readRDS(file = "fls/celltypes_amp_synovium_true.rds") load(file = "fls/SingleR_Results.rda") ``` ## Load data Read the CEL-seq2 data. ```{r read CELSeq2, message = F, eval = F} ReadCelseq <- function (counts.file, meta.file) { E = suppressWarnings(readr::read_tsv(counts.file)); gns <- E$gene; E = E[,-1] E = Matrix::Matrix(as.matrix(E), sparse = TRUE) rownames(E) <- gns E } counts.file = "./fls/celseq_matrix_ru10_molecules.tsv.gz" meta.file = "./fls/celseq_meta.immport.723957.tsv" E = ReadCelseq(counts.file = counts.file, meta.file = meta.file) M = suppressWarnings(readr::read_tsv(meta.file)) # filter data based on depth and number of genes detected kmu = Matrix::colSums(E != 0) kmu2 = Matrix::colSums(E) E = E[,kmu > 200 & kmu2 > 500] # filter by mitochondrial percentage logik = grepl("^MT-", rownames(E)) MitoFrac = Matrix::colSums(E[logik,]) / Matrix::colSums(E) * 100 E = E[,MitoFrac < 20] ``` ## SingleR ```{r filter celseq, message = F, eval = F} require(SingleR) data("hpca") Q = SingleR(sc_data = E, ref_data = hpca$data, types = hpca$main_types, fine.tune = F, numCores = 4) save(file = "fls/SingleR_Results.rda", Q) True_labels = M$type[match(colnames(E), M$cell_name)] saveRDS(True_labels, file = "fls/celltypes_amp_synovium_true.rds") ``` ## Seurat Start with the standard pre-processing steps for a Seurat object. ```{r setupSeurat, message = F, eval = F} library(Seurat) ``` Create a Seurat object, and then perform SCTransform normalization. Note: * You can use the legacy functions here (i.e., NormalizeData, ScaleData, etc.), use SCTransform or any other normalization method (including no normalization). We did not notice a significant difference in cell type annotations with different normalization methods. * We think that it is best practice to use SCTransform, but it is not a necessary step. Signac will work fine without it. ```{r Seurat, message = T, eval = F} # load data synovium <- CreateSeuratObject(counts = E, project = "FACs") # run sctransform synovium <- SCTransform(synovium, verbose = F) ``` Perform dimensionality reduction by PCA and UMAP embedding. Note: * Signac actually needs these functions since it uses the nearest neighbor graph generated by Seurat. ```{r Seurat 2, message = T, eval = F} # These are now standard steps in the Seurat workflow for visualization and clustering synovium <- RunPCA(synovium, verbose = FALSE) synovium <- RunUMAP(synovium, dims = 1:30, verbose = FALSE) synovium <- FindNeighbors(synovium, dims = 1:30, verbose = FALSE) ``` ## SignacX ```{r setup2, message = F, eval = F} library(SignacX) ``` Generate Signac labels for the Seurat object. Note: * Optionally, you can do parallel computing by setting num.cores > 1 in the Signac function. * Run time is ~10 minutes for <100,000 cells. ```{r Signac, message = T, eval = F} # Run Signac labels <- Signac(synovium, num.cores = 4) celltypes = GenerateLabels(labels, E = synovium) ``` ## Compare SignacX and SingleR with FACs labels ```{r Seurat Visualization 1110, message = F, echo = F} xx = celltypes$CellTypes xx = as.character(xx) xx[xx == "Plasma.cells"] = "B" xx[xx == "NonImmune"] = as.character(celltypes$CellStates)[xx == "NonImmune"] xx[xx == "B"] = "B" xx[xx == "Fibroblasts"] = "F" xx[xx == "MPh"] = "M" xx[xx == "TNK"] = "T" xx[celltypes$CellStates == "NK"] = "NK" xy = xx True_labels[True_labels == "B cell"] = "B" True_labels[True_labels == "Fibroblast"] = "F" True_labels[True_labels == "Monocyte"] = "M" True_labels[True_labels == "T cell"] = "T" tabsignac = table(FACs = True_labels, Signac = xy) ``` SignacX (rows are FACs labels, columns are SignacX) ```{r Compare 22, echo = F, eval = T} knitr::kable(tabsignac, format = "html") ``` ```{r Seurat Visualization 10, message = F, echo = F} xx = Q$labels xx[xx %in% c("Macrophage", "DC", "Monocyte", "Platelets", "Neutrophils")] = "M" xx[xx %in% c("B_cell", "Pre-B_cell_CD34-", "Pro-B_cell_CD34+")] = "B" xx[xx == "Fibroblasts"] = "F" xx[xx == "T_cells"] = "T" xx[xx %in% c("Astrocyte", "Osteoblasts", "Tissue_stem_cells", "Smooth_muscle_cells", "MSC")] = "NonImmune" xx[xx == "NK_cell"] = "NK" xx[xx == "Chondrocytes"] = "Chondr." tab = table(FACs = True_labels, SingleR = xx) ``` SingleR (rows are FACs labels, columns are SingleR) ```{r Compare 223, echo = F, eval = T} knitr::kable(tab, format = "html") ``` Note: * These data were flow-sorted for CD3, and therefore lack NK cells. Signac detected no NK cells. * F = Fibroblasts; M = monocytes; T = T cells; B = B cells * Note that SingleR inaccurately annotated the majority of fibroblast cells as chondrocytes (Chondr.) and non immune cells, and also significantly identified NK cells (even though the data were flow-sorted for CD3, and thus lacked NK cells). Signac accuracy ```{r Seurat Visualization 11110, message = F, echo = T} logik = xy != "Unclassified" Signac_Accuracy = round(sum(xy[logik] == True_labels[logik]) / sum(logik) * 100, 2) Signac_Accuracy ``` SingleR accuracy ```{r Seurat Visualization 111110, message = F, echo = T} SingleR_Accuracy = round(sum(xx == True_labels) / sum(logik) * 100, 2) SingleR_Accuracy ``` Save results ```{r save results, message = F, eval = F} saveRDS(synovium, file = "synovium_signac.rds") saveRDS(celltypes, file = "synovium_signac_celltypes.rds") ``` ```{r save.times, include = FALSE, eval = F} write.csv(x = t(as.data.frame(all_times)), file = "fls/tutorial_times_signac-seurat_singler_AMP_RA.csv") ```
**Session Info** ```{r, echo=FALSE} sessionInfo() ```