-
Notifications
You must be signed in to change notification settings - Fork 0
/
RunSingleR.R
42 lines (27 loc) · 1.37 KB
/
RunSingleR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
options(warn = -1)
suppressMessages(library(tidyverse))
suppressMessages(library(Matrix))
suppressMessages(library(SingleCellExperiment))
suppressMessages(library(SingleR))
# Annotate the columns of a pseudobulk expression table with SingleR, using
# the ASTRID reference object, and write the predictions as CSV.
#
# Usage: Rscript RunSingleR.R <input_csv> <output_csv>

# NOTE(review): hard-coded working directory. The relative "data/..." paths
# below (and any relative paths given on the command line) resolve against it.
setwd("/scratch/alper.eroglu/tools/ASTRID/")

# Positional arguments: input table path, then output CSV path.
args <- commandArgs(TRUE)
if (length(args) < 2) {
  stop("Usage: Rscript RunSingleR.R <input_csv> <output_csv>", call. = FALSE)
}
input_adata <- args[1]
output_csv <- args[2]

# Scale every column to sum to 1, then apply log10(x * 1e5 + 1).
# Used for both the query and the reference so they share one scale.
log_cp100k <- function(counts) {
  fractions <- sweep(counts, 2, colSums(counts), `/`)
  log10(fractions * 1e5 + 1)
}

excludeSamples <- read.table(
  file = "data/excludeSingleR.csv", header = TRUE, sep = ","
)
pseudobulk_matrix <- read.csv(input_adata, row.names = 1)
newRef <- readRDS("data/ASTRID_SingleR_Reference_20240701.Rds")

# Drop reference columns whose names are listed in the exclusion file.
# The comparison must be on the row count: `nrow(excludeSamples > 0)` only
# worked by accident (it compared the whole data frame to 0 first).
if (nrow(excludeSamples) > 0) {
  newRef <- newRef[, !(colnames(newRef) %in% excludeSamples$samples)]
}
newRefCounts <- assays(newRef)[["counts"]]

# Transpose the input so its row names can be matched against the reference
# row names (presumably genes end up in rows -- confirm against the caller).
pseudobulk_matrix <- t(pseudobulk_matrix)
common_genes <- intersect(rownames(pseudobulk_matrix), rownames(newRefCounts))
if (length(common_genes) == 0) {
  stop("No row names shared between the input table and the reference.",
       call. = FALSE)
}

# drop = FALSE keeps a one-column input as a matrix; without it the subset
# collapses to a vector and the column-wise sweep() fails.
pseudobulk_matrix <- log_cp100k(pseudobulk_matrix[common_genes, , drop = FALSE])
# Strip the "clustering_level_2." prefix from the query column names.
colnames(pseudobulk_matrix) <- gsub(
  "clustering\\_level\\_2\\.",
  replacement = "",
  colnames(pseudobulk_matrix)
)

newRefCounts <- log_cp100k(newRefCounts[common_genes, , drop = FALSE])

# Labels come from the (already filtered) reference object, so they stay
# aligned with the columns of newRefCounts.
predictions <- SingleR(
  pseudobulk_matrix,
  ref = newRefCounts,
  labels = newRef$cellTypeGRINT,
  de.n = 50
)
write.table(predictions, file = output_csv, sep = ",", quote = FALSE)