-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalign_embeddings.R
91 lines (62 loc) · 3.57 KB
/
align_embeddings.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# ____________________________________________________________________________
# Script information ####
# title: Align data
# author: Jose Alquicira Hernandez
# date: 2021-10-30
# description: Align data (all pools) using harmony
# ____________________________________________________________________________
# HPC details ####
# screen -S align
# qrsh -N align -l mem_requested=300G -q long.q
# conda activate r-4.1.1
# ____________________________________________________________________________
# Import libraries ####
library("dsLib")
library("data.table")
library("Seurat")
library("SeuratDisk")
library("harmony")
library("ggplot2")
# ____________________________________________________________________________
# Set output ####
# `output` is passed to here(output, ...) for every file written below.
# NOTE(review): set_output() is a dsLib helper; presumably it creates and
# returns the dated results directory — confirm.
output <- set_output("2021-10-30", "aligned_data")
# ____________________________________________________________________________
# Import data ####
# Location of the input object, resolved through here().
input_file <- here("results/2021-10-30_combine_pools", "onek1k.h5Seurat")
# Request only the RNA assay with its "counts" data.
data <- LoadH5Seurat(
  input_file,
  assays = list(RNA = "counts")
)
# ____________________________________________________________________________
# Process data ####
# Seurat preprocessing applied in sequence to `data`: normalise, select
# variable features, then scale. Every step uses the package defaults except
# the number of variable features, which is set to 5000.
data <- NormalizeData(data)
data <- FindVariableFeatures(data, nfeatures = 5000)
data <- ScaleData(data)
# ____________________________________________________________________________
# PCA ####
# PCA with default arguments. The "pca" reduction stored here is read back
# later with Embeddings(data, reduction = "pca") as the input to harmony.
# NOTE(review): the later UMAP call uses dims = 1:40, so this relies on the
# default number of components being at least 40 — confirm for the installed
# Seurat version.
data <- RunPCA(data)
# ____________________________________________________________________________
# Align data ####
# Correct the PCA embeddings with harmony, using the "pool" metadata column as
# the integration variable, then store the result as a "harmony" reduction and
# compute the UMAP from it.
# NOTE(review): inicio()/fin() are dsLib helpers; presumably they start and
# stop a timer labelled with the given message — confirm.
inicio("Align embeddings")
pca_embeddings <- Embeddings(data, reduction = "pca")
cell_metadata <- data[[]]
harmony_embeddings <- HarmonyMatrix(
  pca_embeddings,
  meta_data = cell_metadata,
  vars_use = "pool",
  # The input is already a PCA embedding, so harmony must not run PCA itself.
  do_pca = FALSE,
  max.iter.harmony = 30,
  # NOTE(review): both tolerances are -Inf, presumably so that the convergence
  # checks never trigger and all iterations are run — confirm against the
  # harmony documentation.
  epsilon.cluster = -Inf,
  epsilon.harmony = -Inf
)
fin()
# NOTE(review): no RNG seed is set before HarmonyMatrix(); confirm whether the
# installed harmony version gives reproducible results without one.
data[["harmony"]] <- CreateDimReducObject(
  embeddings = harmony_embeddings,
  key = "harmony_",
  assay = "RNA"
)
data <- RunUMAP(data, dims = 1:40, reduction = "harmony")
# ____________________________________________________________________________
# Plot data ####
# Axis labels and empty title shared by both UMAP figures.
umap_labels <- list(xlab("UMAP 1"), ylab("UMAP 2"), ggtitle(""))

# UMAP grouped by the "predicted.celltype.l2" metadata column, with repelled
# group labels and a smaller legend font.
celltype_umap <- DimPlot(
  data,
  group.by = "predicted.celltype.l2",
  reduction = "umap",
  label = TRUE,
  repel = TRUE,
  label.size = 3
) +
  umap_labels +
  theme(legend.text = element_text(size = 7))

# UMAP grouped by the "pool_number" metadata column, legend removed.
# NOTE(review): harmony above is run on "pool" while this plot groups by
# "pool_number" — confirm both columns exist and describe the same batches.
pool_umap <- DimPlot(data, group.by = "pool_number", reduction = "umap") +
  NoLegend() +
  umap_labels

ggsave(
  filename = here(output, "umap_integrated.png"),
  plot = celltype_umap,
  width = 8.3,
  height = 5.5,
  dpi = "print"
)
ggsave(
  filename = here(output, "umap_integrated_pool.png"),
  plot = pool_umap,
  width = 6.3,
  height = 5.5,
  dpi = "print"
)
# ____________________________________________________________________________
# Export data ####
# Write the integrated object twice: as an h5Seurat file and as an RDS file.
# overwrite = TRUE: SaveH5Seurat() refuses to replace an existing file by
# default, which would abort a rerun at this point, after all of the compute
# above has finished. ggsave() and saveRDS() in this script already replace
# existing files, so the h5Seurat export is made consistent with them.
SaveH5Seurat(
  data,
  filename = here(output, "integrated.h5seurat"),
  overwrite = TRUE
)
saveRDS(data, file = here(output, "integrated.RDS"))
# ____________________________________________________________________________
# Session info ####
# NOTE(review): print_session() is a dsLib helper; presumably it records the
# R session information in the output directory — confirm.
print_session(here(output))