Skip to content

Latest commit

 

History

History
159 lines (129 loc) · 5.85 KB

Notebook3_E18.5_and_fulltermP2.md

File metadata and controls

159 lines (129 loc) · 5.85 KB
title author date
Notebook3_E18.5_and_fullterm_P2
Laura Wolbeck
2023-02-23

In this notebook UMAP embedding, DEG and GSEA analysis will be performed for full-term samples E18.5 and P2. Moreover, this script includes code for all figures of full-term samples. For DEG and GSEA the package "cacoa" was used: https://github.com/kharchenkolab/cacoa/tree/main

Setup

#Load helper functions
source("scRNA_helper_functions_preterm.R")

#Load libraries
library(conos) #version 1.4.0
library(pagoda2)
library(magrittr)
library(ggplot2)
library(cacoa)
library(xlsx)
library(enrichplot)

1. Read in count matrices (cms)

Build cms from filtered cms from notebook 1

#load filtered cms
#extract fullterm samples
cms <- append(cms1[c(1, 2)], cms2[c(1,2)])
names = c("E18_5_1_",
           "E18_5_2_",
           "full_P2_1_",
           "full_P2_2_")

2. UMAP embedding and clustering

pre-processing of each dataset with pagoda2, followed by using conos to build the joint UMAP graph with forced alignment (alignment.strength = 0.2) to integrate samples well

con <- quickConos(cms,
                  names,
                  n.cores.p2=10,
                  n.cores.con=20, get.tsne = TRUE, alignment.strength=0.2)
con <- con$con

Additional clusters

Rerun leiden clustering to find additional clusters

con$findCommunities(method = leiden.community, resolution = 5, min.group.size = 15)
leiden30_full <- con$clusters$leiden$groups %>% factor

The final clusters were generated using a combination of manual selection and findSubcommunities() function of conos to split the clusters further. The clusters were annotated using known cell-type specific marker genes from the literature.

3. DEG analysis

Read in high resolution annotation file "anno"

Create condition factor for cacoa object

condition <- setNames(c("early", "early", "late", "late"), names(con$samples))

Create Cacoa object

cao <- Cacoa$new(con, cell.groups = anno, sample.groups=condition, n.cores = 10, ref.level = "early", target.level = "late")

estimate DE genes

cao$estimatePerCellTypeDE(n.cores=50) 

save DEG of RG as excel file

DEG <- cao$test.results$de %>% lapply("[[", "res") 
write.xlsx(x = DEG$RG, file= "DEG_fullterm_RG.xlsx")

4. GSEA

cao$estimateOntology(type="GSEA", org.db=org.Mm.eg.db)

save GSEA results of RG as excel file

cao$saveOntologyAsTable("fullterm_GSEA.tsv", name="GSEA")
GSEA <- read.table("fullterm_GSEA.tsv", sep="\t")
write.xlsx(x = GSEA[GSEA$V1=="RG",], file= "GSEA_fullterm_RG.xlsx")

5. Figures

Fig 1H upper plot

enrichplot::dotplot(cao$test.results$GSEA$res$RG$BP,showCategory=c("ribosome biogenesis" , "ribonucleoprotein complex biogenesis", "ribonucleoprotein complex subunit organization", "ribonucleoprotein complex assembly"))

Fig S5A

cao$estimateCellLoadings()
cao$plotCellLoadings(signif.threshold=0.05, show.pvals = F) + xlab("separation coefficient") 
ggsave("Compositionshifts_fullterm.pdf",width=6, height=4.5)

Fig S7A

#defining colours of high resolution clusters
colours_high <- c("#e1af32","#FFAF32","#3232FF","#32FF32","#FFFF00","#CDCD32","#FF9b9b","#EB8C32","#C8961E","#FF3232","#AF3232","#B49191","#694646","#820000","#965050","#550505","#C80F0F","#AF32FF","#E632E6","#FFC8FF","#DBFF00") %>% setNames(c("Immature ependymal cells-2","Immature ependymal cells-2","RG","Intermediate progenitor cells","Dividing cells-2","Dividing cells-1","Neuroblasts-1", "Dividing cells-4", "Dividing cells-3","Neuroblasts-2","Neurons 1-1","Neurons 1-2","Neurons 2","Neurons 3-3","Neurons 3-4","Neurons 3-1","Neurons 3-2","OPC","Microglia", "Endothelial/Pericytes/VSMC","Erythrocytes"))
con$plotGraph(groups=anno, embedding="90", label="") + scale_colour_manual(values=colours_high) +  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

Fig. S7B

#define words to exclude from colapsed GO terms
ex_words <- c('regulation', 'process', 'cell')
RG <- cao$plotOntologyHeatmapCollapsed(
  name="GSEA", genes="down", n=50, size.range=c(1, 4), subtype="BP", exclude.words=ex_words, clust.method="ward.D")

#filter for Radial glia
RG$data %<>% filter(G2=="RG", value > 0)

df <- RG$data
df %<>% arrange(value) %>% mutate(G1 = G1 %>% factor(., levels = .))

#define color legend
color.guide <- guide_colorbar(title = "-log10(p-value)", title.position = "left", 
        title.theme = element_text(angle = 90, hjust = 0.5))


ggplot(df, aes(x=G2, y = G1, fill = value)) + geom_tile( colour="white", size=0.2) +  scale_fill_distiller(direction = 1 , palette = "Blues") + guides(fill = color.guide) + labs(title="Top 30 down of RG fullterm", y = " ", x= " ") + theme_grey(base_size = 11) + theme(axis.ticks.x = element_blank(),  axis.text.x = element_blank())

Fig. S7C

#define words to exclude from colapsed GO terms
ex_words <- c('regulation', 'process', 'cell')
RG <- cao$plotOntologyHeatmapCollapsed(
  name="GSEA", genes="up", n=46, size.range=c(1, 4), subtype="BP", exclude.words=ex_words, clust.method="ward.D")

#filter for Radial glia
RG$data %<>% filter(G2=="RG", value > 0)

df <- RG$data
df %<>% arrange(value) %>% mutate(G1 = G1 %>% factor(., levels = .))

#define color legend
color.guide <- guide_colorbar(title = "-log10(p-value)", title.position = "left", 
        title.theme = element_text(angle = 90, hjust = 0.5))

ggplot(df, aes(x=G2, y = G1, fill = value)) + geom_tile( colour="white", size=0.2) +  scale_fill_distiller(direction = 1 , palette = "Reds") + guides(fill = color.guide) + labs(title="Top 30 up of RG fullterm", y = " ", x= " ") + theme_grey(base_size = 11) + theme(axis.ticks.x = element_blank(),  axis.text.x = element_blank())