Skip to content

Commit

Permalink
Merge pull request #4 from timbitz/bugfix_2
Browse files Browse the repository at this point in the history
Bugfix 2
  • Loading branch information
mkabza committed Jan 25, 2024
2 parents 05fbc24 + 95c9ad0 commit d9e34cb
Show file tree
Hide file tree
Showing 19 changed files with 240 additions and 253 deletions.
12 changes: 10 additions & 2 deletions R/assign_intron_strand.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#' @param genome_fasta_file A string containing a genome FASTA file path.
#' @param min_intron_length An integer scalar specifying the minimum length
#' of introns to assign a strand to.
#' @param max_intron_length An integer scalar specifying the maximum length
#' of introns to assign a strand to.
#' @param known_intron_motifs A character vector specifying the known intron
#' motifs.
#' @param rescue_annotated_introns A logical scalar specifying if introns
Expand All @@ -20,6 +22,7 @@ assign_intron_strand <- function(intron_granges,
anno_data,
genome_fasta_file,
min_intron_length = 30,
max_intron_length = 5e6,
known_intron_motifs = c("GT-AG"),
rescue_annotated_introns = FALSE) {

Expand All @@ -31,6 +34,7 @@ assign_intron_strand <- function(intron_granges,
assertthat::assert_that(assertthat::is.string(genome_fasta_file))
assertthat::assert_that(file.exists(genome_fasta_file))
assertthat::assert_that(assertthat::is.count(min_intron_length))
assertthat::assert_that(assertthat::is.count(max_intron_length))
assertthat::assert_that(is.character(known_intron_motifs))
assertthat::assert_that(assertthat::is.flag(rescue_annotated_introns))

Expand Down Expand Up @@ -58,7 +62,9 @@ assign_intron_strand <- function(intron_granges,
intron_seq_plus <- BSgenome::getSeq(genome_seq, intron_granges_plus)
intron_lenght_plus <- BiocGenerics::width(intron_seq_plus)
intron_seq_plus[intron_lenght_plus < min_intron_length] <-
strrep("N", min_intron_length)
strrep("N", 4)
intron_seq_plus[intron_lenght_plus > max_intron_length] <-
strrep("N", 4)
intron_motif_plus <- glue::glue(
"{XVector::subseq(intron_seq_plus, start = 1, width = 2)}", "-",
"{XVector::subseq(intron_seq_plus, end = BiocGenerics::width(intron_seq_plus), width = 2)}"
Expand All @@ -70,7 +76,9 @@ assign_intron_strand <- function(intron_granges,
intron_seq_minus <- BSgenome::getSeq(genome_seq, intron_granges_minus)
intron_lenght_minus <- BiocGenerics::width(intron_seq_minus)
intron_seq_minus[intron_lenght_minus < min_intron_length] <-
strrep("N", min_intron_length)
strrep("N", 4)
intron_seq_minus[intron_lenght_minus > max_intron_length] <-
strrep("N", 4)
intron_motif_minus <- glue::glue(
"{XVector::subseq(intron_seq_minus, start = 1, width = 2)}", "-",
"{XVector::subseq(intron_seq_minus, end = BiocGenerics::width(intron_seq_minus), width = 2)}"
Expand Down
18 changes: 17 additions & 1 deletion R/bam_to_read_structures.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ bam_to_read_structures <- function(bam_files,
bam_file_con <- Rsamtools::BamFile(bam_file, yieldSize = chunk_size)
bam_param <- Rsamtools::ScanBamParam(
what = "qname",
flag = Rsamtools::scanBamFlag(isSupplementaryAlignment = FALSE)
flag = Rsamtools::scanBamFlag(
isSupplementaryAlignment = FALSE,
isSecondaryAlignment = FALSE,
isNotPassingQualityControls = FALSE,
isDuplicate = FALSE
)
)
open(bam_file_con)
repeat {
Expand Down Expand Up @@ -96,6 +101,17 @@ bam_to_read_structures <- function(bam_files,
}, BPPARAM = BPPARAM)
read_summary <- do.call(rbind, read_summary)

# Remove inter-chromosomal alignments
is_same_seq_name <- sapply(
strsplit(read_summary$intron_positions, ","),
function(intron_positions) {
seq_names <- sapply(strsplit(intron_positions, ":"), "[", 1)
same_seq_name <- length(unique(seq_names)) == 1
return(same_seq_name)
}
)
read_summary <- read_summary[is_same_seq_name,]

# Merge read structures from different BAM files
read_summary <- read_summary %>%
dplyr::group_by(.data$intron_positions) %>%
Expand Down
7 changes: 6 additions & 1 deletion R/bam_to_tcc.R
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,12 @@ bam_to_tcc <- function(bam_files,
}
bam_param <- Rsamtools::ScanBamParam(
what = "qname",
flag = Rsamtools::scanBamFlag(isSupplementaryAlignment = FALSE),
flag = Rsamtools::scanBamFlag(
isSupplementaryAlignment = FALSE,
isSecondaryAlignment = FALSE,
isNotPassingQualityControls = FALSE,
isDuplicate = FALSE
),
tag = bam_tags
)
open(bam_file_con)
Expand Down
10 changes: 10 additions & 0 deletions R/prepare_bam_transcripts.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#' @param genome_fasta_file A string containing a genome FASTA file path.
#' @param min_intron_length An integer scalar specifying the minimum length
#' of introns to assign a strand to.
#' @param max_intron_length An integer scalar specifying the maximum length
#' of introns to assign a strand to.
#' @param known_intron_motifs A character vector specifying the known intron
#' motifs.
#' @param rescue_annotated_introns A logical scalar specifying if introns
Expand All @@ -34,6 +36,7 @@ prepare_bam_transcripts <- function(bam_parsed,
anno_data,
genome_fasta_file,
min_intron_length = 30,
max_intron_length = 5e6,
known_intron_motifs = c("GT-AG"),
rescue_annotated_introns = FALSE,
known_intron_granges = NULL,
Expand All @@ -42,6 +45,11 @@ prepare_bam_transcripts <- function(bam_parsed,

# Check arguments
assertthat::assert_that(is.data.frame(bam_parsed))
assertthat::assert_that(
length(bam_parsed$intron_positions) ==
length(unique(bam_parsed$intron_positions)),
msg = "bam_parsed$intron_positions contains non-unique values"
)
assertthat::assert_that(is.list(anno_data))
assertthat::assert_that(assertthat::has_name(anno_data, "gene_df"))
assertthat::assert_that(is.data.frame(anno_data$gene_df))
Expand All @@ -54,6 +62,7 @@ prepare_bam_transcripts <- function(bam_parsed,
assertthat::assert_that(assertthat::is.string(genome_fasta_file))
assertthat::assert_that(file.exists(genome_fasta_file))
assertthat::assert_that(assertthat::is.count(min_intron_length))
assertthat::assert_that(assertthat::is.count(max_intron_length))
assertthat::assert_that(is.character(known_intron_motifs))
assertthat::assert_that(assertthat::is.flag(rescue_annotated_introns))
if (!is.null(known_intron_granges)) {
Expand Down Expand Up @@ -83,6 +92,7 @@ prepare_bam_transcripts <- function(bam_parsed,
anno_data = anno_data,
genome_fasta_file = genome_fasta_file,
min_intron_length = min_intron_length,
max_intron_length = max_intron_length,
known_intron_motifs = known_intron_motifs,
rescue_annotated_introns = rescue_annotated_introns)
nr_intron_positions <- intron_data$nr_intron_positions
Expand Down
13 changes: 12 additions & 1 deletion R/prepare_transcripts.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#' only reference transcripts are used.
#' @param min_intron_length An integer scalar specifying the minimum length
#' of introns to assign a strand to.
#' @param max_intron_length An integer scalar specifying the maximum length
#' of introns to assign a strand to.
#' @param known_intron_motifs A character vector specifying the known intron
#' motifs.
#' @param rescue_annotated_introns A logical scalar specifying if introns
Expand Down Expand Up @@ -37,6 +39,7 @@ prepare_transcripts <- function(gtf_file,
genome_fasta_file,
bam_parsed,
min_intron_length = 30,
max_intron_length = 5e6,
known_intron_motifs = c("GT-AG"),
rescue_annotated_introns = FALSE,
known_intron_granges = NULL,
Expand All @@ -51,8 +54,14 @@ prepare_transcripts <- function(gtf_file,
assertthat::assert_that(file.exists(genome_fasta_file))
if (!is.null(bam_parsed)) {
assertthat::assert_that(is.data.frame(bam_parsed))
assertthat::assert_that(
length(bam_parsed$intron_positions) ==
length(unique(bam_parsed$intron_positions)),
msg = "bam_parsed$intron_positions contains non-unique values"
)
}
assertthat::assert_that(assertthat::is.count(min_intron_length))
assertthat::assert_that(assertthat::is.count(max_intron_length))
assertthat::assert_that(is.character(known_intron_motifs))
assertthat::assert_that(assertthat::is.flag(rescue_annotated_introns))
if (!is.null(known_intron_granges)) {
Expand Down Expand Up @@ -96,7 +105,9 @@ prepare_transcripts <- function(gtf_file,
if (!is.null(bam_parsed)) {
tx_list_bam <- prepare_bam_transcripts(
bam_parsed = bam_parsed, anno_data = anno_data,
genome_fasta_file = genome_fasta_file, min_intron_length = min_intron_length,
genome_fasta_file = genome_fasta_file,
min_intron_length = min_intron_length,
max_intron_length = max_intron_length,
known_intron_motifs = known_intron_motifs,
rescue_annotated_introns = rescue_annotated_introns,
known_intron_granges = known_intron_granges,
Expand Down
3 changes: 3 additions & 0 deletions R/process_intron_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ process_intron_data <- function(bam_parsed,
anno_data,
genome_fasta_file,
min_intron_length = 30,
max_intron_length = 5e6,
known_intron_motifs = c("GT-AG"),
rescue_annotated_introns = FALSE) {

Expand All @@ -18,6 +19,7 @@ process_intron_data <- function(bam_parsed,
assertthat::assert_that(assertthat::is.string(genome_fasta_file))
assertthat::assert_that(file.exists(genome_fasta_file))
assertthat::assert_that(assertthat::is.count(min_intron_length))
assertthat::assert_that(assertthat::is.count(max_intron_length))
assertthat::assert_that(is.character(known_intron_motifs))
assertthat::assert_that(assertthat::is.flag(rescue_annotated_introns))

Expand All @@ -37,6 +39,7 @@ process_intron_data <- function(bam_parsed,
anno_data = anno_data,
genome_fasta_file = genome_fasta_file,
min_intron_length = min_intron_length,
max_intron_length = max_intron_length,
known_intron_motifs = known_intron_motifs,
rescue_annotated_introns = rescue_annotated_introns)
nr_intron_positions <- as.character(nr_intron_granges)
Expand Down
64 changes: 32 additions & 32 deletions docs/Isosceles.html

Large diffs are not rendered by default.

Binary file modified docs/Isosceles.pdf
Binary file not shown.
14 changes: 7 additions & 7 deletions docs/Mouse_E18_brain_analysis.html

Large diffs are not rendered by default.

Loading

0 comments on commit d9e34cb

Please sign in to comment.