Skip to content

Commit

Permalink
Merge pull request #558 from belleau/main
Browse files Browse the repository at this point in the history
Add function processBlockChr and correct some importFrom
  • Loading branch information
adeschen authored Oct 15, 2024
2 parents ca39ffd + 79692f8 commit bb4fbde
Show file tree
Hide file tree
Showing 6 changed files with 360 additions and 2 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(add1KG2SampleGDS)
export(addGeneBlockGDSRefAnnot)
export(addGeneBlockRefAnnot)
export(addRef2GDS1KG)
export(addStudy1Kg)
export(computeAncestryFromSynthetic)
Expand Down
99 changes: 97 additions & 2 deletions R/process1KG.R
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,8 @@ generatePhase1KG2GDS <- function(gdsReference, gdsReferencePhase,
#' unlink(fileRefPhaseGDS, force=TRUE)
#'
#' @author Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
#' @importFrom gdsfmt index.gdsn read.gdsn readmode.gdsn
#' @importFrom gdsfmt createfn.gds closefn.gds index.gdsn read.gdsn readmode.gdsn
#' @importFrom SNPRelate snpgdsOpen
#' @encoding UTF-8
#' @export
generatePhaseRef <- function(fileReferenceGDS, fileReferenceAnnotGDS,
Expand Down Expand Up @@ -774,6 +775,8 @@ identifyRelative <- function(gds, maf=0.05, thresh=2^(-11/2),
#' @author Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
#'
#' @importFrom GENESIS pcairPartition
#' @importFrom gdsfmt closefn.gds
#' @importFrom SNPRelate snpgdsOpen
#' @importFrom S4Vectors isSingleNumber
#' @importFrom methods is
#' @encoding UTF-8
Expand Down Expand Up @@ -997,7 +1000,7 @@ getRef1KGPop <- function(gdsReference, popName="superPop") {
#'
#'
#' @author Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
#' @importFrom gdsfmt index.gdsn read.gdsn
#' @importFrom gdsfmt index.gdsn read.gdsn closefn.gds
#' @importFrom stats rmultinom
#' @importFrom SNPRelate snpgdsOpen
#' @encoding UTF-8
Expand Down Expand Up @@ -1140,3 +1143,95 @@ addGeneBlockGDSRefAnnot <- function(gdsReference, gdsRefAnnotFile,
return(0L)
}

#' @title Append information associated to blocks, as indexes, into the
#' Population Reference SNV Annotation GDS file
#'
#' @description The function appends the information about the blocks into
#' the Population Reference SNV Annotation GDS file. The information is
#' extracted from the Population Reference GDS file.
#'
#' @param fileReferenceGDS a \code{character} string representing the file
#' name of the Reference GDS file. The file must exist.
#'
#' @param gdsRefAnnotFile a \code{character} string representing the
#' file name corresponding the Reference SNV
#' Annotation GDS file. The function will
#' open it in write mode and close it after. The file must exist.
#'
#' @param winSize a single positive \code{integer} representing the
#' size of the window to use to group the SNVs when the SNVs are in a
#' non-coding region. Default: \code{10000L}.
#'
#' @param ensDb An object with the ensembl genome annotation
#' Default: \code{EnsDb.Hsapiens.v86}.
#'
#' @param suffixBlockName a \code{character} string that identify the source
#' of the block and that will be added to the block description into
#' the Reference SNV Annotation GDS file, as example: Ensembl.Hsapiens.v86.
#'
#' @return The integer \code{0L} when the function is successful.
#'
#' @examples
#'
#' ## Path to the demo pedigree file is located in this package
#' dataDir <- system.file("extdata", package="RAIDS")
#'
#' ## Temporary file
#' fileAnnotGDS <- file.path(tempdir(), "ex1_good_small_1KG_Ann_GDS.gds")
#'
#' ## Required library
#' if (requireNamespace("EnsDb.Hsapiens.v86", quietly=TRUE)) {
#'
#' file.copy(file.path(dataDir, "tests",
#' "ex1_NoBlockGene.1KG_Annot_GDS.gds"), fileAnnotGDS)
#'
#' ## Making a "short cut" on the ensDb object
#' edb <- EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86
#'
#' ## GDS Reference file
#' fileReferenceGDS <- file.path(dataDir, "tests",
#' "ex1_good_small_1KG.gds")
#'
#' \donttest{
#'
#'
#' ## Append information associated to blocks
#' addGeneBlockRefAnnot(fileReferenceGDS=fileReferenceGDS,
#' gdsRefAnnotFile=fileAnnotGDS,
#' ensDb=edb,
#' suffixBlockName="EnsDb.Hsapiens.v86")
#'
#' gdsAnnot1KG <- openfn.gds(fileAnnotGDS)
#' print(gdsAnnot1KG)
#' print(read.gdsn(index.gdsn(gdsAnnot1KG, "block.annot")))
#'
#' closefn.gds(gdsAnnot1KG)
#' }
#'
#' ## Remove temporary file
#' unlink(fileAnnotGDS, force=TRUE)
#'
#' }
#'
#' @author Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
#' @importFrom gdsfmt openfn.gds closefn.gds
#' @importFrom SNPRelate snpgdsOpen
#' @importFrom S4Vectors isSingleNumber
#' @encoding UTF-8
#' @export
addGeneBlockRefAnnot <- function(fileReferenceGDS, gdsRefAnnotFile,
                            winSize=10000, ensDb, suffixBlockName) {

    ## File-name based wrapper around addGeneBlockGDSRefAnnot(): it opens the
    ## Reference GDS file, delegates the block annotation work and always
    ## closes the Reference GDS file afterwards.

    ## The Reference GDS file must be an existing file
    if (!(is.character(fileReferenceGDS) && (file.exists(fileReferenceGDS)))) {
        stop("The \'fileReferenceGDS\' must be a character string ",
            "representing the Reference GDS file. The file must exist.")
    }

    gdsReference <- snpgdsOpen(filename=fileReferenceGDS)
    ## Guarantee that the GDS handle is released even when the annotation
    ## step raises an error (otherwise the file stays open/locked)
    on.exit(closefn.gds(gdsReference), add=TRUE)

    ## Forward the window size requested by the caller; it used to be
    ## replaced by a hard-coded 10000, which made the parameter ineffective
    res <- addGeneBlockGDSRefAnnot(gdsReference, gdsRefAnnotFile,
                winSize=winSize, ensDb, suffixBlockName)

    ## Success: return the value produced by addGeneBlockGDSRefAnnot()
    return(res)
}
108 changes: 108 additions & 0 deletions R/tools_internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,111 @@ readSNVVCF <- function(fileName,

return(matSample)
}

#' @title Create a vector of integers representing the linkage
#' disequilibrium block of each SNV, in the same order
#' as the variants in the Population reference dataset
#'
#' @description The function creates a vector of integers representing the
#' linkage disequilibrium block of each SNV, in the same order
#' as the variants in the Population reference dataset.
#'
#' @param fileReferenceGDS a \code{character} string representing the file
#' name of the Reference GDS file. The file must exist.
#'
#' @param fileBlock a \code{character} string representing the file
#' name of output file det from the plink block command for a chromosome.
#'
#' @return a \code{list} containing 2 entries:
#' \describe{
#' \item{\code{chr}}{ an \code{integer} representing the chromosome from
#' fileBlock.
#' }
#' \item{\code{block.snp}}{ an \code{array} of integers
#' representing the linkage disequilibrium block of
#' each SNV, in the same order as the variants
#' in the Population reference dataset.
#' }
#' }
#'
#'
#' @examples
#'
#' ## Path to the demo pedigree file is located in this package
#' dataDir <- system.file("extdata", package="RAIDS")
#'
#' ## Demo of Reference GDS file containing reference information
#' fileReferenceGDS <- file.path(dataDir, "PopulationReferenceDemo.gds")
#'
#' ## Demo of the output file det from the plink block
#' ## command for chromosome 1
#' fileLdBlock <- file.path(dataDir, "block.sp.EUR.Ex.chr1.blocks.det")
#'
#' listLdBlock <- RAIDS:::processBlockChr(fileReferenceGDS, fileLdBlock)
#'
#' @author Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
#' @importFrom gdsfmt read.gdsn index.gdsn closefn.gds
#' @importFrom SNPRelate snpgdsOpen
#' @encoding UTF-8
#' @keywords internal
processBlockChr <- function(fileReferenceGDS, fileBlock) {

    ## Assign a block identifier to each SNV of one chromosome:
    ##   * a positive value: the row index, in fileBlock, of the block
    ##     containing the SNV (positions BP1 to BP2, both inclusive);
    ##   * a negative value: an identifier for a run of SNVs located outside
    ##     of any block; a run is split when it spans 10000 bases or more.
    ## Returns a list with the chromosome (chr) and the vector (block.snp).

    if (!(is.character(fileReferenceGDS) && (file.exists(fileReferenceGDS)))) {
        stop("The \'fileReferenceGDS\' must be a character string ",
            "representing the Reference GDS file. The file must exist.")
    }
    if (!(is.character(fileBlock) && (file.exists(fileBlock)))) {
        stop("The \'fileBlock\' must be a character string ",
            "representing the file .det from plink block result. The file must exist.")
    }

    gdsReference <- snpgdsOpen(filename=fileReferenceGDS)
    ## Release the GDS handle on every exit path, including the errors
    ## raised below while parsing/validating the block file
    on.exit(closefn.gds(gdsReference), add=TRUE)

    ## Whitespace-separated table with, at least, columns CHR, BP1 and BP2
    blockChr <- utils::read.delim(fileBlock, sep="")

    listChr <- unique(blockChr$CHR)
    if (length(listChr) != 1) {
        stop(paste0("There is not just one CHR in ", fileBlock, "\n"))
    }
    ## Accept both "1" and "chr1" style chromosome names
    listChr <- as.integer(gsub("chr", "", listChr))

    ## Positions of the reference SNVs located on the selected chromosome
    listSNVChr <- read.gdsn(index.gdsn(gdsReference, "snp.chromosome"))
    listSNVChr <- which(listSNVChr == listChr)
    snp.keep <- read.gdsn(index.gdsn(gdsReference, "snp.position"))[listSNVChr]

    ## Sweep line over the positions: +k opens block k at BP1, -k closes it
    ## at BP2 + 1 and 0 flags a SNV. The running sum at a SNV is then the
    ## index of the block containing it (0 when outside of all blocks).
    nbBlock <- nrow(blockChr)
    z <- cbind(c(blockChr$BP1, snp.keep, blockChr$BP2 + 1),
                c(seq_len(nbBlock), rep(0, length(snp.keep)),
                    -seq_len(nbBlock)))

    ## At equal positions, the block markers must be processed before the
    ## SNVs; otherwise a SNV located exactly at BP2 + 1 would still be
    ## counted inside the block that ends at BP2
    z <- z[order(z[, 1], z[, 2] == 0), ]
    block.snp <- cumsum(z[, 2])[z[, 2] == 0]

    ## NOTE(review): block.snp follows increasing positions while snp.keep
    ## follows the GDS order; the loop below assumes that both orders are
    ## the same (positions sorted within the chromosome) - confirm
    sizeInterBlock <- 10000
    curStart <- 0
    inInterBlock <- FALSE
    blockState <- 0
    for (i in seq_along(block.snp)) {
        if (block.snp[i] != 0) {
            ## SNV inside a block: the current inter-block run is over
            inInterBlock <- FALSE
            next
        }
        ## Start a new negative identifier when entering an inter-block
        ## region or when the current run reaches the maximum size
        if (!inInterBlock || (snp.keep[i] - curStart >= sizeInterBlock)) {
            blockState <- blockState - 1
            curStart <- snp.keep[i]
            inInterBlock <- TRUE
        }
        block.snp[i] <- blockState
    }

    res <- list(chr=listChr,
                block.snp=block.snp)
    return(res)
}

10 changes: 10 additions & 0 deletions inst/extdata/block.sp.EUR.Ex.chr1.blocks.det
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CHR BP1 BP2 KB NSNPS SNPS
1 51897 51927 0.031 2 s3|s4
1 54707 54715 0.009 2 s6|s7
1 55544 59039 3.496 2 s14|s15
1 61986 66506 4.521 3 s17|s18|s31
1 76837 77873 1.037 3 s39|s42|s43
1 79771 80140 0.37 2 s48|s49
1 82675 86330 3.656 12 s55|s58|s61|s62|s66|s67|s68|s71|s72|s75|s78|s79
1 87189 88337 1.149 3 s81|s89|s96
1 631489 633328 1.84 3 s150|s159|s160
89 changes: 89 additions & 0 deletions man/addGeneBlockRefAnnot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bb4fbde

Please sign in to comment.