diff --git a/bin/MyApp/Project.toml b/bin/MyApp/Project.toml deleted file mode 100644 index 1d1655c..0000000 --- a/bin/MyApp/Project.toml +++ /dev/null @@ -1,9 +0,0 @@ -name = "MyApp" -uuid = "b2b8273f-e373-4bd2-bdd1-cbbb72be896e" -authors = ["chelseatrotter "] -version = "0.1.0" - -[deps] -DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" -LMGPU = "80ae3f88-c08a-44f8-80fe-4bd6150eb394" -PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" diff --git a/bin/MyApp/precompile_app.jl b/bin/MyApp/precompile_app.jl deleted file mode 100644 index 502f8d8..0000000 --- a/bin/MyApp/precompile_app.jl +++ /dev/null @@ -1,3 +0,0 @@ -using MyApp -push!(ARGS, "arg") -MyApp.julia_main() diff --git a/bin/MyApp/src/MyApp.jl b/bin/MyApp/src/MyApp.jl deleted file mode 100644 index ad1d581..0000000 --- a/bin/MyApp/src/MyApp.jl +++ /dev/null @@ -1,67 +0,0 @@ -module MyApp - - -using LMGPU -using DelimitedFiles - -function julia_main() - try - main() - catch - Base.invokelatest(Base.display_error, Base.catch_stack()) - return 1 - end - return 0 -end - - -function main() - - args = ARGS - @info "getting args" - output_dir = args[1] - output_file = args[2] - rqtl_file = args[3] - export_matrix = args[4] == "true" - - @info "getting geno file and pheno file" - geno_file = joinpath(output_dir,"geno_prob.csv") - pheno_file = joinpath(output_dir, "pheno.csv") - output_file = joinpath(output_dir, output_file) - - LMGPU.set_blas_threads(16); - # Read in data. - G = LMGPU.get_geno_data(geno_file) - Y = LMGPU.get_pheno_data(pheno_file) - # getting geno and pheno file size. - n = size(Y,1) - m = size(Y,2) - p = size(G,2) - println("******* Indivuduals n: $n, Traits m: $m, Markers p: $p ****************"); - # cpu_timing = benchmark(5, cpurun, Y, G,n,export_matrix); - - # running analysis. - lod = LMGPU.cpurun(Y, G,n,export_matrix); - if !export_matrix - gmap = LMGPU.get_gmap_info(rqtl_file) - idx = trunc.(Int, lod[:,1]) - gmap_info = LMGPU.match_gmap(idx, gmap) - lod = hcat(gmap_info, lod) - header = reshape(["marker", "chr", "pos", "idx", "lod"], 1,:) - lod = vcat(header, lod) - end - - # write output to file - writedlm(output_file, lod, ',') - println("Lod exported to $output_file") - - # TODO: generate plot? - return lod - -end - -if abspath(PROGRAM_FILE) == @__FILE__ - main() -end - -end # module diff --git a/bin/build-bin.jl b/bin/build-bin.jl deleted file mode 100644 index d5b91ea..0000000 --- a/bin/build-bin.jl +++ /dev/null @@ -1,6 +0,0 @@ -using PackageCompiler - -app_dir = joinpath(@__DIR__, "MyApp") -compile_dir = joinpath(@__DIR__, "MyAppCompiled") -precompile_file = joinpath(app_dir,"precompile_app.jl") -create_app(app_dir, compile_dir, force=true,incremental=false,precompile_execution_file=precompile_file) diff --git a/bin/install_packages.jl b/bin/install_packages.jl deleted file mode 100644 index 575f54b..0000000 --- a/bin/install_packages.jl +++ /dev/null @@ -1,6 +0,0 @@ -using Pkg - -Pkg.activate(".") -Pkg.instantiate(; verbose = false) -Pkg.activate("./bin/MyApp") -Pkg.instantiate(; verbose = false) diff --git a/r/Rqtl2scan.R b/r/Rqtl2scan.R deleted file mode 100644 index ec8d3f4..0000000 --- a/r/Rqtl2scan.R +++ /dev/null @@ -1,31 +0,0 @@ -library(qtl2) -## readin data in R/qtl -bxd <- read.cross2(file="../data/input-for-rqtl/geno-pheno-rqtl.csv",format="csv",crosstype="risib",genotypes=c("B","D")) -pheno<-read.csv("../data/input-for-rqtl/traits.csv",sep=",") - -#drop obs. & traits with all NAs -keepidx<-which(rowSums(is.na(bxd$pheno))<35500) - -c1<-subset(bxd,ind=keepidx) -rownames(c1$pheno)<-c1$pheno$ID -c1$pheno<-c1$pheno[,-1] -# extract genotype data from the processed data -#gen<-pull.geno(c1) -#write.csv(gen,file="genotypedata.csv") - -droptrait<-which(colSums(is.na(c1$pheno))==79) -c1$pheno<-c1$pheno[,-droptrait] -c1$pheno<-pheno - -library(tictoc) -library(qtl2) -# convert a cross from the qtl format to the qtl2 format -cvt1<-convert2cross2(c1) -#insert pseudomarker -map <- insert_pseudomarkers(cvt1$gmap, step=0) -pr <- calc_genoprob(cvt1, map, error_prob=0.002, cores=4) - -tic() -out <- scan1(pr, cvt1$pheno, cores=32) -toc() -write.csv(out,file="../data/results/rqtl_lod_score.csv") diff --git a/r/cleaning.R b/r/cleaning.R deleted file mode 100644 index 364a301..0000000 --- a/r/cleaning.R +++ /dev/null @@ -1,99 +0,0 @@ -library(mice) -library(parallel) -library(qtl2) -library(tidyverse) -library(tictoc) - -getdata<-function(url){ - return(read_cross2(url)) -} - -keep_row_idx<-function(pheno, droprate){ - rs = rowSums(is.na(pheno)) - keepidx <- which(rs/ncol(pheno) <= droprate) - return(keepidx) -} - -keep_col_idx<-function(pheno, droprate){ - - cs = colSums(is.na(pheno)) - keepidx <- which(cs/nrow(pheno) <= droprate) - return(keepidx) -} - -calc_gprob_update_gmap<-function(gmap_file, cross, ncore=1, error_prob=0.002, step=0, pseudomarker=FALSE){ - - #insert pseudomarker - map = cross$gmap - if(pseudomarker){ - map <- insert_pseudomarkers(map, step=step) - cat("++++++++ writing out to +++++++++++++ ", gmap_file) - write.csv(map, file = gmap_file,row.names = FALSE) - } - pr <- calc_genoprob(cross, map, error_prob=error_prob, cores=ncore) - return(pr) -} - -#get whole genotype prob file -getGenopr<-function(x){ - temp<<-NULL - m=length(attributes(x)$names) - cnames<-attributes(x)$names - for (i in 1:m) { - d<-eval(parse(text=paste(c('dim(x$\'', cnames[i] ,'\')'),collapse=''))) - nam<-eval(parse(text=paste(c('dimnames(x$\'',cnames[i],'\')[[2]]'),collapse = ''))) - cnam<-rep(nam,d[3]) - p_chr<-paste(c('array(x$\'',cnames[i],'\',dim=c(d[1],d[2]*d[3]))'),collapse='') - prob<-eval(parse(text = p_chr)) - temp<-cbind(temp,prob) - } - return(temp) -} - -clean_and_write<-function(url, output_dir, scan=FALSE,geno_output_file="geno_prob.csv", pheno_output_file="pheno.csv", new_gmap_file="gmap.csv", - result_file="rqtl_result.csv", - indi_droprate=0.0, trait_droprate=0.0, nseed=100, ncores=1, error_prob=0.002, stepsize=0){ - - bxd = getdata(url) - print("got data from url") - - dir.create(output_dir, recursive=TRUE) - geno_output_file <- file.path(output_dir, geno_output_file) - pheno_output_file <- file.path(output_dir, pheno_output_file) - new_gmap_file <- file.path(output_dir, new_gmap_file) - result_file <- file.path(output_dir, result_file) - scan <- scan == "TRUE" - - # innerjoin - # pick out shared bxd ids in geno and pheno - bxd_ids <- ind_ids_gnp(bxd) - cat("dimention of bxd_ids:", dim(bxd_ids)) - joint_bxd <- subset(bxd, ind = bxd_ids) - - # pick out the ones with no missing data - filled_ids <- ind_ids(joint_bxd)[complete.cases(joint_bxd$pheno)] - cat("dimention of filled_ids :", dim(filled_ids)) - filled_bxd = subset(joint_bxd, ind = filled_ids) - - # calculate genotype probablity - pr = calc_gprob_update_gmap(new_gmap_file, filled_bxd, ncores, error_prob, stepsize, FALSE) - prob1 = getGenopr(pr) - print("calculating geno prob done") - cat("dimention of geno :", dim(prob1)) - - write.csv(filled_bxd$pheno, file = pheno_output_file) - write.csv(prob1, file = geno_output_file) - print("writing out pheno and geno done") - - if(scan){ - print("Doing genome scan") - tic() - out = scan1(pr, filled_bxd$pheno, cores=32) - toc() - print("writing out rqtl result file.") - write.csv(out,file=result_file) - } -} - -args = commandArgs(trailingOnly=TRUE) -clean_and_write(args[1], args[2], args[3]) diff --git a/sh/rqtl-julia.sh b/sh/rqtl-julia.sh deleted file mode 100755 index 7b56be2..0000000 --- a/sh/rqtl-julia.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Assuming you are in LMGPU directory - -# Input in Rqtl2 format. -URL="./data/HC_M2_0606_R.zip" -# Intermediate and final scan result will be stored here. -output_dir="./data/HIPPO_CLEAN_DATA/" -# Do genome scan with R/qtl2, default is False. Only True if we need to compare genome scan results produced by LMGPU. -scan="FALSE" - -time Rscript --vanilla ./r/cleaning.R $URL $output_dir $scan - -# If export_matrix set to true, then the entire LOD score matrix will be exported. If false, only maximum lod and related gmpa info will be exported. -export_matrix="false" -# genome scan results. -output_file="julia_result.csv" -# rqtl_file is needed to find gmap.csv. -rqtl_file="./data/HC_M2_0606_R.zip" - - -time JULIA_NUM_THREADS=16 ./bin/MyAppCompiled/bin/MyApp $output_dir $output_file $rqtl_file $export_matrix diff --git a/sh/run_lmgpu.sh b/sh/run_lmgpu.sh deleted file mode 100755 index dec4b7e..0000000 --- a/sh/run_lmgpu.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# Assuming you are in LMGPU directory - - -geno_file="../data/cleandata/geno_prob.csv" -pheno_file="../data/cleandata/traits.csv" -export_matrix="false" -output_file="../data/results/output.csv" -rqtl_file="../data/UTHSC_SPL_RMA_1210.zip" - -time JULIA_NUM_THREADS=8 ./MyAppCompiled/bin/MyApp $geno_file $pheno_file $export_matrix $output_file $rqtl_file diff --git a/src/LMGPU.jl b/src/LMGPU.jl index 45132b1..23fbff8 100644 --- a/src/LMGPU.jl +++ b/src/LMGPU.jl @@ -6,21 +6,19 @@ using LinearAlgebra using Base.Threads using ZipFile using CUDA -# using CuArrays -# using CUDAnative -# using CUDAdrv -# import CuArrays.CuArray + #put all your source file here. include("data_io.jl") +export get_geno_data, get_pheno_data, get_gmap_file include("util.jl") include("cpu.jl") +export cpurun include("gpu.jl") include("common_func.jl") include("match_gmap_info.jl") +export get_gmap_info, match_gmap # include("cli.jl") -#put all your public functions (functions that you want user to use) here. -export get_geno_data, get_pheno_data, cpurun, get_gmap_info, match_gmap #, gpurun end # module diff --git a/src/cpu.jl b/src/cpu.jl index 63e06a7..387e082 100644 --- a/src/cpu.jl +++ b/src/cpu.jl @@ -61,7 +61,7 @@ end ##################### Running CPU Function ################### -function cpurun(a::AbstractArray{<:Real, 2}, b::AbstractArray{<:Real, 2}, n::Int, maxlod::Bool) +function cpurun(a::AbstractArray{<:Real, 2}, b::AbstractArray{<:Real, 2}, n::Int, export_matrix::Bool) a_std = get_standardized_matrix(a); b_std = get_standardized_matrix(b); #step 2: calculate R, matrix of corelation coefficients @@ -70,8 +70,9 @@ function cpurun(a::AbstractArray{<:Real, 2}, b::AbstractArray{<:Real, 2}, n::Int # lod = lod_score(n, r); lod = lod_score_multithread(n,r) - if maxlod - println("exporting max lod") + + if !export_matrix + println("Calculating max lod") return find_max_idx_value(lod) else println("exporting matrix.") diff --git a/src/data_io.jl b/src/data_io.jl index 0b13f5d..b4e884f 100644 --- a/src/data_io.jl +++ b/src/data_io.jl @@ -12,13 +12,24 @@ end # return convert(Array{datatype,2}, pheno) # end +function try_string2num(num) + return tryparse(Float64,num) != nothing +end + function get_pheno_data(file, datatype; transposed=true) - #first column is individual ID such as : BXD1 - pheno = readdlm(file, ','; skipstart=1) + #first column is individual ID such as : BXD1 , need to be removed. + pheno = readdlm(file, ','; skipstart=1)[:, 2:end] + + if pheno[1,end] == "f" || pheno[1,end] == "m" + @info "Removing sex column of phenotype. " + pheno = pheno[:, 1:end-1] + + end + pheno = convert(Array{datatype,2}, pheno) - # pheno = convert2float.(pheno, datatype) + if transposed return transpose(pheno) |> collect else diff --git a/src/match_gmap_info.jl b/src/match_gmap_info.jl index 57c02f4..7287865 100644 --- a/src/match_gmap_info.jl +++ b/src/match_gmap_info.jl @@ -16,24 +16,8 @@ function extension(url::String) end end -function get_gmap_info(rqtl_file) - - # if passing in rqtl_file as a zip, extract gmap file. - if extension(rqtl_file) == ".zip" - dir = ZipFile.Reader(rqtl_file) - f = findfile(dir, "gmap.csv") - gmap = readdlm(f, ',') - close(dir) - # if passing in just gmap file. - elseif extension(rqtl_file) == ".csv" - if occursin("gmap.csv", rqtl_file) - gmap = readdlm(rqtl_file, ',') - else - error("no gmap file found.") - end - else - error("Rqtl file is not passed in as a .zip, need to handle this.") - end +function get_gmap_info(gmap_file) + gmap = readdlm(gmap_file, ',', header=true) return gmap end