diff --git a/bin/MyApp/Project.toml b/bin/MyApp/Project.toml
deleted file mode 100644
index 1d1655c..0000000
--- a/bin/MyApp/Project.toml
+++ /dev/null
@@ -1,9 +0,0 @@
-name = "MyApp"
-uuid = "b2b8273f-e373-4bd2-bdd1-cbbb72be896e"
-authors = ["chelseatrotter <chelsea.xhu@gmail.com>"]
-version = "0.1.0"
-
-[deps]
-DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
-LMGPU = "80ae3f88-c08a-44f8-80fe-4bd6150eb394"
-PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d"
diff --git a/bin/MyApp/precompile_app.jl b/bin/MyApp/precompile_app.jl
deleted file mode 100644
index 502f8d8..0000000
--- a/bin/MyApp/precompile_app.jl
+++ /dev/null
@@ -1,3 +0,0 @@
-using MyApp
-push!(ARGS, "arg")
-MyApp.julia_main()
diff --git a/bin/MyApp/src/MyApp.jl b/bin/MyApp/src/MyApp.jl
deleted file mode 100644
index ad1d581..0000000
--- a/bin/MyApp/src/MyApp.jl
+++ /dev/null
@@ -1,67 +0,0 @@
-module MyApp
-
-
-using LMGPU
-using DelimitedFiles
-
-function julia_main()
-    try
-        main()
-    catch
-        Base.invokelatest(Base.display_error, Base.catch_stack())
-        return 1
-    end
-    return 0
-end
-
-
-function main()
-
-    args = ARGS
-    @info "getting args"
-    output_dir = args[1]
-    output_file = args[2]
-    rqtl_file = args[3]
-    export_matrix = args[4] == "true"
-
-    @info "getting geno file and pheno file"
-    geno_file = joinpath(output_dir,"geno_prob.csv")
-    pheno_file = joinpath(output_dir, "pheno.csv")
-    output_file = joinpath(output_dir, output_file)
-
-    LMGPU.set_blas_threads(16);
-    # Read in data.
-    G = LMGPU.get_geno_data(geno_file)
-    Y = LMGPU.get_pheno_data(pheno_file)
-    # getting geno and pheno file size.
-    n = size(Y,1)
-    m = size(Y,2)
-    p = size(G,2)
-    println("******* Indivuduals n: $n, Traits m: $m, Markers p: $p ****************");
-    # cpu_timing = benchmark(5, cpurun, Y, G,n,export_matrix);
-
-    # running analysis.
-    lod = LMGPU.cpurun(Y, G,n,export_matrix);
-    if !export_matrix
-        gmap = LMGPU.get_gmap_info(rqtl_file)
-        idx = trunc.(Int, lod[:,1])
-        gmap_info = LMGPU.match_gmap(idx, gmap)
-        lod = hcat(gmap_info, lod)
-        header = reshape(["marker", "chr", "pos", "idx", "lod"], 1,:)
-        lod = vcat(header, lod)
-    end
-
-    # write output to file
-    writedlm(output_file, lod, ',')
-    println("Lod exported to $output_file")
-
-    # TODO: generate plot?
-    return lod
-
-end
-
-if abspath(PROGRAM_FILE) == @__FILE__
-    main()
-end
-
-end # module
diff --git a/bin/build-bin.jl b/bin/build-bin.jl
deleted file mode 100644
index d5b91ea..0000000
--- a/bin/build-bin.jl
+++ /dev/null
@@ -1,6 +0,0 @@
-using PackageCompiler
-
-app_dir = joinpath(@__DIR__, "MyApp")
-compile_dir = joinpath(@__DIR__, "MyAppCompiled")
-precompile_file = joinpath(app_dir,"precompile_app.jl")
-create_app(app_dir, compile_dir, force=true,incremental=false,precompile_execution_file=precompile_file)
diff --git a/bin/install_packages.jl b/bin/install_packages.jl
deleted file mode 100644
index 575f54b..0000000
--- a/bin/install_packages.jl
+++ /dev/null
@@ -1,6 +0,0 @@
-using Pkg
-
-Pkg.activate(".")
-Pkg.instantiate(; verbose = false)
-Pkg.activate("./bin/MyApp")
-Pkg.instantiate(; verbose = false)
diff --git a/r/Rqtl2scan.R b/r/Rqtl2scan.R
deleted file mode 100644
index ec8d3f4..0000000
--- a/r/Rqtl2scan.R
+++ /dev/null
@@ -1,31 +0,0 @@
-library(qtl2)
-## readin data in R/qtl
-bxd <- read.cross2(file="../data/input-for-rqtl/geno-pheno-rqtl.csv",format="csv",crosstype="risib",genotypes=c("B","D"))
-pheno<-read.csv("../data/input-for-rqtl/traits.csv",sep=",")
-
-#drop obs. & traits with all NAs
-keepidx<-which(rowSums(is.na(bxd$pheno))<35500)
-
-c1<-subset(bxd,ind=keepidx)
-rownames(c1$pheno)<-c1$pheno$ID
-c1$pheno<-c1$pheno[,-1]
-# extract genotype data from the processed data
-#gen<-pull.geno(c1)
-#write.csv(gen,file="genotypedata.csv")
-
-droptrait<-which(colSums(is.na(c1$pheno))==79)
-c1$pheno<-c1$pheno[,-droptrait]
-c1$pheno<-pheno
-
-library(tictoc)
-library(qtl2)
-# convert a cross from the qtl format to the qtl2 format
-cvt1<-convert2cross2(c1)
-#insert pseudomarker
-map <- insert_pseudomarkers(cvt1$gmap, step=0)
-pr <- calc_genoprob(cvt1, map, error_prob=0.002, cores=4)
-
-tic()
-out <- scan1(pr, cvt1$pheno, cores=32)
-toc()
-write.csv(out,file="../data/results/rqtl_lod_score.csv")
diff --git a/r/cleaning.R b/r/cleaning.R
deleted file mode 100644
index 364a301..0000000
--- a/r/cleaning.R
+++ /dev/null
@@ -1,99 +0,0 @@
-library(mice)
-library(parallel)
-library(qtl2)
-library(tidyverse)
-library(tictoc)
-
-getdata<-function(url){
-  return(read_cross2(url))
-}
-
-keep_row_idx<-function(pheno, droprate){
-  rs = rowSums(is.na(pheno))
-  keepidx <- which(rs/ncol(pheno) <= droprate)
-  return(keepidx)
-}
-
-keep_col_idx<-function(pheno, droprate){
-
-  cs = colSums(is.na(pheno))
-  keepidx <- which(cs/nrow(pheno) <= droprate)
-  return(keepidx)
-}
-
-calc_gprob_update_gmap<-function(gmap_file, cross, ncore=1, error_prob=0.002, step=0, pseudomarker=FALSE){
-
-  #insert pseudomarker
-  map = cross$gmap
-  if(pseudomarker){
-    map <- insert_pseudomarkers(map, step=step)
-    cat("++++++++ writing out to +++++++++++++ ", gmap_file)
-    write.csv(map, file = gmap_file,row.names = FALSE)
-  }
-  pr <- calc_genoprob(cross, map, error_prob=error_prob, cores=ncore)
-  return(pr)
-}
-
-#get whole genotype prob file
-getGenopr<-function(x){
-  temp<<-NULL
-  m=length(attributes(x)$names)
-  cnames<-attributes(x)$names
-  for (i in 1:m) {
-    d<-eval(parse(text=paste(c('dim(x$\'', cnames[i] ,'\')'),collapse='')))
-    nam<-eval(parse(text=paste(c('dimnames(x$\'',cnames[i],'\')[[2]]'),collapse = '')))
-    cnam<-rep(nam,d[3])
-    p_chr<-paste(c('array(x$\'',cnames[i],'\',dim=c(d[1],d[2]*d[3]))'),collapse='')
-    prob<-eval(parse(text = p_chr))
-    temp<-cbind(temp,prob)
-  }
-  return(temp)
-}
-
-clean_and_write<-function(url, output_dir, scan=FALSE,geno_output_file="geno_prob.csv", pheno_output_file="pheno.csv", new_gmap_file="gmap.csv",
-                          result_file="rqtl_result.csv",
-                          indi_droprate=0.0, trait_droprate=0.0, nseed=100, ncores=1, error_prob=0.002, stepsize=0){
-
-  bxd = getdata(url)
-  print("got data from url")
-
-  dir.create(output_dir, recursive=TRUE)
-  geno_output_file <- file.path(output_dir, geno_output_file)
-  pheno_output_file <- file.path(output_dir, pheno_output_file)
-  new_gmap_file <- file.path(output_dir, new_gmap_file)
-  result_file <- file.path(output_dir, result_file)
-  scan <- scan == "TRUE"
-
-  # innerjoin
-  # pick out shared bxd ids in geno and pheno
-  bxd_ids <- ind_ids_gnp(bxd)
-  cat("dimention of bxd_ids:", dim(bxd_ids))
-  joint_bxd <- subset(bxd, ind = bxd_ids)
-
-  # pick out the ones with no missing data
-  filled_ids <- ind_ids(joint_bxd)[complete.cases(joint_bxd$pheno)]
-  cat("dimention of filled_ids :", dim(filled_ids))
-  filled_bxd = subset(joint_bxd, ind = filled_ids)
-
-  # calculate genotype probablity
-  pr = calc_gprob_update_gmap(new_gmap_file, filled_bxd, ncores, error_prob, stepsize, FALSE)
-  prob1 = getGenopr(pr)
-  print("calculating geno prob done")
-  cat("dimention of geno :", dim(prob1))
-
-  write.csv(filled_bxd$pheno, file = pheno_output_file)
-  write.csv(prob1, file = geno_output_file)
-  print("writing out pheno and geno done")
-
-  if(scan){
-    print("Doing genome scan")
-    tic()
-    out = scan1(pr, filled_bxd$pheno, cores=32)
-    toc()
-    print("writing out rqtl result file.")
-    write.csv(out,file=result_file)
-  }
-}
-
-args = commandArgs(trailingOnly=TRUE)
-clean_and_write(args[1], args[2], args[3])
diff --git a/sh/rqtl-julia.sh b/sh/rqtl-julia.sh
deleted file mode 100755
index 7b56be2..0000000
--- a/sh/rqtl-julia.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-# Assuming you are in LMGPU directory
-
-# Input in Rqtl2 format.
-URL="./data/HC_M2_0606_R.zip"
-# Intermediate and final scan result will be stored here.
-output_dir="./data/HIPPO_CLEAN_DATA/"
-# Do genome scan with R/qtl2, default is False. Only True if we need to compare genome scan results produced by LMGPU.
-scan="FALSE"
-
-time Rscript --vanilla ./r/cleaning.R $URL $output_dir $scan
-
-# If export_matrix set to true, then the entire LOD score matrix will be exported. If false, only maximum lod and related gmpa info will be exported.
-export_matrix="false"
-# genome scan results.
-output_file="julia_result.csv"
-# rqtl_file is needed to find gmap.csv.
-rqtl_file="./data/HC_M2_0606_R.zip"
-
-
-time JULIA_NUM_THREADS=16 ./bin/MyAppCompiled/bin/MyApp $output_dir $output_file $rqtl_file $export_matrix
diff --git a/sh/run_lmgpu.sh b/sh/run_lmgpu.sh
deleted file mode 100755
index dec4b7e..0000000
--- a/sh/run_lmgpu.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Assuming you are in LMGPU directory
-
-
-geno_file="../data/cleandata/geno_prob.csv"
-pheno_file="../data/cleandata/traits.csv"
-export_matrix="false"
-output_file="../data/results/output.csv"
-rqtl_file="../data/UTHSC_SPL_RMA_1210.zip"
-
-time JULIA_NUM_THREADS=8 ./MyAppCompiled/bin/MyApp $geno_file $pheno_file $export_matrix $output_file $rqtl_file
diff --git a/src/LMGPU.jl b/src/LMGPU.jl
index 45132b1..23fbff8 100644
--- a/src/LMGPU.jl
+++ b/src/LMGPU.jl
@@ -6,21 +6,19 @@ using LinearAlgebra
 using Base.Threads
 using ZipFile
 using CUDA
-# using CuArrays
-# using CUDAnative
-# using CUDAdrv
-# import CuArrays.CuArray
+
 
 #put all your source file here.
 include("data_io.jl")
+export get_geno_data, get_pheno_data, get_gmap_file
 include("util.jl")
 include("cpu.jl")
+export cpurun 
 include("gpu.jl")
 include("common_func.jl")
 include("match_gmap_info.jl")
+export  get_gmap_info, match_gmap
 # include("cli.jl")
 
-#put all your public functions (functions that you want user to use) here.
-export get_geno_data, get_pheno_data, cpurun, get_gmap_info, match_gmap #, gpurun
 
 end # module
diff --git a/src/cpu.jl b/src/cpu.jl
index 63e06a7..387e082 100644
--- a/src/cpu.jl
+++ b/src/cpu.jl
@@ -61,7 +61,7 @@ end
 
 
 ##################### Running CPU Function ###################
-function cpurun(a::AbstractArray{<:Real, 2}, b::AbstractArray{<:Real, 2}, n::Int, maxlod::Bool)
+function cpurun(a::AbstractArray{<:Real, 2}, b::AbstractArray{<:Real, 2}, n::Int, export_matrix::Bool)
     a_std = get_standardized_matrix(a);
     b_std = get_standardized_matrix(b);
     #step 2: calculate R, matrix of corelation coefficients
@@ -70,8 +70,9 @@ function cpurun(a::AbstractArray{<:Real, 2}, b::AbstractArray{<:Real, 2}, n::Int
     # lod = lod_score(n, r);
     lod = lod_score_multithread(n,r)
 
-    if maxlod 
-        println("exporting max lod")
+
+    if !export_matrix 
+        println("Calculating max lod")
         return find_max_idx_value(lod)
     else 
         println("exporting matrix.")
diff --git a/src/data_io.jl b/src/data_io.jl
index 0b13f5d..b4e884f 100644
--- a/src/data_io.jl
+++ b/src/data_io.jl
@@ -12,13 +12,24 @@ end
 #     return convert(Array{datatype,2}, pheno)
 # end
 
+function try_string2num(num)
+    return tryparse(Float64,num) != nothing
+end
+
 
 function get_pheno_data(file, datatype; transposed=true)
 
-    #first column is individual ID such as : BXD1
-    pheno = readdlm(file, ','; skipstart=1)
+    #first column is individual ID such as : BXD1 , need to be removed. 
+    pheno = readdlm(file, ','; skipstart=1)[:, 2:end]
+
+    if pheno[1,end] == "f" || pheno[1,end] == "m"
+        @info "Removing sex column of phenotype. "
+        pheno = pheno[:, 1:end-1]
+        
+    end
+
     pheno = convert(Array{datatype,2}, pheno)
-    # pheno = convert2float.(pheno, datatype)
+
     if transposed 
         return transpose(pheno) |> collect
     else 
diff --git a/src/match_gmap_info.jl b/src/match_gmap_info.jl
index 57c02f4..7287865 100644
--- a/src/match_gmap_info.jl
+++ b/src/match_gmap_info.jl
@@ -16,24 +16,8 @@ function extension(url::String)
     end
 end
 
-function get_gmap_info(rqtl_file)
-
-    # if passing in rqtl_file as a zip, extract gmap file.
-    if extension(rqtl_file) == ".zip"
-        dir = ZipFile.Reader(rqtl_file)
-        f = findfile(dir, "gmap.csv")
-        gmap = readdlm(f, ',')
-        close(dir)
-    # if passing in just gmap file.
-    elseif extension(rqtl_file) == ".csv"
-        if occursin("gmap.csv", rqtl_file)
-            gmap = readdlm(rqtl_file, ',')
-        else
-            error("no gmap file found.")
-        end
-    else
-        error("Rqtl file is not passed in as a .zip, need to handle this.")
-    end
+function get_gmap_info(gmap_file)
+    gmap = readdlm(gmap_file, ',', header=true)
     return gmap
 
 end