Skip to content

Commit

Permalink
Missing tools/packages to Python3/R containers (#281)
Browse files Browse the repository at this point in the history
* Missing tools/packages to Python3/R containers
Fixes #275

* revise bioconductor install script

* revise

* fix github.R script

* fix for no version numbers

* revise miniwdl test

* build fixes

* lowercase hyprcoloc

* fix install of some packages

* cleanup

* early Sept packages

* downgrade R

* bioconductor 3.18

* roll back

* fix failing LDpred2 tests

* remove --col-bp flag/arg

* run all builds

* changelog
  • Loading branch information
espenhgn authored Oct 17, 2024
1 parent 5087cc3 commit b352180
Show file tree
Hide file tree
Showing 11 changed files with 175 additions and 112 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker_build_push_gwas.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# action to build and push the gwas Docker and Singularity image at each tag (for release)
name: build gwas Docker image
name: build & push gwas Docker image

on:
push:
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ If MD5 sum is not listed for a certain release then it means that the container

### Added

* Add R packages lightgbm, EFAtools, RiskScorescvd, glmnet, survival, caret, PooledCohort, genio, HyPrColoc
* Add Python3 packages miniwdl, miniwdl-slurm, dxpy
* Add unit test runs as part of the GitHub Actions workflow for building Docker containers
* Add Python packages `imbalanced-learn, lightgbm, openpyxl` + PRSice_linux binary to `python3.sif` container
* Add Conda environment file for project dependencies
Expand All @@ -37,6 +39,7 @@ If MD5 sum is not listed for a certain release then it means that the container

### Updated

* Update R to 4.4.1 in `r.sif` container (from 4.0.5); update R packages to Posit/CRAN/Bioconductor snapshots dated 2024.09.01; update Bioconductor to version 3.19 (from 3.12)
* update testing scripts to support both Docker and Singularity containers
* Update REGENIE binary to version 3.6 in `gwas.sif` container
* Update LDAK binary to version 6 in gwas.sif (from 5.2)
Expand All @@ -57,6 +60,7 @@ If MD5 sum is not listed for a certain release then it means that the container

### Fixed

* Fixed broken unit test in `tests/test_LDpred2/scripts/ld.sh`
* Fixed broken unit test `tests/test_gwas.py::test_gwas_metal` with Apptainer "sandbox" mode
* Workaround for pandas import before scipy in python codes via `export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH`
* Fixed brittle tests if `TMPDIR` is not `/tmp`
Expand Down
3 changes: 3 additions & 0 deletions docker/dockerfiles/python3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ RUN mamba install python=3.10.6 \
intervaltree=3.1.0 \
matplotlib-venn=0.11.5 \
matplotlib=3.6.0 \
miniwdl \
miniwdl-slurm \
more-itertools=9.0.0 \
numdifftools=0.9.39 \
numba=0.60.0 \
Expand All @@ -53,6 +55,7 @@ RUN mamba install python=3.10.6 \
RUN pip install --no-cache-dir cbgen==1.0.2 && \
pip install --no-cache-dir bgen-reader==4.0.8 && \
pip install --no-cache-dir bed-reader==0.2.24 && \
pip install --no-cache-dir dxpy==0.383.1 && \
pip install --no-cache-dir pysnptools==0.5.7 && \
pip install --no-cache-dir fastlmm==0.6.3 && \
pip install --no-cache-dir plinkio==0.9.8 && \
Expand Down
45 changes: 27 additions & 18 deletions docker/dockerfiles/r/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
# R container
FROM rocker/verse:4.0.5
FROM rocker/verse:4.4.1
# based on Ubuntu 22.04.5 LTS (jammy)

ENV TZ=Europe
ENV DEBIAN_FRONTEND=noninteractive

# Essential tools
WORKDIR /tmp
COPY /scripts/apt_get_essential.sh .
RUN bash apt_get_essential.sh && \
rm apt_get_essential.sh

# deps for R packages
# deps for R packages;
RUN apt-get update -y && apt-get install -y --no-install-recommends \
libperl-dev=5.30.0-9ubuntu0.5 \
libgslcblas0=2.5+dfsg-6+deb10u1build0.20.04.1 \
lmodern=2.004.5-6 \
texlive-latex-extra=2019.202000218-1 && \
apt-utils=2.4.13 \
ca-certificates=20240203~22.04.1 && \
update-ca-certificates && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Version-pinned system packages (curl, Perl/GSL/LaTeX libraries, wget).
# Every `package=version` argument must come before the first `&&`: anything
# placed after it is no longer an argument to `apt-get install`. Previously
# `wget=...` followed the `&&`, so the shell read it as an environment
# assignment for `apt-get clean` and wget was silently not installed here.
RUN apt-get update -y && apt-get install -y --no-install-recommends \
    curl=7.81.0-1ubuntu1.18 \
    libperl-dev=5.34.0-3ubuntu1.3 \
    libgslcblas0=2.7.1+dfsg-3 \
    lmodern=2.004.5-6.1 \
    texlive-latex-extra=2021.20220204-1 \
    wget=1.21.2-2ubuntu1.1 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

Expand All @@ -25,18 +32,10 @@ COPY /scripts/install_gcta.sh /tmp/gcta/
RUN bash /tmp/gcta/install_gcta.sh && \
rm -rf /tmp/gcta

# PRSice-2.
# The container running PRSice.R script with "--prsice PRSice_linux"
# argument must also contain the PRSice_linux binary)
WORKDIR /tmp/prsice
COPY /scripts/install_prsice.sh /tmp/prsice/
RUN bash /tmp/prsice/install_prsice.sh && \
rm -rf /tmp/prsice

# R packages.
# CRAN packages
WORKDIR /tmp/
RUN R -e "install.packages('devtools', version='2.4.5', repos='https://packagemanager.posit.co/cran/__linux__/focal/2023-02-16', dependencies=c('Depends', 'Imports', 'LinkingTo'))"
RUN R -e "install.packages('devtools', repos='https://packagemanager.posit.co/cran/__linux__/jammy/2024-09-01', dependencies=c('Depends', 'Imports', 'LinkingTo'))"
COPY /scripts/R/cran.R .
RUN Rscript cran.R

Expand All @@ -55,4 +54,14 @@ COPY /scripts/R/source.R .
RUN Rscript source.R && \
rm -rf /tmp/*

# Misc. binaries
# PRSice-2.
# (The container running the PRSice.R script with the "--prsice PRSice_linux"
# argument must also contain the PRSice_linux binary.)
WORKDIR /tmp/prsice
COPY /scripts/install_prsice.sh /tmp/prsice/
RUN bash /tmp/prsice/install_prsice.sh && \
rm -rf /tmp/prsice


WORKDIR /tools
15 changes: 14 additions & 1 deletion docker/scripts/R/bioconductor.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# Installation settings defined for this script
upgrade <- "default"
dependencies <- c("Depends", "Imports", "LinkingTo")

# Dated Posit Package Manager URL for Bioconductor (Linux/jammy path)
url <- "https://packagemanager.posit.co/bioconductor/__linux__/jammy/2024-09-02"

# Point BiocManager at the dated Posit Package Manager Bioconductor mirror and
# its config file, and use a dated CRAN snapshot compatible with
# Bioconductor 3.19 as the default repository.
options(
  BioC_mirror = "https://packagemanager.posit.co/bioconductor/2024-09-02",
  BIOCONDUCTOR_CONFIG_FILE = "https://packagemanager.posit.co/bioconductor/2024-09-02/config.yaml",
  repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/jammy/2024-09-01")
)

# Bioconductor packages
packages <- c(
'AnnotationDbi',
Expand All @@ -16,10 +28,11 @@ packages <- c(
'zlibbioc')

# install package from Bioconductor and quit with error if installation fails
library(devtools)
for (package in packages) {
tryCatch(
{
BiocManager::install(package, version='3.12')
BiocManager::install(package, version='3.19')
},
error = function(e) {
cat("Error occurred during package installation:\n")
Expand Down
176 changes: 94 additions & 82 deletions docker/scripts/R/cran.R
Original file line number Diff line number Diff line change
@@ -1,96 +1,108 @@
# Attach devtools with library() rather than require(): library() stops the
# script immediately if devtools is missing, whereas require() only returns
# FALSE and lets the script continue to a later, less obvious failure.
library(devtools)
url <- "https://packagemanager.posit.co/cran/__linux__/focal/2023-02-16"
url <- "https://packagemanager.posit.co/cran/__linux__/jammy/2024-09-01"
dependencies <- c('Depends', 'Imports', 'LinkingTo')
upgrade <- 'default'

options(repos = c(CRAN = url))

# CRAN packages w. version
packages <- list(
argparser = '0.7.1',
arsenal = '3.6.3',
bigreadr = '0.2.5',
bigsnpr = '1.11.6',
BiocManager = '1.30.19',
brant = '0.3-0',
brms = '2.18.0',
carat = '2.1.0',
circlize = '0.4.15',
correlation = '0.8.3',
corrplot = '0.92',
CPBayes = '1.1.0',
DescTools = '0.99.47',
'data.table' = '1.14.6',
dendextend = '1.16.0',
dplyr = '1.1.0',
fastICA = '1.2-3',
fcfdr = '1.0.0',
flextable = '0.8.5',
fmsb = '0.7.5',
foreign = '0.8-84',
GCPBayes = '4.0.0',
geepack = '1.3.9',
ggalluvial = '0.12.4',
ggcorrplot = '0.1.4',
ggplot2 = '3.4.1',
ggseg = '1.6.5',
ggseg3d = '1.6.3',
ggstar = '1.0.4',
ggthemes = '4.2.4',
ggpubr = '0.6.0',
glue = '1.6.2',
gplots = '3.1.3',
gtsummary = '1.7.0',
Haplin = '7.3.0',
homologene = '1.4.68.19.3.27',
imputeMissings = '0.0.3',
jtools = '2.2.1',
lavaan = '0.6-14',
lmerTest = '3.1-3',
magrittr = '2.0.3',
MatrixEQTL = '2.3',
mgcv = '1.8-41', # has gamm function
miniCRAN = '0.2.16',
moments = '0.14.1',
MplusAutomation = '1.1.0',
MultiABEL = '1.1-6',
MVN = '5.9',
mvtnorm = '1.1-3',
optparse = '1.7.3',
parameters = '0.20.2',
patchwork = '1.1.2',
pracma = '2.4.2',
PredictABEL = '1.2-4',
pROC = '1.18.0',
qqman = '0.1.8',
reghelper = '1.1.1',
remotes = '2.4.2',
rjags = '4-13',
ROCR = '1.0-11',
rmarkdown = '2.20',
rstan = '2.21.8',
runonce = '0.2.3',
scales = '1.2.1',
semptools = '0.2.9.6',
seqminer = '8.6',
semTools = '0.5-6',
simplecolors = '0.1.1',
sp = '1.6-0',
splines2 = '0.4.7',
stringr = '1.5.0',
survey = '4.1-1',
survminer = '0.4.9',
tibble = '3.1.8',
tidyr = '1.3.0',
tree = '1.0-43',
vctrs = '0.5.2',
xgboost = '1.7.3.1')
'argparser',
'arsenal',
'bigreadr',
'bigsnpr',
'BiocManager',
'brant',
'brms',
'carat',
'caret',
'circlize',
'correlation',
'corrplot',
'CPBayes',
'DescTools',
'data.table',
'dendextend',
'dplyr',
'EFAtools',
'fastICA',
'fcfdr',
'flextable',
'fmsb',
'foreign',
'GCPBayes',
'geepack',
'genio',
'ggalluvial',
'ggcorrplot',
'ggplot2',
'ggseg',
'ggseg3d',
'ggstar',
'ggthemes',
'ggpubr',
'glmnet',
'glue',
'gplots',
'gtsummary',
'Haplin',
'homologene',
'imputeMissings',
'jtools',
'lavaan',
'lightgbm',
'lmerTest',
'magrittr',
'MatrixEQTL',
'mgcv', # has gamm function
'miniCRAN',
'moments',
'MplusAutomation',
'MVN',
'mvtnorm',
'optparse',
'parameters',
'patchwork',
'PooledCohort',
'pracma',
'PredictABEL',
'pROC',
'qqman',
'r2redux',
'reghelper',
'remotes',
'RiskScorescvd',
'rjags',
'ROCR',
'rmarkdown',
'rstan',
'runonce',
'scales',
'semptools',
'seqminer',
'semTools',
'simplecolors',
'sp',
'splines2',
'stringr',
'susieR',
'survey',
'survival',
'survminer',
'tibble',
'tidyr',
'tree',
'vctrs',
'xgboost')

# install package from CRAN and quit with error if installation fails
for (package in names(packages)) {
version <- packages[[package]]
# for (package in names(packages)) {
for (package in packages) {
# version <- packages[[package]]

tryCatch(
{
devtools::install_version(package, version=version, repos=url, dependencies=dependencies, upgrade=upgrade)
devtools::install_cran(package, dependencies=dependencies, upgrade=upgrade)
},
error = function(e) {
cat("Error occurred during package installation:\n")
Expand Down
20 changes: 13 additions & 7 deletions docker/scripts/R/github.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
url <- "https://packagemanager.posit.co/cran/__linux__/focal/2023-02-16"
url <- "https://packagemanager.posit.co/cran/__linux__/jammy/2024-09-01"
dependencies <- c('Depends', 'Imports', 'LinkingTo')
upgrade <- 'default'
auth_token <- Sys.getenv("github_pat")
Expand All @@ -8,8 +8,8 @@ cat("GitHub PAT length: ", nchar(auth_token), "\n")
packages <- list(
'alexploner/cfdr.pleio' = '76d5085e6d3f3ca9576d5d7564d2acf11bcfd021',
'dajiangliu/rareGWAMA' = '72e962dae19dc07251244f6c33275ada189c2126',
'deepchocolate/glm-extras' = '91f8d53c886b27b7c9941df6c3233f99981323a8',
'GenomicSEM/GenomicSEM' = 'd3ddccc2825228cde27a70f155cdbcde9ebcdf68',
'deepchocolate/glm-extras' = 'ecba68c0378fc953edf8fe594ce914aff8ada6fa',
'GenomicSEM/GenomicSEM' = '199d16f5e45c040a25ee0c885338ebd667277135',
'jamesliley/cfdr' = 'ec5fddbd27c746a470eb827dc249a80194b231e8',
'jamesliley/cFDR-common-controls' = '9b923fea283e2373ee8effa2909620a1930004bd',
# gwasvcf deps:
Expand All @@ -22,13 +22,13 @@ packages <- list(
'gqi/MRMix'='56afdb2bc96760842405396f5d3f02e60e305039',
'WSpiller/RadialMR' = '0ed91f83aebf265a09482561c128c830e58ed697',
'MRCIEU/TwoSampleMR' = '578c68fa754c57d764553812bf85d69ecf43b011',
'stephenslab/susieR' = 'ced6a9c83a45f792d4d2ef2a9ae0846e164bf92c',
'MRCIEU/gwasglue' = 'c2d5660eed389e1a9b3e04406b88731d642243f1',
'noahlorinczcomi/MRBEE' = '96971e346099b89585a6eff4a6f22bbcf25d6ca8',
'noahlorinczcomi/MRBEE' = '6295549a1f5a158c6701eb793646d60c8aef11ca',
'norment/normentR' = 'dfa1fbae9587db6c3613b0405df4f9cfa98ee0e1',
'psychgen/phenotools' = 'b744d927a1302d85152917f3802a2212093d588a',
'psychgen/phenotools' = '9eefa4ee0e8ea00bcbdb0e579dcdd7912cfe0597',
'wouterpeyrot/CCGWAS' = 'ce9764da946189623a0164f156ad119773bc32f5',
'WSpiller/MVMR' = '6adf8839a33fbe225c0161c564a517dfd61cee32'
'WSpiller/MVMR' = '65705da9421b6235c7458dba6f01cddfebfe96f5',
'xiashen/MultiABEL' = '7067fe6753c74f6580029abc82bce914472b4b16'
)

# install package from GitHub and quit with error if installation fails
Expand All @@ -48,3 +48,9 @@ for (package in names(packages)) {
}
)
}


# Miscellaneous packages installed outside the main loop
library(remotes)

# Pin RcppEigen to a fixed release (presumably needed by hyprcoloc, which is
# installed next -- TODO confirm).
remotes::install_version(
  "RcppEigen",
  version = "0.3.3.9.3"
)

# hyprcoloc from GitHub; upgrade = "never" leaves already-installed
# dependencies at their current versions.
remotes::install_github(
  "jrs95/hyprcoloc",
  build_opts = c("--resave-data", "--no-manual"),
  upgrade = "never"
)
Loading

0 comments on commit b352180

Please sign in to comment.