[WIP] Fork dask-jobqueue's testing suite images #193

Open
wants to merge 4 commits into base: master
28 changes: 28 additions & 0 deletions ci/LICENSE.txt
@@ -0,0 +1,28 @@
Copyright (c) 2018, Anaconda, Inc. and contributors
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

Neither the name of Anaconda nor the names of any contributors may be used to
endorse or promote products derived from this software without specific prior
written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
13 changes: 13 additions & 0 deletions ci/conda_setup.sh
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

set -e
set -x

# Install miniconda
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda update conda --yes
conda clean -tipy
conda config --set always_yes yes --set changeps1 no
conda --version
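
Note: the PATH export above only persists if the script is sourced by the calling shell rather than executed. A minimal sketch of how a CI step might consume it under that assumption (the exact invocation is not part of this diff):

# Hypothetical CI step (not in this PR): source the script so the exported
# PATH remains visible to subsequent commands in the same shell.
source ci/conda_setup.sh
# Reuse the shared environment file added below to populate the base env.
conda env update -n base --file ci/environment.yml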
7 changes: 7 additions & 0 deletions ci/environment.yml
@@ -0,0 +1,7 @@
name: dask-jobqueue
channels:
- conda-forge
- defaults
dependencies:
- python=3.8
- juliaup
42 changes: 42 additions & 0 deletions ci/htcondor.sh
@@ -0,0 +1,42 @@
#!/usr/bin/env bash

function jobqueue_before_install {
docker version
docker-compose version

# start htcondor cluster
cd ./ci/htcondor
docker-compose pull
./start-htcondor.sh
docker-compose exec -T submit /bin/bash -c "condor_status"
docker-compose exec -T submit /bin/bash -c "condor_q"
cd -

# Set shared space permissions
docker-compose exec -T submit /bin/bash -c "chmod -R 777 /shared_space"

docker ps -a
docker images
}

function jobqueue_install {
cd ./ci/htcondor
docker-compose exec -T submit /bin/bash -c "cd /dask-jobqueue; pip3 install -e .;chown -R submituser ."
cd -
}

function jobqueue_script {
cd ./ci/htcondor
docker-compose exec -T --user submituser submit /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --log-cli-level DEBUG --capture=tee-sys --verbose -E htcondor "
cd -
}

function jobqueue_after_script {
cd ./ci/htcondor
docker-compose exec -T --user submituser submit /bin/bash -c "condor_q"
docker-compose exec -T submit /bin/bash -c "condor_status"
docker-compose exec -T --user submituser submit /bin/bash -c "condor_history"
docker-compose exec -T --user submituser submit /bin/bash -c "cd; cat logs/*"
docker-compose exec -T cm /bin/bash -c " grep -R \"\" /var/log/condor/ "
cd -
}
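
Each of the ci/*.sh scripts in this PR (htcondor.sh, none.sh, pbs.sh) exposes the same four hook functions. A minimal sketch of how a top-level CI driver might call them — the driver itself is not part of this diff, and the JOBQUEUE variable name is an assumption:

#!/usr/bin/env bash
# Hypothetical CI driver (not in this PR); JOBQUEUE is assumed to be one of
# none, htcondor, pbs and selects which ci/<name>.sh to source.
set -ex
source "ci/${JOBQUEUE}.sh"

jobqueue_before_install   # pull images and start the scheduler cluster
jobqueue_install          # install dask-jobqueue inside the cluster
jobqueue_script           # run the test suite
jobqueue_after_script     # dump scheduler logs for debugging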
15 changes: 15 additions & 0 deletions ci/htcondor/Dockerfile
@@ -0,0 +1,15 @@
FROM htcondor/submit:el7 as submit

RUN curl -o miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash miniconda.sh -f -b -p /opt/anaconda && \
/opt/anaconda/bin/conda clean -tipy && \
rm -f miniconda.sh
ENV PATH /opt/anaconda/bin:$PATH
# environment.yml is copied here by the CI script; if building manually, copy it from the parent directory yourself
COPY environment.yml .
RUN conda env update -n base --file environment.yml

FROM htcondor/execute:el7 as execute

COPY --from=submit /opt/anaconda /opt/anaconda
ENV PATH /opt/anaconda/bin:$PATH
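
As the comment in the Dockerfile notes, environment.yml lives in ci/ and is copied next to the Dockerfile by the CI scripts. A hedged example of building the images by hand under that assumption (the same pattern applies to ci/pbs):

# Manual build sketch, run from the repository root (not part of this diff).
cp ci/environment.yml ci/htcondor/
docker-compose -f ci/htcondor/docker-compose.yml build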
1 change: 1 addition & 0 deletions ci/htcondor/condor_config.local
@@ -0,0 +1 @@
NEGOTIATOR_INTERVAL=10
66 changes: 66 additions & 0 deletions ci/htcondor/docker-compose.yml
@@ -0,0 +1,66 @@
version: "3.4"

services:
cm:
image: htcondor/cm:el7
hostname: cm.htcondor
environment:
- USE_POOL_PASSWORD=yes
volumes:
- secrets:/root/secrets
- ./condor_config.local:/etc/condor/condor_config.local
command: bash -c 'condor_store_cred -p password -f /root/secrets/pool_password ; exec bash -x /start.sh'

submit:
image: daskdev/dask-jobqueue:htcondor-submit
build:
context: .
target: submit
hostname: submit.htcondor
environment:
- CONDOR_HOST=cm
- USE_POOL_PASSWORD=yes
- CI_SHARED_SPACE=/shared_space
depends_on:
- cm
volumes:
- secrets:/root/secrets
- ../..:/dask-jobqueue
- ./condor_config.local:/etc/condor/condor_config.local
- shared_space:/shared_space

execute1:
image: daskdev/dask-jobqueue:htcondor-execute
build:
context: .
target: execute
hostname: execute1.htcondor
environment:
- CONDOR_HOST=cm
- USE_POOL_PASSWORD=yes
depends_on:
- cm
volumes:
- secrets:/root/secrets
- ./condor_config.local:/etc/condor/condor_config.local
- shared_space:/shared_space

execute2:
image: daskdev/dask-jobqueue:htcondor-execute
build:
context: .
target: execute
hostname: execute2.htcondor
environment:
- CONDOR_HOST=cm
- USE_POOL_PASSWORD=yes
depends_on:
- cm
volumes:
- secrets:/root/secrets
- ./condor_config.local:/etc/condor/condor_config.local
- shared_space:/shared_space

volumes:
secrets:
shared_space:
10 changes: 10 additions & 0 deletions ci/htcondor/start-htcondor.sh
@@ -0,0 +1,10 @@
#!/bin/bash

docker-compose up -d --no-build

while [ $(docker-compose exec -T submit condor_status -af activity | grep Idle | wc -l) -ne 2 ]
do
echo "Waiting for cluster to become ready"
sleep 2
done
echo "HTCondor properly configured"
20 changes: 20 additions & 0 deletions ci/none.sh
@@ -0,0 +1,20 @@
#!/usr/bin/env bash

function jobqueue_before_install {
true # Pass
}

function jobqueue_install {
which python
pip install --no-deps -e .
}

function jobqueue_script {
flake8 -j auto dask_jobqueue
black --exclude versioneer.py --check .
pytest --verbose
}

function jobqueue_after_script {
echo "Done."
}
43 changes: 43 additions & 0 deletions ci/pbs.sh
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

function jobqueue_before_install {
docker version
docker-compose version

# start pbs cluster
cd ./ci/pbs
docker-compose pull
./start-pbs.sh
cd -

# Set shared space permissions
docker exec pbs_master /bin/bash -c "chmod -R 777 /shared_space"
docker exec pbs_master /bin/bash -c "chown -R pbsuser:pbsuser /home/pbsuser"

docker exec -u pbsuser pbs_master pbsnodes -a
docker ps -a
docker images
}

function jobqueue_install {
docker exec pbs_master /bin/bash -c "cd /dask-jobqueue; pip install -e .; chown -R pbsuser ."
}

function jobqueue_script {
docker exec -u pbsuser pbs_master /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --verbose -s -E pbs"
}

function jobqueue_after_script {
docker exec -u pbsuser pbs_master qstat -fx
docker exec pbs_master bash -c 'cat /var/spool/pbs/sched_logs/* || true'
docker exec pbs_master bash -c 'cat /var/spool/pbs/server_logs/* || true'
docker exec pbs_master bash -c 'cat /var/spool/pbs/server_priv/accounting/* || true'
docker exec pbs_slave_1 bash -c 'cat /var/spool/pbs/mom_logs/* || true'
docker exec pbs_slave_1 bash -c 'cat /var/spool/pbs/spool/* || true'
docker exec pbs_slave_1 bash -c 'cat /tmp/*.e* || true'
docker exec pbs_slave_1 bash -c 'cat /tmp/*.o* || true'
docker exec pbs_slave_2 bash -c 'cat /var/spool/pbs/mom_logs/* || true'
docker exec pbs_slave_2 bash -c 'cat /var/spool/pbs/spool/* || true'
docker exec pbs_slave_2 bash -c 'cat /tmp/*.e* || true'
docker exec pbs_slave_2 bash -c 'cat /tmp/*.o* || true'
}
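
jobqueue_before_install calls ./start-pbs.sh, which is not included in this diff. By analogy with ci/htcondor/start-htcondor.sh, it presumably brings the compose cluster up and waits for both slave nodes to register; a hedged sketch of what it might look like:

#!/bin/bash
# Hypothetical start-pbs.sh (not part of this diff), modeled on start-htcondor.sh.
docker-compose up -d --no-build

# Wait until both slave nodes report a free state in pbsnodes.
while [ $(docker exec -u pbsuser pbs_master pbsnodes -a 2>/dev/null | grep -c "state = free") -ne 2 ]
do
    echo "Waiting for PBS cluster to become ready"
    sleep 2
done
echo "PBS properly configured"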
42 changes: 42 additions & 0 deletions ci/pbs/Dockerfile
@@ -0,0 +1,42 @@
# Inspired by https://github.com/PBSPro/pbspro/blob/v18.1.beta/docker/centos7/
# Multi-stage build: the builder stage below compiles the PBS Pro RPMs via build.sh
FROM centos:7.5.1804 AS builder
# install dependencies for building
RUN yum install -y gcc make rpm-build libtool hwloc-devel libX11-devel \
libXt-devel libedit-devel libical-devel ncurses-devel perl \
postgresql-devel python-devel tcl-devel tk-devel swig expat-devel \
openssl-devel libXext libXft git postgresql-contrib
# get known PBS Pro source code
RUN git clone --branch release_18_1_branch https://github.com/pbspro/pbspro.git /src/pbspro
COPY build.sh /
RUN bash /build.sh

# base image
FROM centos:7.5.1804
LABEL description="PBS Professional Open Source and conda"

# The PBS master node name; can be overridden if needed
ENV PBS_MASTER pbs_master
ENV PATH /opt/pbs/bin:/opt/anaconda/bin:$PATH
ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8

COPY --from=builder /root/rpmbuild/RPMS/x86_64/pbspro-server-*.rpm .
# install pbspro and useful packages
RUN yum install -y pbspro-server-*.rpm curl bzip2 git gcc sudo openssh-server && yum clean all
# install python
RUN curl -o miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash miniconda.sh -f -b -p /opt/anaconda && \
/opt/anaconda/bin/conda clean -tipy && \
rm -f miniconda.sh
# environment.yml is copied here by the CI script; if building manually, copy it from the parent directory yourself
COPY environment.yml .
RUN conda env update -n base --file environment.yml

# Copy entrypoint and other needed scripts
COPY ./*.sh /
RUN chmod a+x ./*.sh

# default entrypoint launch pbs master
ENTRYPOINT ["bash", "/master-entrypoint.sh"]
11 changes: 11 additions & 0 deletions ci/pbs/build.sh
@@ -0,0 +1,11 @@
#!/bin/bash
cd /src/pbspro
./autogen.sh
./configure --prefix=/opt/pbs
make dist
mkdir /root/rpmbuild /root/rpmbuild/SOURCES /root/rpmbuild/SPECS
cp pbspro-*.tar.gz /root/rpmbuild/SOURCES
cp pbspro.spec /root/rpmbuild/SPECS
cp pbspro-rpmlintrc /root/rpmbuild/SOURCES
cd /root/rpmbuild/SPECS
rpmbuild -ba pbspro.spec
54 changes: 54 additions & 0 deletions ci/pbs/docker-compose.yml
@@ -0,0 +1,54 @@
version: "2"

services:

master:
image: daskdev/dask-jobqueue:pbs
build: .
container_name: pbs_master
hostname: pbs_master
environment:
- CI_SHARED_SPACE=/shared_space
volumes:
- ../..:/dask-jobqueue
- userhome:/home/pbsuser
- shared_space:/shared_space
command: bash /run-master.sh

slave_one:
image: daskdev/dask-jobqueue:pbs
build: .
container_name: pbs_slave_1
hostname: pbs_slave_1
volumes:
- userhome:/home/pbsuser
- shared_space:/shared_space
entrypoint: "bash /slave-entrypoint.sh"
command: bash /run-slave.sh
links:
- "master:pbs_master"
environment:
- PBS_MASTER=pbs_master
depends_on:
- master

slave_two:
image: daskdev/dask-jobqueue:pbs
build: .
container_name: pbs_slave_2
hostname: pbs_slave_2
volumes:
- userhome:/home/pbsuser
- shared_space:/shared_space
entrypoint: "bash /slave-entrypoint.sh"
command: bash /run-slave.sh
links:
- "master:pbs_master"
environment:
- PBS_MASTER=pbs_master
depends_on:
- master

volumes:
userhome:
shared_space:
16 changes: 16 additions & 0 deletions ci/pbs/master-entrypoint.sh
@@ -0,0 +1,16 @@
#!/bin/sh
pbs_conf_file=/etc/pbs.conf
mom_conf_file=/var/spool/pbs/mom_priv/config
hostname=$(hostname)

# replace hostname in pbs.conf and mom_priv/config
sed -i "s/PBS_SERVER=.*/PBS_SERVER=$hostname/" $pbs_conf_file
sed -i "s/\$clienthost .*/\$clienthost $hostname/" $mom_conf_file

# start PBS Pro
/etc/init.d/pbs start

# create default non-root user
adduser pbsuser

exec "$@"
13 changes: 13 additions & 0 deletions ci/pbs/run-master.sh
@@ -0,0 +1,13 @@
#!/bin/bash

# Reduce the time between PBS scheduling cycles and enable job history
qmgr -c "set server scheduler_iteration = 20"
qmgr -c "set server job_history_enable = True"
qmgr -c "set server job_history_duration = 24:00:00"

# add two slaves to pbs
qmgr -c "create node pbs_slave_1"
qmgr -c "create node pbs_slave_2"

# Keep a long-running foreground process so the container stays up
sleep infinity
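
The compose file also references slave-entrypoint.sh and run-slave.sh, neither of which is shown in this diff. By analogy with the master scripts, the slave containers only need to stay alive once their entrypoint has started the PBS MoM; a hedged guess at run-slave.sh:

#!/bin/bash
# Hypothetical run-slave.sh (not part of this diff): the slave entrypoint is
# assumed to have started pbs_mom already, so this only keeps the container up.
sleep infinity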