diff --git a/.env.template b/.env.template new file mode 100644 index 00000000..f51ba9e7 --- /dev/null +++ b/.env.template @@ -0,0 +1,7 @@ +# Database configuration +DB_NAME=daisy +DB_USER=daisy +DB_PASSWORD=daisy + +# Backup configuration +BACKUP_VOLUME=../backups diff --git a/README.md b/README.md index 6119dcd4..ed939f5b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Elixir Daisy + ![Build Status](https://github.com/elixir-luxembourg/daisy/actions/workflows/main.yml/badge.svg) [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-396/) @@ -9,299 +10,22 @@ For more information, please refer to the official [Daisy documentation](https:/ DAISY was published as an article [DAISY: A Data Information System for accountability under the General Data Protection Regulation](https://doi.org/10.1093/gigascience/giz140) in GigaScience journal. ## Demo deployment -You are encouraged to try Daisy for yourself using our [DEMO deployment](https://daisy-demo.elixir-luxembourg.org/). - -## Deployment using Docker - -### Requirements - -* docker: https://docs.docker.com/install/ - -### Installation - -1. Get the source code - - ```bash - git clone git@github.com:elixir-luxembourg/daisy.git - cd daisy - ``` -1. Create your settings file - - ```bash - cp elixir_daisy/settings_local.template.py elixir_daisy/settings_local.py - ``` - Optional: edit the file elixir_daisy/settings_local.py to adapt to your environment. - -1. Build daisy docker image - ```bash - docker-compose up --build - ``` - Wait for the build to finish and keep the process running -1. Open a new shell and go to daisy folder - -1. Build the database - - ```bash - docker-compose exec web python manage.py migrate - ``` -1. Build the solr schema - - ```bash - docker-compose exec web python manage.py build_solr_schema -c /solr/daisy/conf -r daisy -u default - ``` - -1. Compile and deploy static files - - ```bash - cd web/static/vendor - npm run build - cd ../../../ - docker-compose exec web python manage.py collectstatic - ``` -1. Create initial data in the database - - ```bash - docker-compose exec web bash -c "cd core/fixtures/ && wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/edda.json && wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/hpo.json && wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/hdo.json && wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/hgnc.json" - docker-compose exec web python manage.py load_initial_data - ``` - Initial data includes, for instance, controlled vocabularies terms and initial list of institutions and cohorts. - **This step can take several minutes to complete** - -1. Load demo data - - ```bash - docker-compose exec web python manage.py load_demo_data - ``` - This will create mock datasets, projects and create a demo admin account. - -1. Optional - import users from an active directory instance - - ```bash - docker-compose exec web python manage.py import_users - ``` - -1. Build the search index - - ```bash - docker-compose exec web python manage.py rebuild_index -u default - ``` - -1. Browse to https://localhost - a demo admin account is available: - - ``` - username: admin - password: demo - ``` - -### Linting - -pip install black==23.7.0 -pre-commit install -black --check . -black . 
- - -### Operation manual - - -#### Importing - -In addition to loading of initial data, DAISY database can be populated by importing Project, Dataset and Partners records from JSON files using commands `import_projects`, `import_datasets` and `import_partners` respectively. - The commands for import are accepting one JSON file (flag `-f`):
- -```bash -docker-compose exec web python manage.py -f ${PATH_TO_JSON_FILE} -``` -where ${PATH_TO_JSON_FILE} is the path to a json file containing the records definitions. -See file daisy/data/demo/projects.json as an example. - -Alternatively, you can specify directory containing multiple JSON files to be imported with `-d` flag: -```bash -docker-compose exec web python manage.py -d ${PATH_TO_DIR} -``` - -#### Exporting - -Information in the DAISY database can be exported to JSON files. The command for export are given below:
- -```bash -docker-compose exec web python manage.py export_partners -f ${JSON_FILE} -``` -where ${JSON_FILE} is the path to a json file that will be produced. In addition to ````export_partners````, you can run ````export_projects```` and ````export_datasets```` in the same way. - -### Upgrade to last Daisy version - -1. Create a database backup. - - ```bash - docker-compose exec db pg_dump daisy --port=5432 --username=daisy --no-password --clean > backup_`date +%y-%m-%d`.sql - ``` - -1. Make sure docker containers are stopped. - - ```bash - docker-compose stop - ``` - -3. Get last Daisy release. - ```bash - git checkout master - git pull - ``` - -1. Rebuild and start the docker containers. - - ```bash - docker-compose up --build - ``` - Open a new terminal window to execute the following commands. - -1. Update the database schema. - - ```bash - docker-compose exec web python manage.py migrate - ``` - -1. Update the solr schema. - - ```bash - docker-compose exec web python manage.py build_solr_schema -c /solr/daisy/conf -r daisy -u default - ``` - -1. Collect static files. - - ```bash - docker-compose exec web python manage.py collectstatic - ``` - - -1. Rebuild the search index. - - ```bash - docker-compose exec web python manage.py rebuild_index -u default - ``` -1. Reimport the users (optional). - - If LDAP was used during initial setup to import users, they have to be imported again: - - ```bash - docker-compose exec web python manage.py import_users - ``` - -## Deployment without Docker - CentOS - - -See [DEPLOYMENT](DEPLOYMENT.md). - - -## Development - -To be completed. - -### Import users from active directory -```bash -./manage.py import_users -``` - -### Import projects, datasets or partners from external system -Single file mode: -```bash -./manage.py import_projects -f path/to/json_file.json -``` - -Batch mode: -```bash -./manage.py import_projects -d path/to/dir/with/json/files/ -``` - -Available commands: `import_projects`, `import_datasets`, `import_partners`. - -In case of problems, add `--verbose` flag to the command, and take a look inside `./log/daisy.log`. - -### Install js and css dependencies - -```bash -cd web/static/vendor/ -npm ci -``` - -### Compile daisy.scss and React -```bash -cd web/static/vendor -npm run-script build -``` - -### Run the built-in web server (for development) - -```bash -./manage.py runserver -``` - -### Run the tests - -The following command will install the test dependencies and execute the tests: - -```bash -python setup.py pytest -``` -run test for a specific file: -```bash -python setup.py pytest --addopts web/tests/test_dataset.py -``` - -If tests dependencies are already installed, one can also run the tests just by executing: - -```bash -pytest -``` - -## Administration - -To get access to the admin page, you must log in with a superuser account. -On the `Users` section, you can give any user a `staff` status and he will be able to access any project/datasets. +You are encouraged to try Daisy for yourself using our [DEMO deployment](https://daisy-demo.elixir-luxembourg.org/). +## Documentation -## `settings.py` and `local_settings.py` reference +DAISY comes with a **Docker deployment*. For more instructions see the deployment [guide](doc/deployment.md). 
-### Display -| Key | Description | Expected values | Example value | -|---|---|---|---| -| `COMPANY` | A name that is used to generate verbose names of some models | str | `'LCSB'` | -| `DEMO_MODE` | A flag which makes a simple banneer about demo mode appear in About page | bool | `False` | -| `INSTANCE_LABEL` | A name that is used in navbar header to help differentiate different deployments | str | `'Staging test VM'` | -| `INSTANCE_PRIMARY_COLOR` | A color that will be navbar header's background | str of a color | `'#076505'` | -| `LOGIN_USERNAME_PLACEHOLDER` | A helpful placeholder in login form for logins | str | `'@uni.lu'` | -| `LOGIN_PASSWORD_PLACEHOLDER` | A helpful placeholder in login form for passwords | str | `'Hint: use your AD password'` | +See also our -### Integration with other tools -#### ID Service -| Key | Description | Expected values | Example value | -|---|---|---|---| -| `IDSERVICE_FUNCTION` | Path to a function (`lambda: str`) that generates IDs for entities which are published | str | `'web.views.utils.generate_elu_accession'` | -| `IDSERVICE_ENDPOINT` | In case LCSB's idservice function is being used, the setting contains the IDservice's URI | str | `'https://192.168.1.101/v1/api/` | +- [Backup manual](doc/backup.md) for creating and restoring backups. +- [Update manual](doc/update.md) for migration to newer version. +- [Management and administration manual](doc/administration.md) for regular maintenance tasks including starting/stopping the services, import/export of data, inspecting logs and clean up. +- [Development manual](doc/development.md) for steps to setup the development environment and guidance on how to contribute. -#### REMS -| Key | Description | Expected values | Example value | -|---|---|---|---| -| `REMS_INTEGRATION_ENABLED` | A feature flag for REMS integration. In practice, there's a dedicated endpoint which processes the information from REMS about dataset entitlements | str | `True` | -| `REMS_SKIP_IP_CHECK` | If set to `True`, there will be no IP checking if the request comes from trusted REMS instance. | bool | `False` | -| `REMS_ALLOWED_IP_ADDRESSES` | A list of IP addresses that should be considered trusted REMS instances. Beware of configuration difficulties when using reverse proxies. The check can be skipped with `REMS_SKIP_IP_CHECK` | dict[str] | `['127.0.0.1', '192.168.1.101']` | +For legacy deployment (<1.8.1), please refer to the [Legacy deployment and administration manual](doc/legacy-deployment.md). -#### Keycloak -| Key | Description | Expected values | Example value | -|---|---|---|---| -| `KEYCLOAK_INTEGRATION` | A feature flag for importing user information from Keycloak (OIDC IDs) | bool | `True` | -| `KEYCLOAK_URL` | URL to the Keycloak instance | str | `'https://keycloak.lcsb.uni.lu/auth/'` | -| `KEYCLOAK_REALM_LOGIN` | Realm's login name in your Keycloak instance | str | `'master'` | -| `KEYCLOAK_REALM_ADMIN` | Realm's admin name in your Keycloak instance | str | `'master'` | -| `KEYCLOAK_USER` | Username to access Keycloak | str | `'username'` | -| `KEYCLOAK_PASS` | Password to access Keycloak | str | `'secure123'` | +### Acknowledgement -### Others -| Key | Description | Expected values | Example value | -|---|---|---|---| -| `SERVER_SCHEME` | A URL's scheme to access your DAISY instance (http or https) | str | `'https'` | -| `SERVER_URL` | A URL to access your DAISY instance (without the scheme) | str | `'example.com'` | -| `GLOBAL_API_KEY` | An API key that is not connected with any user. 
Disabled if set to `None` | optional[str] | `'in-practice-you-dont-want-to-use-it-unless-debugging'` | \ No newline at end of file +This work was supported by [ELIXIR Luxembourg](https://elixir-luxembourg.org/). \ No newline at end of file diff --git a/core/migrations/0037_auto_20240924_1020.py b/core/migrations/0037_auto_20240924_1020.py new file mode 100644 index 00000000..dfe7ff49 --- /dev/null +++ b/core/migrations/0037_auto_20240924_1020.py @@ -0,0 +1,60 @@ +# Generated by Django 3.2.23 on 2024-09-24 08:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0036_use_restriction_codes"), + ] + + operations = [ + migrations.AlterField( + model_name="cohort", + name="elu_accession", + field=models.CharField( + blank=True, + help_text="Unique persistent identifier of the record.", + max_length=20, + null=True, + unique=True, + verbose_name="Accession number", + ), + ), + migrations.AlterField( + model_name="dataset", + name="elu_accession", + field=models.CharField( + blank=True, + help_text="Unique persistent identifier of the record.", + max_length=20, + null=True, + unique=True, + verbose_name="Accession number", + ), + ), + migrations.AlterField( + model_name="partner", + name="elu_accession", + field=models.CharField( + blank=True, + help_text="Unique persistent identifier of the record.", + max_length=20, + null=True, + unique=True, + verbose_name="Accession number", + ), + ), + migrations.AlterField( + model_name="project", + name="elu_accession", + field=models.CharField( + blank=True, + help_text="Unique persistent identifier of the record.", + max_length=20, + null=True, + unique=True, + verbose_name="Accession number", + ), + ), + ] diff --git a/doc/administration.md b/doc/administration.md new file mode 100644 index 00000000..dea66011 --- /dev/null +++ b/doc/administration.md @@ -0,0 +1,301 @@ +# Management and administration + +## Access the Web Service + +Shell into the `web` container: + +```bash +docker compose exec web /bin/bash +``` + +### Run Django Commands + +Run Django management commands, e.g. `makemigrations`, `migrate`, `createsuperuser`, etc., using the `web` service: + + +```bash +docker compose exec web python manage.py +``` + +See `docker compose exec web python manage.py --help` for all available commands. + +#### Collect Static Files + +```bash +docker compose exec web python manage.py collectstatic --noinput +``` + +#### Rebuild Solr Index + +```bash +docker compose exec web python manage.py rebuild_index --noinput +``` + +--- + +## Managing Other Services + +### PostgreSQL Database (`db` Service) + +#### Access the Database Shell + +```bash +docker compose exec db psql -U daisy -d +``` + +#### Execute SQL Commands + +Run SQL commands directly: + +```bash +docker compose exec db psql -U daisy -d daisy -c "SELECT * FROM user;" +``` + +### Solr (`solr` Service) + +#### Access Solr Admin Interface + +Solr runs on port `8983`. Access it via: + +``` +http://localhost:8983/solr/ +``` + +### RabbitMQ (`mq` Service) + +#### Access RabbitMQ Management Interface + +RabbitMQ management runs on port `15672`. 
Access it via: + +``` +http://localhost:15672/ +``` + +- **Username:** `guest` +- **Password:** `guest` + +### Celery Worker (`worker` Service) + +Logs for the Celery worker can be viewed with: + +```bash +docker compose logs -f worker +``` + +### Celery Beat (`beat` Service) + +Logs for Celery Beat can be viewed with: + +```bash +docker compose logs -f beat +``` + +### Flower Monitoring Tool (`flower` Service) + +Access Flower for task monitoring on port `5555`: + +``` +http://localhost:5555/ +``` + +--- + +## Administration + +To access the admin interface: + +1. **Create a Superuser Account:** + + ```bash + docker compose exec web python manage.py createsuperuser + ``` + +2. **Access the Admin Site:** + + ``` + http://localhost/admin/ + ``` + + Log in with your superuser credentials. + +--- + +## Settings Reference + +To get access to the admin page, you must log in with a superuser account. +On the `Users` section, you can give any user a `staff` status and he will be able to access any project/datasets. + +### `local_settings.py` reference + +#### Display + +| Key | Description | Expected values | Example value | +| ---------------------------- | -------------------------------------------------------------------------------- | --------------- | ------------------------------ | +| `COMPANY` | A name that is used to generate verbose names of some models | str | `'LCSB'` | +| `DEMO_MODE` | A flag which makes a simple banneer about demo mode appear in About page | bool | `False` | +| `INSTANCE_LABEL` | A name that is used in navbar header to help differentiate different deployments | str | `'Staging test VM'` | +| `INSTANCE_PRIMARY_COLOR` | A color that will be navbar header's background | str of a color | `'#076505'` | +| `LOGIN_USERNAME_PLACEHOLDER` | A helpful placeholder in login form for logins | str | `'@uni.lu'` | +| `LOGIN_PASSWORD_PLACEHOLDER` | A helpful placeholder in login form for passwords | str | `'Hint: use your AD password'` | + +#### Integration with other tools + +##### ID Service + +| Key | Description | Expected values | Example value | +| -------------------- | ----------------------------------------------------------------------------------------- | --------------- | ------------------------------------------ | +| `IDSERVICE_FUNCTION` | Path to a function (`lambda: str`) that generates IDs for entities which are published | str | `'web.views.utils.generate_elu_accession'` | +| `IDSERVICE_ENDPOINT` | In case LCSB's idservice function is being used, the setting contains the IDservice's URI | str | `'https://192.168.1.101/v1/api/` | + +##### REMS + +| Key | Description | Expected values | Example value | +| --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------- | -------------------------------- | +| `REMS_INTEGRATION_ENABLED` | A feature flag for REMS integration. In practice, there's a dedicated endpoint which processes the information from REMS about dataset entitlements | str | `True` | +| `REMS_SKIP_IP_CHECK` | If set to `True`, there will be no IP checking if the request comes from trusted REMS instance. | bool | `False` | +| `REMS_ALLOWED_IP_ADDRESSES` | A list of IP addresses that should be considered trusted REMS instances. Beware of configuration difficulties when using reverse proxies. 
The check can be skipped with `REMS_SKIP_IP_CHECK` | dict[str] | `['127.0.0.1', '192.168.1.101']` | + +##### Keycloak + +| Key | Description | Expected values | Example value | +| ---------------------- | ---------------------------------------------------------------------- | --------------- | -------------------------------------- | +| `KEYCLOAK_INTEGRATION` | A feature flag for importing user information from Keycloak (OIDC IDs) | bool | `True` | +| `KEYCLOAK_URL` | URL to the Keycloak instance | str | `'https://keycloak.lcsb.uni.lu/auth/'` | +| `KEYCLOAK_REALM_LOGIN` | Realm's login name in your Keycloak instance | str | `'master'` | +| `KEYCLOAK_REALM_ADMIN` | Realm's admin name in your Keycloak instance | str | `'master'` | +| `KEYCLOAK_USER` | Username to access Keycloak | str | `'username'` | +| `KEYCLOAK_PASS` | Password to access Keycloak | str | `'secure123'` | + +#### Others + +| Key | Description | Expected values | Example value | +| ---------------- | ------------------------------------------------------------------------- | --------------- | -------------------------------------------------------- | +| `SERVER_SCHEME` | A URL's scheme to access your DAISY instance (http or https) | str | `'https'` | +| `SERVER_URL` | A URL to access your DAISY instance (without the scheme) | str | `'example.com'` | +| `GLOBAL_API_KEY` | An API key that is not connected with any user. Disabled if set to `None` | optional[str] | `'in-practice-you-dont-want-to-use-it-unless-debugging'` | + +--- + +## Additional Tips + +- **Running Custom Commands:** Use `docker compose run` for one-off commands without starting the entire service. + + Example: + + ```bash + docker compose run --rm web python manage.py shell + ``` + +- **Accessing Other Services:** You can shell into other services similarly: + + ```bash + docker compose exec db /bin/bash + docker compose exec solr /bin/bash + ``` + +- **Environment Variables:** Override environment variables when running commands: + + ```bash + DB_NAME=custom_db docker compose up -d + ``` + +--- + +## Docker Compose Services Overview + +- **web:** Django application server using Gunicorn. +- **db:** PostgreSQL database. +- **nginx:** Reverse proxy and static file server. +- **solr:** Apache Solr for full-text search. +- **mq:** RabbitMQ message broker. +- **flower:** Monitoring tool for Celery tasks. +- **worker:** Celery worker for asynchronous tasks. +- **beat:** Celery Beat scheduler. +- **backup:** Manages database backups and restoration. + +--- + +## Logs and Monitoring + +### View Service Logs + +View logs for a specific service: + +```bash +docker compose logs -f +``` + +Replace `` with `web`, `db`, `worker`, etc. + +### Check Container Status + +```bash +docker compose ps +``` + +--- + +## Clean Up + +### Remove All Containers and Volumes + +Stop containers and remove containers, networks, volumes, and images: + +```bash +docker system prune -a +docker compose down -v --rmi all +``` + +## Importing and Exporting Data + +In addition to loading of initial data, DAISY database can be populated by importing Project, Dataset and Partners records from JSON files using commands `import_projects`, `import_datasets` and `import_partners` respectively. JSON files are validated using the [Elixir-LU JSON schemas](https://github.com/elixir-luxembourg/json-schemas). + +### Import Data + +You can import data from JSON files using Django management commands. 
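+
+The file path passed to `-f` is resolved inside the `web` container, not on the host. Since the project root is mounted at `/code` in the container (see `docker-compose.yaml`), one option is to place the file inside the repository; the sketch below assumes a hypothetical `projects.json` placed in the project root:
+
+```bash
+# A file in the project root on the host is visible as /code/<file> inside the web container
+docker compose exec web python manage.py import_projects -f /code/projects.json
+```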
+
+#### Import Projects
+
+```bash
+docker compose exec web python manage.py import_projects -f /path/to/projects.json
+```
+
+#### Import Datasets
+
+```bash
+docker compose exec web python manage.py import_datasets -f /path/to/datasets.json
+```
+
+#### Import Partners
+
+```bash
+docker compose exec web python manage.py import_partners -f /path/to/partners.json
+```
+
+To import multiple JSON files from a directory:
+
+```bash
+docker compose exec web python manage.py import_projects -d /path/to/directory/
+```
+
+### Export Data
+
+Information in the DAISY database can be exported to JSON files. The commands for export are given below:
+
+#### Export Projects
+
+```bash
+docker compose exec web python manage.py export_projects -f /path/to/output/projects.json
+```
+
+#### Export Datasets
+
+```bash
+docker compose exec web python manage.py export_datasets -f /path/to/output/datasets.json
+```
+
+#### Export Partners
+
+```bash
+docker compose exec web python manage.py export_partners -f /path/to/output/partners.json
+```
diff --git a/doc/backup.md b/doc/backup.md
new file mode 100644
index 00000000..2bf028a9
--- /dev/null
+++ b/doc/backup.md
@@ -0,0 +1,116 @@
+# Database Backup and Restore Script
+
+## Overview
+
+This manual describes the steps to create a backup and to restore from it.
+
+## Key Functions
+
+- **Backup**: Creates a timestamped `tar.gz` archive of the PostgreSQL database and Django media files.
+- **Restore**: Restores from a specified `tar.gz` backup archive.
+
+## Docker Compose Integration
+
+The `backup` service in `docker-compose.yaml` manages backup and restore using the `scripts/db.sh` script.
+
+### Configuration
+
+All variables can be set in the [environment file](.env.template). These include the variables needed to connect to the database, the path to the local folder where backups are created, and the setup of cron tasks for backups.
+
+- **Volumes**:
+  - `${BACKUP_VOLUME:-../backups}:/backups`
+  - `.:/code`
+
+### Operations
+
+#### Enable Automatic Backups
+
+To ensure automatic backups are enabled, set `ENABLE_BACKUPS=true` (enabled by default):
+
+```bash
+ENABLE_BACKUPS=true docker compose up -d backup
+```
+
+This will configure automatic backups based on the `BACKUP_SCHEDULE`.
+
+To disable automatic backups:
+
+```bash
+ENABLE_BACKUPS=false docker compose up -d backup
+```
+
+#### Manual Backup
+
+Create a manual backup:
+
+```bash
+docker compose exec backup sh /code/scripts/db.sh backup
+```
+
+- **Output**: `backup_<timestamp>.tar.gz` in the `BACKUP_DIR` (`../backups` by default).
+
+## Scheduled Backup with Cron
+
+To schedule the backup script to run automatically at a specific time using cron, add a cron job entry as follows:
+
+1. Ensure the destination location for backups is set in the `.env` file (`BACKUP_VOLUME` variable).
+
+2. Open the crontab editor:
+
+   ```bash
+   crontab -e
+   ```
+
+3. Add the cron job entry (for example, to run the backup at 1 AM daily) with the path to the backup script:
+
+   ```bash
+   0 1 * * * /path/to/daisy/scripts/backup_script.sh
+   ```
+
+4. Check if the cron job is added:
+
+   ```bash
+   crontab -l
+   ```
+
+## Restore Backup
+
+Restore from a specific backup file:
+
+```bash
+docker compose exec backup sh /code/scripts/db.sh restore ../backups/backup_<timestamp>.tar.gz
+docker compose run web python manage.py rebuild_index --noinput
+```
+
+- Replace `../backups/backup_<timestamp>.tar.gz` with the actual file path.
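+
+As an optional sanity check after a restore (not part of the backup script itself; it assumes the default database credentials and the `/code/documents` media directory used elsewhere in this setup), you can confirm that the restored database answers queries and that the media files are back in place:
+
+```bash
+# List the restored tables using the default daisy user and database
+docker compose exec db psql -U daisy -d daisy -c "\dt"
+
+# Media files are restored under /code/documents (the MEDIA_DIR used by the backup service)
+docker compose exec web ls /code/documents
+```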
+ +## List Cron Jobs + +View the automatic backup schedule: + +```bash +docker compose exec backup crontab -l +``` + +## List Backup Contents + +View contents of a backup archive: + +```bash +tar -ztvf ../backups/backup_.tar.gz +``` + +## Restore Legacy Backup + +To restore backup created before version 1.8.1 on newer versions with docker deployment, execute the `legacy_restore.sh` script inside the running container + +```bash +# Copy the legacy backup file to the backup container +docker cp ../daisy_prod.tar.gz $(docker compose ps -q backup):/code/daisy_prod.tar.gz + +# Execute the legacy_restore.sh script inside the running container +docker compose exec backup /bin/sh -c "sh /code/scripts/legacy_restore.sh /code/daisy_prod.tar.gz && rm /code/daisy_prod.tar.gz" +docker compose run web python manage.py rebuild_index --noinput +``` + +Replace `../daisy_prod.tar.gz` with the actual path to legacy backup file. diff --git a/doc/deployment.md b/doc/deployment.md new file mode 100644 index 00000000..89d12334 --- /dev/null +++ b/doc/deployment.md @@ -0,0 +1,142 @@ +# Daisy Project Setup Guide + +This guide provides concise instructions for setting up and running the Daisy project using Docker Compose. It includes commands for managing the Django application and other services within the project. + +## Prerequisites + +- **Docker** +- **Docker Compose** + +--- + +## Getting Started + +### Clone the Repository + +```bash +git clone https://github.com/elixir-luxembourg/daisy.git +cd daisy +``` + +### Environment Variables + +Create a `.env` file in the project root to override default environment variables if necessary. See [.env.template](env.template) file for more detail. Additionally, create `elixir_daisy/settings_local.py` file from `elixir_daisy/settings_local.template.py`. + +## Installation + +### Build and Start Services + +Build and start all services defined in `docker-compose.yaml`: + +```bash +docker compose up -d --build +``` + +### Initialize the Database + +Run database migrations: + +```bash +docker compose exec web python manage.py migrate +``` + +### Build the Solr Schema + +Build the Solr schema required for full-text search: + +```bash +docker compose exec web python manage.py build_solr_schema -c /solr/daisy/conf -r daisy -u default +``` + +### Compile and Deploy Static Files + +The project uses frontend assets that need to be compiled (e.g., with npm), you need to build them and collect static files. + +#### Install npm Dependencies + +```bash +docker compose exec web npm --prefix web/static/vendor ci +``` + +#### Build Frontend Assets + +```bash +docker compose exec web npm --prefix web/static/vendor run build +``` + +#### Collect Static Files + +From the project root: + +```bash +docker compose exec web python manage.py collectstatic --noinput +``` + +### Load Initial Data into the Database + +Load initial data, such as controlled vocabularies and initial list of institutions and cohorts. 
+
+```bash
+docker compose exec web bash -c "
+  cd core/fixtures/ && \
+  wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/edda.json && \
+  wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/hpo.json && \
+  wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/hdo.json && \
+  wget https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/hgnc.json
+"
+docker compose exec web python manage.py load_initial_data
+```
+
+**Note:** This step can take several minutes to complete.
+
+### Load Demo Data (Optional)
+
+In case you are provisioning a demo instance, the following command loads demo data, including mock datasets, projects, and a demo admin account:
+
+```bash
+docker compose exec web python manage.py load_demo_data
+```
+
+You can log in with the demo admin credentials provided during the demo data setup (username: `admin`, password: `admin` by default) or as one of the regular users (see the `About` page for more detail).
+
+### Build the Search Index
+
+After loading data, build the search index for Solr:
+
+```bash
+docker compose exec web python manage.py rebuild_index --noinput
+```
+
+### Access the Application
+
+The application should now be accessible at `https://localhost/`.
+
+## Scheduled Backup with Cron
+
+To ensure the backups are properly set up, please refer to the [Backup manual](backup.md#scheduled-backup-with-cron).
+
+
+### Restore Legacy Backup
+
+To restore from a legacy backup file (e.g., `daisy_prod.tar.gz`):
+
+```bash
+docker compose stop nginx flower worker beat web mq solr
+
+# Copy the legacy backup file into the backup container
+docker cp ../daisy_prod.tar.gz $(docker compose ps -q backup):/code/daisy_prod.tar.gz
+
+# Execute the legacy restore script inside the backup container
+docker compose exec backup sh /code/scripts/legacy_restore.sh /code/daisy_prod.tar.gz
+
+# Remove the backup file from the container
+docker compose exec backup rm /code/daisy_prod.tar.gz
+
+docker compose up -d solr mq web worker beat flower nginx
+
+# Rebuild the Solr index
+docker compose exec web python manage.py rebuild_index --noinput
+```
+
+---
diff --git a/doc/development.md b/doc/development.md
new file mode 100644
index 00000000..f791c558
--- /dev/null
+++ b/doc/development.md
@@ -0,0 +1,73 @@
+
+# Development
+
+## Linting
+
+```bash
+pip install black==23.7.0
+pre-commit install
+black --check .
+black .
+```
+
+## Import users from active directory
+
+```bash
+./manage.py import_users
+```
+
+## Import projects, datasets or partners from external system
+
+Single file mode:
+
+```bash
+./manage.py import_projects -f path/to/json_file.json
+```
+
+Batch mode:
+
+```bash
+./manage.py import_projects -d path/to/dir/with/json/files/
+```
+
+Available commands: `import_projects`, `import_datasets`, `import_partners`.
+
+In case of problems, add the `--verbose` flag to the command, and take a look inside `./log/daisy.log`.
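+
+For example, a troubleshooting run could look like this (same placeholder path as above):
+
+```bash
+# Re-run the import with verbose output, then inspect the end of the log
+./manage.py import_projects -f path/to/json_file.json --verbose
+tail -n 50 log/daisy.log
+```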
+
+## Install js and css dependencies
+
+```bash
+cd web/static/vendor/
+npm ci
+```
+
+## Compile daisy.scss and React
+
+```bash
+cd web/static/vendor
+npm run-script build
+```
+
+## Run the built-in web server (for development)
+
+```bash
+./manage.py runserver
+```
+
+## Run the tests
+
+The following command will install the test dependencies and execute the tests:
+
+```bash
+python setup.py pytest
+```
+
+Run tests for a specific file:
+
+```bash
+python setup.py pytest --addopts web/tests/test_dataset.py
+```
+
+If the test dependencies are already installed, one can also run the tests just by executing:
+
+```bash
+pytest
+```
diff --git a/DEPLOYMENT.md b/doc/legacy-deployment.md
similarity index 99%
rename from DEPLOYMENT.md
rename to doc/legacy-deployment.md
index e2515fe6..0bc5bfeb 100644
--- a/DEPLOYMENT.md
+++ b/doc/legacy-deployment.md
@@ -1,4 +1,7 @@
-# Installation
+
+**DISCLAIMER**: This is a set of instructions for legacy deployment and administration for versions <1.8.1. It will be removed in the future.
+
+# Legacy deployment and administration manual
 
 Following instructions are for CentOS Linux 7 (Core).
 
diff --git a/doc/update.md b/doc/update.md
new file mode 100644
index 00000000..2776d87d
--- /dev/null
+++ b/doc/update.md
@@ -0,0 +1,55 @@
+# Updating the Project
+
+## Database Backup Before Upgrade
+
+Create a database backup before upgrading:
+
+```bash
+docker compose exec backup sh /code/scripts/db.sh backup
+```
+
+## Upgrade Steps
+
+1. **Pull Latest Changes:**
+
+   ```bash
+   git pull origin master
+   ```
+
+2. **Rebuild Docker Images:**
+
+   ```bash
+   docker compose build
+   ```
+
+3. **Apply Database Migrations:**
+
+   ```bash
+   docker compose exec web python manage.py migrate
+   ```
+
+4. **Update Solr Schema:**
+
+   ```bash
+   docker compose exec web python manage.py build_solr_schema -c /solr/daisy/conf -r daisy -u default
+   ```
+
+5. **Collect Static Files:**
+
+   ```bash
+   docker compose exec web python manage.py collectstatic --noinput
+   ```
+
+6. **Rebuild Search Index:**
+
+   ```bash
+   docker compose exec web python manage.py rebuild_index --noinput
+   ```
+
+7.
**Optional - Import Users:** + + If you use LDAP or Active Directory: + + ```bash + docker compose exec web python manage.py import_users + ``` diff --git a/docker-compose.yaml b/docker-compose.yaml index 8fa5fb6d..fa442086 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,5 +1,3 @@ -version: '3' - services: # web app web: @@ -12,6 +10,8 @@ services: - statics:/static - solrdata:/solr - .:/code + networks: + - daisy_network depends_on: - db - solr @@ -20,47 +20,56 @@ services: # database db: image: postgres:10.1 - restart: on-failure + restart: unless-stopped environment: - POSTGRES_PASSWORD: daisy - POSTGRES_USER: daisy - POSTGRES_DB: daisy - ports: - - "5433:5432" + POSTGRES_PASSWORD: ${DB_PASSWORD:-daisy} + POSTGRES_USER: ${DB_USER:-daisy} + POSTGRES_DB: ${DB_NAME:-daisy} + networks: + - daisy_network volumes: - pgdata:/var/lib/postgresql/data - # web werver frontend + # web server frontend nginx: build: ./docker/nginx - restart: on-failure + restart: unless-stopped volumes: - statics:/public/static:ro ports: - '80:80' - '443:443' + networks: + - daisy_network depends_on: - web # FTS db solr: build: ./docker/solr + restart: unless-stopped ports: - "8983:8983" + networks: + - daisy_network volumes: - solrdata:/opt/solr/server/solr - # rabbit mq + # rabbit mq mq: image: rabbitmq:3.9-management-alpine - restart: on-failure + restart: unless-stopped + networks: + - daisy_network ports: - "15672:15672" - "5672:5672" - # task monitoring flower: image: mher/flower:0.9.7 command: --broker=amqp://guest:guest@mq:5672// --broker_api=http://guest:guest@mq:15672/api/ ports: - "5555:5555" + restart: unless-stopped + networks: + - daisy_network depends_on: - mq # task runner @@ -73,6 +82,9 @@ services: depends_on: - db - mq + restart: unless-stopped + networks: + - daisy_network command: "celery -A elixir_daisy.celery_app worker --loglevel=DEBUG" # celery beat beat: @@ -84,10 +96,43 @@ services: depends_on: - db - mq + networks: + - daisy_network command: "celery -A elixir_daisy beat --loglevel=DEBUG --pidfile= --scheduler django_celery_beat.schedulers:DatabaseScheduler" - + # Backup service + backup: + image: alpine:3.20.3 + environment: + - DB_HOST=db + - DB_PORT=5432 + - DB_NAME=${DB_NAME:-daisy} + - DB_USER=${DB_USER:-daisy} + - DB_PASSWORD=${DB_PASSWORD:-daisy} + - BACKUP_DIR=/backups + - MEDIA_DIR=/code/documents + - SOLR_PORT=8983 + - RABBITMQ_MANAGEMENT_PORT=${RABBITMQ_MANAGEMENT_PORT:-15672} + - BACKUP_VOLUME=${BACKUP_VOLUME:-../backups} + volumes: + - ${BACKUP_VOLUME:-../backups}:/backups + - .:/code + - /var/run/docker.sock:/var/run/docker.sock + working_dir: /code + depends_on: + - db + networks: + - daisy_network + entrypoint: > + sh -c "apk add --no-cache docker-cli docker-compose postgresql-client lsof && + chmod +x /code/scripts/db.sh && + tail -f /dev/null" volumes: pgdata: statics: solrdata: + backups: + +networks: + daisy_network: + driver: bridge diff --git a/env.template b/env.template new file mode 100644 index 00000000..f51ba9e7 --- /dev/null +++ b/env.template @@ -0,0 +1,7 @@ +# Database configuration +DB_NAME=daisy +DB_USER=daisy +DB_PASSWORD=daisy + +# Backup configuration +BACKUP_VOLUME=../backups diff --git a/scripts/backup_script.sh b/scripts/backup_script.sh new file mode 100755 index 00000000..15cea18b --- /dev/null +++ b/scripts/backup_script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Stop services that should not run during the backup +docker compose stop nginx flower worker beat web mq solr + +# Run the backup script inside the backup container +docker 
compose exec backup sh /code/scripts/db.sh backup + +# Start the services back up after the backup is complete +docker compose up -d solr mq web worker beat flower nginx diff --git a/scripts/db.sh b/scripts/db.sh new file mode 100755 index 00000000..f5c43e20 --- /dev/null +++ b/scripts/db.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +set -euo pipefail + +# Configuration +BACKUP_DIR="${BACKUP_DIR:-../backups}" # Directory to store backups +DB_HOST="${DB_HOST:-db}" # PostgreSQL host +DB_PORT="${DB_PORT:-5432}" # PostgreSQL port +DB_NAME="${DB_NAME:-daisy}" # PostgreSQL database name +DB_USER="${DB_USER:-daisy}" # PostgreSQL user +DB_PASSWORD="${DB_PASSWORD:-daisy}" # PostgreSQL password +MEDIA_DIR="${MEDIA_DIR:-/code/documents}" # Django media directory +BACKUP_VOLUME="${BACKUP_VOLUME:-../backups}" # Backup volume + +if [ "${BACKUP_DIR}" == "../backups" ] && [ ! -d "${BACKUP_DIR}" ]; then + mkdir -p "${BACKUP_DIR}" +fi + +# Function to perform backup +backup() { + local timestamp + timestamp=$(date +%Y-%m-%d_%H-%M-%S) + local backup_file="${BACKUP_DIR}/backup_${timestamp}.tar.gz" + local temp_backup_dir="${BACKUP_DIR}/temp_${timestamp}" + + echo "Starting backup..." + + # Create temporary backup directory + mkdir -p "${temp_backup_dir}" + + # Backup database as SQL dump + echo "Backing up PostgreSQL database..." + if ! PGPASSWORD="${DB_PASSWORD}" pg_dump -h "${DB_HOST}" -p "${DB_PORT}" -U "${DB_USER}" -d "${DB_NAME}" -F p -f "${temp_backup_dir}/db_backup.sql"; then + echo "ERROR: Database backup failed" >&2 + rm -rf "${temp_backup_dir}" + exit 1 + fi + + # Backup media files + echo "Backing up Django media files..." + if [ -d "${MEDIA_DIR}" ]; then + if ! cp -R "${MEDIA_DIR}" "${temp_backup_dir}/documents"; then + echo "ERROR: Media files backup failed" >&2 + rm -rf "${temp_backup_dir}" + exit 1 + fi + else + echo "WARNING: Media directory not found. Skipping media backup." + fi + + # Create tar.gz archive of the backups + echo "Creating tar.gz archive..." + if ! tar -czf "${backup_file}" -C "${BACKUP_DIR}" "temp_${timestamp}"; then + echo "ERROR: Archive creation failed" >&2 + rm -rf "${temp_backup_dir}" + exit 1 + fi + + # Remove temporary backup directory + rm -rf "${temp_backup_dir}" + + echo "Backup completed successfully: ${BACKUP_VOLUME}/backup_${timestamp}.tar.gz" +} + +# Function to perform restore +restore() { + if [ $# -ne 1 ]; then + echo "Usage: $0 restore " >&2 + exit 1 + fi + + local backup_file="$1" + local tmp_restore_dir="${BACKUP_DIR}/restore_temp" + + if [ ! -f "${backup_file}" ]; then + echo "ERROR: Backup file not found: ${backup_file}" >&2 + exit 1 + fi + + echo "Starting restoration..." + + # Drop the existing database + echo "Dropping existing database..." + if ! PGPASSWORD="${DB_PASSWORD}" psql -h "${DB_HOST}" -p "${DB_PORT}" -U "${DB_USER}" -d postgres -c "DROP DATABASE IF EXISTS ${DB_NAME};"; then + echo "ERROR: Failed to drop existing database" >&2 + exit 1 + fi + + # Create a new database + echo "Creating new database..." + if ! PGPASSWORD="${DB_PASSWORD}" psql -h "${DB_HOST}" -p "${DB_PORT}" -U "${DB_USER}" -d postgres -c "CREATE DATABASE ${DB_NAME};"; then + echo "ERROR: Failed to create new database" >&2 + exit 1 + fi + + # Extract the backup archive + mkdir -p "${tmp_restore_dir}" + if ! 
tar -xzf "${backup_file}" -C "${tmp_restore_dir}"; then + echo "ERROR: Failed to extract backup archive" >&2 + rm -rf "${tmp_restore_dir}" + exit 1 + fi + + # Identify the extracted directory (e.g., temp_2023-10-05_12-00-00) + local extracted_dir + extracted_dir=$(ls "${tmp_restore_dir}") + + # Restore PostgreSQL database from SQL dump + echo "Restoring PostgreSQL database..." + if ! PGPASSWORD="${DB_PASSWORD}" psql -h "${DB_HOST}" -p "${DB_PORT}" -U "${DB_USER}" -d "${DB_NAME}" -f "${tmp_restore_dir}/${extracted_dir}/db_backup.sql"; then + echo "ERROR: Database restoration failed" >&2 + rm -rf "${tmp_restore_dir}" + exit 1 + fi + + # Restore media files + echo "Restoring Django media files..." + if [ -d "${tmp_restore_dir}/${extracted_dir}/documents" ]; then + rm -rf "${MEDIA_DIR:?}/"* + if ! cp -R "${tmp_restore_dir}/${extracted_dir}/documents/"* "${MEDIA_DIR}/"; then + echo "ERROR: Media files restoration failed" >&2 + rm -rf "${tmp_restore_dir}" + exit 1 + fi + else + echo "WARNING: No media backup found in the archive. Skipping media restoration." + fi + + # Remove temporary restoration directory + rm -rf "${tmp_restore_dir}" + + echo "Restoration completed successfully." +} + +# Main script logic +case "$1" in + backup) + backup + ;; + restore) + shift + restore "$@" + ;; + *) + echo "Usage: $0 {backup|restore }" >&2 + exit 1 + ;; +esac diff --git a/scripts/legacy_restore.sh b/scripts/legacy_restore.sh new file mode 100755 index 00000000..3e447baa --- /dev/null +++ b/scripts/legacy_restore.sh @@ -0,0 +1,166 @@ +#!/bin/sh + +# Exit immediately if a command exits with a non-zero status and treat unset variables as errors +set -eu + +# Configuration +DB_HOST="${DB_HOST:-db}" +DB_PORT="${DB_PORT:-5432}" +DB_NAME="${DB_NAME:-daisy}" +DB_USER="${DB_USER:-daisy}" +DB_PASSWORD="${DB_PASSWORD:-daisy}" +MEDIA_DIR="${MEDIA_DIR:-/code/documents}" +SHOW_DB_LOGS="${SHOW_DB_LOGS:-true}" + +# Check if TAR_FILE argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 path_to_daisy_backup_tar.gz" >&2 + exit 1 +fi + +TAR_FILE=$1 + +# Initialize variables before defining cleanup +EXTRACT_LIST="" + +# Create a temporary directory for extraction +TEMP_DIR=$(mktemp -d /tmp/restore_temp.XXXXXX) + +# Function to clean up temporary directories on exit +cleanup() { + rm -rf "$TEMP_DIR" "$EXTRACT_LIST" +} +trap cleanup EXIT + +# Ensure TEMP_DIR was created successfully +if [ ! -d "$TEMP_DIR" ]; then + echo "ERROR: Failed to create temporary directory." >&2 + exit 1 +fi + +echo "Step 1: Starting restoration process..." + +# Step 2: Search for necessary files and directories +echo "Step 2: Searching for daisy_dump.sql, documents directory, and settings_local.py in the archive..." + +# Initialize search result variables +SQL_DUMP_PATH="" +DOCUMENTS_DIR_PATH="" +SETTINGS_LOCAL_PATH="" + +# Search for daisy_dump.sql +SQL_DUMP_PATH=$(tar -tzf "$TAR_FILE" | grep "/daisy_dump\.sql$" | grep -v "/daisy/" | head -n 1 || true) + +# Search for documents directory (any depth) excluding those under 'templates' +DOCUMENTS_DIR_PATH=$(tar -tzf "$TAR_FILE" | grep "/documents/$" | grep -v "/templates/" | head -n 1 || true) + +# Search for settings_local.py (optional) +SETTINGS_LOCAL_PATH=$(tar -tzf "$TAR_FILE" | grep "/settings_local\.py$" | head -n 1 || true) + +# Check if SQL dump is found +if [ -z "$SQL_DUMP_PATH" ]; then + echo "ERROR: SQL dump 'daisy_dump.sql' not found in the archive." 
>&2 + # Proceeding without exiting to allow extraction of other files +fi + +# Check if documents directory is found +if [ -z "$DOCUMENTS_DIR_PATH" ]; then + echo "WARNING: Documents directory not found in the archive or it is under 'templates/'." >&2 +fi + +# settings_local.py is optional +if [ -z "$SETTINGS_LOCAL_PATH" ]; then + echo "INFO: settings_local.py not found in the archive. Continuing without it." >&2 +fi + +# Prepare list of paths to extract +EXTRACT_LIST=$(mktemp /tmp/extract_list.XXXXXX) + +# Add found paths to the extract list if they exist +if [ -n "$SQL_DUMP_PATH" ]; then + echo "$SQL_DUMP_PATH" >> "$EXTRACT_LIST" +fi + +if [ -n "$DOCUMENTS_DIR_PATH" ]; then + echo "$DOCUMENTS_DIR_PATH" >> "$EXTRACT_LIST" +fi + +if [ -n "$SETTINGS_LOCAL_PATH" ]; then + echo "$SETTINGS_LOCAL_PATH" >> "$EXTRACT_LIST" +fi + +# Debug: Show the list of paths to extract +echo "DEBUG: Paths to extract:" +cat "$EXTRACT_LIST" + +# Check if EXTRACT_LIST has entries +if [ ! -s "$EXTRACT_LIST" ]; then + echo "ERROR: No files or directories to extract. Exiting." >&2 + exit 1 +fi + +# Step 3: Extract only the necessary files and directories +echo "Step 3: Extracting the necessary files and directories..." +if ! tar -xzf "$TAR_FILE" -C "$TEMP_DIR" -T "$EXTRACT_LIST" 2>&1; then + echo "ERROR: Failed to extract necessary files from the backup archive." >&2 + exit 1 +fi + +# Step 4: Verify extracted contents +echo "Step 4: Verifying extracted contents..." +SQL_DUMP=$(find "$TEMP_DIR" -type f -name "daisy_dump.sql" | grep -v "/home/daisy/" | head -n 1 || true) +DOCUMENTS_DIR=$(find "$TEMP_DIR" -type d -name "documents" | grep -v "/templates/" | head -n 1 || true) +SETTINGS_LOCAL=$(find "$TEMP_DIR" -type f -name "settings_local.py" | head -n 1 || true) + +echo " - SQL dump found: $([ -n "$SQL_DUMP" ] && echo 'Yes' || echo 'No')" +echo " - Documents directory found: $([ -n "$DOCUMENTS_DIR" ] && echo 'Yes' || echo 'No')" +echo " - settings_local.py found: $([ -n "$SETTINGS_LOCAL" ] && echo 'Yes' || echo 'No')" +echo "Step 8: Restoring PostgreSQL database from SQL dump..." +if [ "$SHOW_DB_LOGS" = "true" ]; then + PGPASSWORD="${DB_PASSWORD}" psql -h "${DB_HOST}" -p "${DB_PORT}" -U "${DB_USER}" -d "${DB_NAME}" -f "${SQL_DUMP}" +else + PGPASSWORD="${DB_PASSWORD}" psql -h "${DB_HOST}" -p "${DB_PORT}" -U "${DB_USER}" -d "${DB_NAME}" -f "${SQL_DUMP}" > /dev/null 2>&1 +fi + +if [ $? -ne 0 ]; then + echo "ERROR: Database restoration failed" >&2 + rm -rf "$TEMP_DIR" + exit 1 +fi + +echo "Step 9: Restoring Django media files..." +if [ -n "$DOCUMENTS_DIR" ] && [ -d "$DOCUMENTS_DIR" ]; then + echo " - Copying files from $DOCUMENTS_DIR to $MEDIA_DIR" + rm -rf "${MEDIA_DIR:?}/"* + if ! cp -R "${DOCUMENTS_DIR}/." "${MEDIA_DIR}/"; then + echo "ERROR: Media files restoration failed" >&2 + rm -rf "$TEMP_DIR" + exit 1 + fi +else + echo "WARNING: No media backup found in the archive. Skipping media restoration." +fi + +echo "Step 9: Restoring settings_local.py..." +if [ -n "$SETTINGS_LOCAL" ] && [ -f "$SETTINGS_LOCAL" ]; then + if [ -f "/code/elixir_daisy/settings_local.py" ]; then + if ! cp -f "$SETTINGS_LOCAL" "/code/elixir_daisy/settings_local.py"; then + echo "ERROR: Failed to replace existing settings_local.py" >&2 + rm -rf "$TEMP_DIR" + exit 1 + fi + else + if ! cp "$SETTINGS_LOCAL" "/code/elixir_daisy/settings_local.py"; then + echo "ERROR: Failed to restore settings_local.py" >&2 + rm -rf "$TEMP_DIR" + exit 1 + fi + fi +else + echo "WARNING: settings_local.py not found in the archive. Skipping restoration." 
+fi + +echo "Step 11: Cleaning up temporary files..." +rm -rf "$TEMP_DIR" + +echo "Step 12: Restoration completed successfully." \ No newline at end of file