This repo implements a work queue system that processes multiple large CSV files in a distributed way.
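As a rough sketch of the pattern (module, task, and broker names here are hypothetical, not this repo's actual code), each worker consumes file paths from the queue and processes one CSV per task:

```python
# Hypothetical sketch of the worker-side pattern, not this repo's actual code.
import csv

from celery import Celery

app = Celery("workers", broker="amqp://guest:guest@rabbitmq:5672//")

@app.task(acks_late=True)  # re-deliver the task if a worker dies mid-file
def process_csv(path: str) -> int:
    """Read one CSV file and persist its rows."""
    rows = 0
    with open(path, newline="") as f:
        for row in csv.DictReader(f):
            # validate / transform / write the row to Postgres here
            rows += 1
    return rows
```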
To scale up the system, increase the worker replicas and/or per-worker concurrency. Keep in mind that the database connection pool size must be increased accordingly.
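Workers can be scaled with, for example, docker compose up -d --scale worker=4 (assuming the service is named worker). A minimal sketch of sizing the SQLAlchemy pool to match, with illustrative values only; note that Postgres's own max_connections must also cover the total across all workers:

```python
# Illustrative values only: size the pool to match total worker concurrency.
from sqlalchemy import create_engine

engine = create_engine(
    "postgresql://user:pass@postgres:5432/jobs",  # hypothetical DSN
    pool_size=20,        # ~ worker replicas x per-worker concurrency
    max_overflow=10,     # temporary connections allowed beyond pool_size
    pool_pre_ping=True,  # detect and replace stale connections before use
)
```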
Quick start:

make setup
make start
make enqueue ARGS="data"
Set up the environment:

./scripts/setup.sh
or
make setup
Start the services:

docker compose up -d
or
make start
Enqueue all CSV files from a directory:

docker compose run --rm --no-deps enqueue [-h] [--dbhost DBHOST] [--dbname DBNAME] [--dbuser DBUSER] [--dbpass DBPASS] [--dbport DBPORT] directory
or
make enqueue ARGS="[-h] [--dbhost DBHOST] [--dbname DBNAME] [--dbuser DBUSER] [--dbpass DBPASS] [--dbport DBPORT] directory"
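For example, to enqueue every CSV under ./data (the connection values here are illustrative):

make enqueue ARGS="--dbhost postgres --dbport 5432 --dbname jobs --dbuser admin --dbpass secret ./data"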
Generate sample CSV data:

docker compose run --rm --no-deps worker python ./scripts/generate_data.py [--size SIZE] [--output OUTPUT]
or
make generate-data ARGS="[--size SIZE] [--output OUTPUT]"
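For example, to write a one-million-row sample file (flag values are illustrative; run the script with -h for its actual defaults):

make generate-data ARGS="--size 1000000 --output ./data/sample.csv"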
Run the test suite:

make test
Monitor task progress:

1. Start the monitoring service: docker compose up monitor
2. Access the panel via http://localhost:5555/ (presumably Flower, Celery's monitoring tool, which listens on port 5555 by default)
Tech stack:

- Celery
- RabbitMQ
- Postgres
- Docker
- SQLAlchemy