From c4b2faf2e2246dad12995dbc91b84f421754f27e Mon Sep 17 00:00:00 2001 From: vfisikop Date: Tue, 12 Mar 2024 17:45:22 +0200 Subject: [PATCH] Add JOSS paper --- .github/workflows/R-CMD-check-macOS.yml | 62 ----------- .github/workflows/R-CMD-check-ubuntu.yml | 65 ------------ .github/workflows/R-CMD-check-windows.yml | 61 ----------- .github/workflows/cmake-clang.yml | 39 ------- .github/workflows/cmake-examples.yml | 40 ------- .github/workflows/cmake-gcc.yml | 38 ------- .github/workflows/docs.yml | 27 ----- .github/workflows/draft_pdf.yml | 24 +++++ joss_paper/paper.md | 121 ---------------------- joss_paper/paper.bib => paper.bib | 55 ++++++---- paper.md | 110 ++++++++++++++++++++ 11 files changed, 169 insertions(+), 473 deletions(-) delete mode 100644 .github/workflows/R-CMD-check-macOS.yml delete mode 100644 .github/workflows/R-CMD-check-ubuntu.yml delete mode 100644 .github/workflows/R-CMD-check-windows.yml delete mode 100644 .github/workflows/cmake-clang.yml delete mode 100644 .github/workflows/cmake-examples.yml delete mode 100644 .github/workflows/cmake-gcc.yml delete mode 100644 .github/workflows/docs.yml create mode 100644 .github/workflows/draft_pdf.yml delete mode 100644 joss_paper/paper.md rename joss_paper/paper.bib => paper.bib (86%) create mode 100644 paper.md diff --git a/.github/workflows/R-CMD-check-macOS.yml b/.github/workflows/R-CMD-check-macOS.yml deleted file mode 100644 index b1ac281c3..000000000 --- a/.github/workflows/R-CMD-check-macOS.yml +++ /dev/null @@ -1,62 +0,0 @@ -############################################################################## -# GitHub Actions Workflow to test the R interface of volesti -# -# Copyright (c) 2020 Vissarion Fisikopoulos -# -# Licensed under GNU LGPL.3, see LICENCE file -############################################################################## - -on: [push, pull_request] - -name: R-CMD-check-macOS - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ 
matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: macOS-latest, r: '4.1.2'} -# - {os: macOS-latest, r: 'devel'} # Error in library(devtools) : there is no package called ‘devtools’ - - {os: macOS-latest, r: 'release'} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install dependencies - run: Rscript -e "install.packages(c('devtools', dependencies=TRUE))" -e "install.packages(c('rcmdcheck', 'devtools', 'Rcpp', 'RcppEigen', 'BH', 'testthat', 'downloader', 'xfun'))"; - - - name: Checkout Rvolesti repository - run: git clone https://github.com/GeomScale/Rvolesti.git; - - - name: Upgrade with current develop of volesti - run: cp -rf include Rvolesti/src/volesti; - cp -rf external Rvolesti/src; - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: cd Rvolesti/; - Rscript -e "library(rcmdcheck)" -e "rcmdcheck::rcmdcheck(args = c('--no-manual'), error_on = 'warning', check_dir = 'check')" - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@v2 - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check diff --git a/.github/workflows/R-CMD-check-ubuntu.yml b/.github/workflows/R-CMD-check-ubuntu.yml deleted file mode 100644 index 753ee114a..000000000 --- a/.github/workflows/R-CMD-check-ubuntu.yml +++ /dev/null @@ -1,65 +0,0 @@ -############################################################################## -# GitHub Actions Workflow to test the R interface of volesti -# -# Copyright (c) 2020 Vissarion Fisikopoulos -# -# Licensed under GNU LGPL.3, see LICENCE file -############################################################################## - -on: [push, pull_request] - -name: R-CMD-check-ubuntu - -jobs: - R-CMD-check: - 
runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: ubuntu-latest, r: 'devel'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'devel'} - - {os: ubuntu-20.04, r: 'release'} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v3 - - run: sudo apt-get update || true; - sudo apt install build-essential libcurl4-gnutls-dev libxml2-dev libssl-dev libgit2-dev libfontconfig1-dev libharfbuzz-dev libfribidi-dev; - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install dependencies - run: Rscript -e "install.packages(c('testthat', 'pkgload', 'rcmdcheck', 'devtools', 'Rcpp', 'RcppEigen', 'BH', 'downloader', 'xfun', dependencies=TRUE))"; - - - name: Checkout Rvolesti repository - run: git clone https://github.com/GeomScale/Rvolesti.git; - - - name: Upgrade with current develop of volesti - run: cp -rf include Rvolesti/src/volesti; - cp -rf external Rvolesti/src; - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: cd Rvolesti/; - Rscript -e "library(rcmdcheck)" -e "rcmdcheck::rcmdcheck(args = c('--no-manual'), error_on = 'warning', check_dir = 'check')" - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@v2 - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check diff --git a/.github/workflows/R-CMD-check-windows.yml b/.github/workflows/R-CMD-check-windows.yml deleted file mode 100644 index 97e725aa0..000000000 --- a/.github/workflows/R-CMD-check-windows.yml +++ /dev/null @@ -1,61 +0,0 @@ -############################################################################## -# GitHub Actions Workflow to test the R interface of volesti -# -# Copyright (c) 2020 Vissarion Fisikopoulos -# -# Licensed under 
GNU LGPL.3, see LICENCE file -############################################################################## - -on: [push, pull_request] - -name: R-CMD-check-windows - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: -# - {os: windows-latest, r: 'devel'} # Error in library(devtools) : there is no package called ‘devtools’ - - {os: windows-latest, r: 'release'} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install dependencies - run: Rscript -e "install.packages(c('devtools', dependencies=TRUE))" -e "install.packages(c('rcmdcheck', 'devtools', 'Rcpp', 'RcppEigen', 'BH', 'testthat', 'downloader', 'xfun'))" - - - name: Checkout Rvolesti repository - run: git clone https://github.com/GeomScale/Rvolesti.git; - - - name: Upgrade with current develop of volesti - run: cp -r -fo include Rvolesti/src/volesti; - cp -r -fo external Rvolesti/src; - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: cd Rvolesti/; - Rscript -e "library(rcmdcheck)" -e "rcmdcheck::rcmdcheck(args = c('--no-manual'), error_on = 'warning', check_dir = 'check')" - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@v2 - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check diff --git a/.github/workflows/cmake-clang.yml b/.github/workflows/cmake-clang.yml deleted file mode 100644 index 520b33159..000000000 --- a/.github/workflows/cmake-clang.yml +++ /dev/null @@ -1,39 +0,0 @@ -############################################################################## -# GitHub Actions Workflow for volesti to build tests with Clang -# -# Copyright (c) 2020-2022 Vissarion 
Fisikopoulos -# -# Licensed under GNU LGPL.3, see LICENCE file -############################################################################## -name: cmake-clang - -on: [push, pull_request] - -jobs: - build: - name: ${{ matrix.config.os }} - ${{ matrix.config.compiler }} - strategy: - fail-fast: false - matrix: - config: - - {os: ubuntu-22.04, compiler_pkg: clang-11, compiler: clang++-11} - - {os: ubuntu-22.04, compiler_pkg: clang-12, compiler: clang++-12} - - {os: ubuntu-22.04, compiler_pkg: clang-13, compiler: clang++-13} - - {os: ubuntu-22.04, compiler_pkg: clang-14, compiler: clang++-14} - #- {os: ubuntu-22.04, compiler_pkg: clang-15, compiler: clang++-15} - - {os: ubuntu-20.04, compiler_pkg: clang-8, compiler: clang++-8} - - {os: ubuntu-20.04, compiler_pkg: clang-9, compiler: clang++-9} - - {os: ubuntu-20.04, compiler_pkg: clang-10, compiler: clang++-10} - - {os: ubuntu-20.04, compiler_pkg: clang-11, compiler: clang++-11} - - {os: ubuntu-20.04, compiler_pkg: clang-12, compiler: clang++-12} - runs-on: ${{ matrix.config.os }} - steps: - - uses: actions/checkout@v1 - - run: sudo apt-get update || true; - sudo apt-get install ${{ matrix.config.compiler_pkg }} lp-solve libopenblas-dev; - rm -rf build; - mkdir build; - cd build; - cmake -D CMAKE_CXX_COMPILER=${{ matrix.config.compiler }} -D CMAKE_CXX_FLAGS=-fsanitize=memory -D CMAKE_CXX_FLAGS=-fsanitize=undefined -D CMAKE_CXX_FLAGS=-g -D DISABLE_NLP_ORACLES=ON -D USE_MKL=OFF ../test; - make; - ctest --verbose; \ No newline at end of file diff --git a/.github/workflows/cmake-examples.yml b/.github/workflows/cmake-examples.yml deleted file mode 100644 index b7784fa30..000000000 --- a/.github/workflows/cmake-examples.yml +++ /dev/null @@ -1,40 +0,0 @@ -############################################################################## -# GitHub Actions Workflow for volesti to build tests with GCC -# -# Copyright (c) 2020-2022 Vissarion Fisikopoulos -# -# Licensed under GNU LGPL.3, see LICENCE file 
-############################################################################## -name: cmake-examples - -on: [push, pull_request] - -jobs: - build: - name: ${{ matrix.config.os }} - ${{ matrix.config.compiler }} - strategy: - fail-fast: false - matrix: - config: - - {os: ubuntu-22.04, compiler_pkg: clang-11, compiler: clang++-11} - - {os: ubuntu-22.04, compiler_pkg: g++-11, compiler: g++-11} - runs-on: ${{ matrix.config.os }} - steps: - - uses: actions/checkout@v1 - - run: sudo apt-get update || true; - sudo apt-get install ${{ matrix.config.compiler_pkg }} lp-solve libomp-dev libopenblas-dev libarpack2-dev; - - name: Build examples - run: | - cd examples - for dir in */; do - if [ "$dir" != "EnvelopeProblemSOS/" ] && [ "$dir" != "python_utilities/" ]; then - echo - echo "Building examples in $dir ....................." - cd "$dir" - mkdir build && cd build - cmake -DCMAKE_CXX_COMPILER=${{ matrix.config.compiler }} -DUSE_MKL=OFF .. - make - cd ../.. - fi - done - diff --git a/.github/workflows/cmake-gcc.yml b/.github/workflows/cmake-gcc.yml deleted file mode 100644 index f18e358ac..000000000 --- a/.github/workflows/cmake-gcc.yml +++ /dev/null @@ -1,38 +0,0 @@ -############################################################################## -# GitHub Actions Workflow for volesti to build tests with GCC -# -# Copyright (c) 2020-2021 Vissarion Fisikopoulos -# -# Licensed under GNU LGPL.3, see LICENCE file -############################################################################## -name: cmake-gcc - -on: [push, pull_request] - -jobs: - build: - name: ${{ matrix.config.os }} - ${{ matrix.config.compiler }} - strategy: - fail-fast: false - matrix: - config: - - {os: ubuntu-22.04, compiler: g++-9} - - {os: ubuntu-22.04, compiler: g++-10} - - {os: ubuntu-22.04, compiler: g++-11} - - {os: ubuntu-22.04, compiler: g++-12} - #- {os: ubuntu-22.04, compiler: g++-13} - - {os: ubuntu-20.04, compiler: g++-7} - - {os: ubuntu-20.04, compiler: g++-8} - - {os: ubuntu-20.04, compiler: 
g++-9} - - {os: ubuntu-20.04, compiler: g++-10} - runs-on: ${{ matrix.config.os }} - steps: - - uses: actions/checkout@v1 - - run: sudo apt-get update || true; - sudo apt-get install ${{ matrix.config.compiler }} lp-solve libopenblas-dev; - rm -rf build; - mkdir build; - cd build; - cmake -D CMAKE_CXX_COMPILER=${{ matrix.config.compiler }} -D DISABLE_NLP_ORACLES=ON -D USE_MKL=OFF ../test; - make; - ctest --verbose; diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index fca7ede90..000000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Docs -on: [push, pull_request] - -jobs: - notebooks: - name: "Build the docs" - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - - name: Install dependencies - run: | - sudo apt-get install doxygen - python -m pip install -U pip - python -m pip install -r docs/requirements.txt - - - name: Build the docs - run: | - python -m pip install -U setuptools - python -m pip install -U pillow mock alabaster commonmark recommonmark sphinx sphinx-rtd-theme readthedocs-sphinx-ext - cd docs - python -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html diff --git a/.github/workflows/draft_pdf.yml b/.github/workflows/draft_pdf.yml new file mode 100644 index 000000000..1d7dc44e4 --- /dev/null +++ b/.github/workflows/draft_pdf.yml @@ -0,0 +1,24 @@ +name: Draft PDF +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper.md + - name: Upload + uses: actions/upload-artifact@v4 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. 
Note, this should be the same directory as the input + # paper.md + path: paper.pdf diff --git a/joss_paper/paper.md b/joss_paper/paper.md deleted file mode 100644 index 06b30ef4a..000000000 --- a/joss_paper/paper.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: 'volesti: C++ library for sampling and volume computation on convex bodies' -tags: - - C++ - - geometry - - randomization - - Monte-Carlo methods - - convexity -authors: - - name: Apostolos Chalkis - orcid: 0000-0000-0000-0000 - equal-contrib: true - affiliation: "2, 4" # (Multiple affiliations must be quoted) - - name: Vissarion Fisikopoulos - corresponding: true # (This is how to denote the corresponding author) - equal-contrib: true # (This is how you can denote equal contributions between multiple authors) - affiliation: "1, 4" - - name: Marios Papachristou - equal-contrib: true # (This is how you can denote equal contributions between multiple authors) - affiliation: 5 - - name: Elias Tsigaridas - equal-contrib: true # (This is how you can denote equal contributions between multiple authors) - affiliation: "3, 4" -affiliations: - - name: National & Kapodistrian University of Athens, Greece - index: 1 - - name: Quantagonia - index: 2 - - name: Inria Paris and IMJ-PRG, Sorbonne Universit\`e - index: 3 - - name: GeomScale - index: 4 - - name: Cornell University - index: 5 -date: 11 March 2024 -bibliography: paper.bib - ---- - -# Summary - -Sampling from (constrained) high-dimensional distributions and volume approximation of convex -bodies are fundamental operations that appear in optimization, finance, -engineering, artificial intelligence, and machine learning. -We present `volesti`, a C++ library that delivers efficient implementations of state-of-the-art, mainly randomized, algorithms -to sample from general logconcave distributions. -Based on these routines can estimate the volume of convex bodies in high dimensions, -round them and also compute multidimensional integrals over them. 
-The backbone of our library consists of Monte-Carlo algorithms, -that are randomized algorithms, the output of which can be incorrect with (usually very small) error probability; thus, we also provide several -high-dimensional statistical tests to certify and verify the output. - -The focus of `volesti`' is scalability in high dimensions, -that, depending on the problem at hand, could be in the order of hundreds or thousands dimension. -Another novelty is the ability to handle a variety of different inputs -for the constrained support of the various distributions. -`volesti` supports three different types of polyhedra [@Ziegler:1995], spectrahedra [@Ramana:1999] -and general non-linear convex objects. - -`volesti` relies on `Eigen` library [@eigen] for linear algebra but also support `MKL` optimizations [@mkl]. -There are R [@Chalkis:2021] and Python [@Chalkis_dingo:2023] interfaces available; -alas not all C++ functionality is available in through these interfaces. - -# Statement of need - -High-dimensional sampling from multivariate distributions with Markov Chain Monte Carlo (MCMC) -algorithms is a fundamental problem with many applications in the whole spectrum of science and engineering [@Iyengar:1988; -@Somerville:1998; @Genz:2009; @Schellenberger:2009]. -In particular, multivariate integration over a convex set -as well as the volume approximation of convex sets -have accumulated a huge amount of effort from theorists and engineers over the last decades. -Nevertheless, these problems are computationally hard for general dimensions [@Dyer:1988]. -MCMC algorithms made remarkable progress -and their use allowed us to efficiently tackle the problems of sampling and -volume estimation of convex bodies in theory, -by the introduction of (ragher sharp) theoretical guarantees [@Chen:2018; @Lee:2018; -@Mangoubi:2019]. 
-Unfortunately, these theoretical guarantees of the MCMC algorithms -do not extend in an straightforward manner to efficient implementations able to attack problems coming from real-life computations. -Therefore, we witnessed the birth of efficient in practice MCMC algorithm -that they relax the theoretical guarantees and -and employ new algorithmic and statistical techniques -to be amenable to efficient implementations. -Remarkably, these algorithms, and the corresponding implementations, -also meet the requirements for high accuracy results -[@Emiris:2014; @Cousins:2015; @Chalkis_volume:2023; @Kook:2022]. -Let us mention that the volume algorithm of @Cousins:2015 and the sampling method of @Kook:2022 are available as `MATLAB` -packages. - -All aforementioned algorithms and techniques are available in `volesti` -along with the sampling algorithm by -@Chalkis_hmc:2023 and the algorithms for spectrahedra by @Chalkis_spectra:2022. - -The efficient implementations of `volesti -(i) suport various sampling techniques based on geometric walks, roughly speaking these are a continuous version of MCMC algorithms, like Billard walk, Hamiltonian walk and other, -(ii) give us the ability to sample from various distributions, like uniform, log-concave, exponential, and Gaussian, -(iii) allows to consider the distributions - constrained in various convex domains, like hypercubes, zonotopes, general polytopes (in H and V representations), spectrahedra, - and (iv) can perform volume computations, integration, and solve problem from real life applications in very high dimensions. - - - -We use `volesti` extensively in various research and engineering directions that we pursue. 
-In particular, for the problem of sampling the flux space of metabolic networks -we were able to sample from the most complicated human metabolic network accessible today, Recon3D [@cftz-socg021], -we use to model financial crises [@ccef-crises-j], -to detect low volatility anomalies in stock markets [@bcft-aistats-23], - to introduce randomized control in asset pricing and portfolio performance evaluation [@bcft-arxiv-24]), but also to sample from (and compute the volume of) spectrahedra [@Chalkis_spectra:2022], the feasible regions of semidefinite programs. - -Even more, `volesti` has been used in conducting research in electric power systems [@Venzke:2019], for problems -in probabilistic inference [@Spallitta:2024], -to perform resource analysis on programs [@pham-phd-2024]; -but also to more theoretical and mathematical challenges, like the computation of topological invariants [@co-alenex-2021] - and persistent homology [@vm-fods-2022]. - -# Acknowledgements - -We would like to thank the contributors to the `volesti` library for their valuable contributions and -feedback. 
- -# References diff --git a/joss_paper/paper.bib b/paper.bib similarity index 86% rename from joss_paper/paper.bib rename to paper.bib index b14a11504..36e67484f 100644 --- a/joss_paper/paper.bib +++ b/paper.bib @@ -33,6 +33,7 @@ @article{Iyengar:1988 volume = {9}, year = {1988}, url = {https://doi.org/10.1137/0909028}, + doi = {10.1137/0909028} } @article{Somerville:1998, @@ -45,6 +46,7 @@ @article{Somerville:1998 volume = {7}, year = {1998}, url = {https://doi.org/10.1080/10618600.1998.10474793}, + doi = {10.2307/1390681} } @book{Genz:2009, @@ -55,6 +57,7 @@ @book{Genz:2009 Probabilities}, year = {2009}, isbn = {364201688X, 9783642016882}, + doi = {10.1007/978-3-642-01689-9} } @article{Schellenberger:2009, @@ -66,6 +69,7 @@ @article{Schellenberger:2009 volume = {284 9}, year = {2009}, url = {https://doi.org/10.1074/jbc.R800048200}, + doi = {10.1074/jbc.r800048200} } @article{Venzke:2019, @@ -78,6 +82,7 @@ @article{Venzke:2019 year = {2021}, issn = {0378-7796}, url = {https://doi.org/10.1016/j.epsr.2020.106614}, + doi = {10.1016/j.epsr.2020.106614} } @article{Dyer:1988, @@ -90,6 +95,7 @@ @article{Dyer:1988 volume = {17}, year = {1988}, url = {https://doi.org/10.1137/0217060}, + doi = {10.1137/0217060} } @article{Chen:2018, @@ -114,6 +120,7 @@ @inproceedings{Lee:2018 year = {2018}, isbn = {978-1-4503-5559-9}, url = {https://doi.org/10.1145/3188745.3188774}, + doi = {10.1145/3188745.3188774} } @inproceedings{Mangoubi:2019, @@ -125,6 +132,7 @@ @inproceedings{Mangoubi:2019 Computation via a Sub-Linear Ball Walk}, year = {2019}, url = {https://doi.org/10.1109/FOCS.2019.00082}, + doi = {10.1109/focs.2019.00082} } @article{Lovasz:2006, @@ -139,6 +147,7 @@ @article{Lovasz:2006 year = {2006}, issn = {0097-5397}, url = {https://doi.org/10.1137/S009753970544727X}, + doi = {10.1145/1007352.1007403} } @article{Emiris:2014, @@ -153,6 +162,7 @@ @article{Emiris:2014 year = {2014}, issn = {0098-3500}, url = {https://doi.org/10.1145/3194656}, + doi = {10.1145/3194656} } 
@article{Cousins:2015, @@ -166,6 +176,7 @@ @article{Cousins:2015 volume = {8}, year = {2016}, url = {https://doi.org/10.1007/s12532-015-0097-z}, + doi = {10.1007/s12532-015-0097-z} } @article{Chalkis_volume:2023, @@ -221,13 +232,17 @@ @article{Ramana:1999 doi = {10.1007/BF01100204} } -@misc{Spallitta:2024, - title={Enhancing SMT-based Weighted Model Integration by Structure Awareness}, - author={Giuseppe Spallitta and Gabriele Masina and Paolo Morettin and Andrea Passerini and Roberto Sebastiani}, - year={2024}, - eprint={2302.06188}, - archivePrefix={arXiv}, - primaryClass={cs.AI} +@article{Spallitta:2024, +title = {Enhancing SMT-based Weighted Model Integration by structure awareness}, +journal = {Artificial Intelligence}, +volume = {328}, +pages = {104067}, +year = {2024}, +issn = {0004-3702}, +doi = {10.1016/j.artint.2024.104067}, +url = {https://www.sciencedirect.com/science/article/pii/S0004370224000031}, +author = {Giuseppe Spallitta and Gabriele Masina and Paolo Morettin and Andrea Passerini and Roberto Sebastiani}, +keywords = {Hybrid probabilistic inference, Weighted Model Integration, Satisfiability modulo theories} } @inproceedings{Kook:2022, @@ -269,11 +284,10 @@ @article{Chalkis_spectra:2022 pages = {205-232}, year = {2022}, issn = {0024-3795}, -doi = {https://doi.org/10.1016/j.laa.2022.04.002}, +doi = {10.1016/j.laa.2022.04.002}, url = {https://www.sciencedirect.com/science/article/pii/S0024379522001471}, author = {Apostolos Chalkis and Ioannis Z. Emiris and Vissarion Fisikopoulos and Panagiotis Repouskos and Elias Tsigaridas}, -keywords = {Spectahedron, Semidefinite-programming, Sampling, Random walk, Monte Carlo, Polynomial eigenvalue problem, Volume approximation, Optimization}, -abstract = {We present algorithmic, complexity, and implementation results on the problem of sampling points from a spectrahedron, that is, the feasible region of a semidefinite program. Our main tool is geometric random walks. 
We analyze the arithmetic and bit complexity of certain primitive geometric operations that are based on the algebraic properties of spectrahedra and the polynomial eigenvalue problem. This study leads to the implementation of a broad collection of random walks for sampling from spectrahedra that experimentally show faster mixing times than methods currently employed either in theoretical studies or in applications, including the popular family of Hit-and-Run walks. The different random walks offer a variety of advantages, thus allowing us to efficiently sample from general probability distributions, for example the family of log-concave distributions which arise in numerous applications. We focus on two major applications of independent interest: (i) approximate the volume of a spectrahedron, and (ii) compute the expectation of functions coming from robust optimal control. We exploit efficient linear algebra algorithms and implementations to address the aforementioned computations in very high dimension. In particular, we provide a C++ open source implementation of our methods that scales efficiently, for the first time, up to dimension 200. 
We illustrate its efficiency on various data sets.} +keywords = {Spectahedron, Semidefinite-programming, Sampling, Random walk, Monte Carlo, Polynomial eigenvalue problem, Volume approximation, Optimization} } @@ -305,7 +319,8 @@ @article{ccef-crises-j volume={109}, pages={101916}, year={2023}, - publisher={Elsevier} + publisher={Elsevier}, + doi={10.1016/j.comgeo.2022.101916} } @@ -323,8 +338,9 @@ @inproceedings{bcft-aistats-23 @article{bcft-arxiv-24, title={Randomized Control in Performance Analysis and Empirical Asset Pricing}, author={Bachelard, Cyril and Chalkis, Apostolos and Fisikopoulos, Vissarion and Tsigaridas, Elias}, - journal={arXiv preprint arXiv:2403.00009}, - year={2024} + journal={ssrn preprint}, + year={2024}, + doi={10.2139/ssrn.4744249} } @inproceedings{co-alenex-2021, @@ -333,7 +349,8 @@ @inproceedings{co-alenex-2021 booktitle={2021 Proceedings of the Workshop on Algorithm Engineering and Experiments (ALENEX)}, pages={193--206}, year={2021}, - organization={SIAM} + organization={SIAM}, + doi={10.1137/1.9781611976472.15} } @@ -345,15 +362,13 @@ @article{vm-fods-2022 number={4}, pages={667--705}, year={2022}, - publisher={Foundations of Data Science} + publisher={Foundations of Data Science}, + doi={10.3934/fods.2022018} } - - - @PhdThesis{pham-phd-2024, +@PhdThesis{pham-phd-2024, author = {Long Pham}, - title = {Hybrid Resource-Bound -Analyses of Programs}, + title = {Hybrid Resource-Bound Analyses of Programs}, school = {Carnegie Mellon University}, year = 2024, note = {(PhD thesis proposal)}} diff --git a/paper.md b/paper.md new file mode 100644 index 000000000..7db7af8b2 --- /dev/null +++ b/paper.md @@ -0,0 +1,110 @@ +--- +title: 'volesti: A C++ library for sampling and volume computation on convex bodies' +tags: + - C++ + - geometry + - randomization + - Monte Carlo methods + - convexity +authors: + - name: Apostolos Chalkis + orcid: 0000-0002-4628-1907 + equal-contrib: true + affiliation: "2, 4" # (Multiple affiliations must be quoted) + - 
name: Vissarion Fisikopoulos + orcid: 0000-0002-0780-666X + corresponding: true # (This is how to denote the corresponding author) + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: "1, 4" + - name: Marios Papachristou + orcid: 0000-0002-1728-0729 + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: 5 + - name: Elias Tsigaridas + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: "3, 4" +affiliations: + - name: National & Kapodistrian University of Athens, Greece + index: 1 + - name: Quantagonia + index: 2 + - name: Inria Paris and IMJ-PRG, Sorbonne Université + index: 3 + - name: GeomScale + index: 4 + - name: Cornell University + index: 5 +date: 11 March 2024 +bibliography: paper.bib + +--- + +# Summary + +Sampling from (constrained) high-dimensional distributions and volume approximation of convex +bodies are fundamental operations that appear in optimization, finance, +engineering, artificial intelligence, and machine learning. +We present `volesti`, a C++ library that delivers efficient implementations of state-of-the-art, mainly randomized, algorithms +to sample from general log-concave distributions. +Based on these routines, we can estimate the volume of convex bodies in high dimensions, +round them, and compute multidimensional integrals over them. +The backbone of our library consists of Monte Carlo algorithms, +which are randomized algorithms, the output of which can be incorrect with (usually very small) error probability; thus, we also provide several +high-dimensional statistical tests to certify and verify the output. + +The focus of `volesti` is scalability in high dimensions, +which, depending on the problem at hand, could range from hundreds to thousands of dimensions.
+Another novelty is the ability to handle a variety of different inputs +for the constrained support of the various distributions. +`volesti` supports three different types of polyhedra [@Ziegler:1995], spectrahedra [@Ramana:1999] +and general non-linear convex objects. + +`volesti` relies on the `Eigen` library [@eigen] for linear algebra but also supports `MKL` optimizations [@mkl]. +There are R [@Chalkis:2021] and Python [@Chalkis_dingo:2023] interfaces available. + +# Statement of need + +High-dimensional sampling from multivariate distributions with Markov Chain Monte Carlo (MCMC) +algorithms is a fundamental problem with many applications in science and engineering [@Iyengar:1988; @Somerville:1998; @Genz:2009; @Schellenberger:2009]. +In particular, multivariate integration over a convex set as well as the volume approximation of convex sets have garnered significant attention from theorists and engineers over the last decades. +Nevertheless, these problems are computationally hard for general dimensions [@Dyer:1988]. +MCMC algorithms made remarkable progress and their use allowed us to efficiently tackle the problems of sampling and volume estimation of convex bodies in theory, +by the introduction of (rigorous) theoretical guarantees [@Chen:2018; @Lee:2018; +@Mangoubi:2019]. +Unfortunately, these theoretical guarantees of the MCMC algorithms +do not extend in a straightforward manner to efficient implementations able to attack problems coming from real-life computations. +Therefore, we witnessed the birth of efficient-in-practice MCMC algorithms +that relax the theoretical guarantees and employ new algorithmic and statistical techniques +to be amenable to efficient implementations.
+Remarkably, these algorithms, and the corresponding implementations, +also meet the requirements for high accuracy results +[@Emiris:2014; @Cousins:2015; @Chalkis_volume:2023; @Kook:2022]; +however, several existing published methods are available as part of proprietary packages (MATLAB) [@Cousins:2015; @Kook:2022]. + +Our open-source package -- `volesti` -- offers all of the aforementioned functionality, together with support for sampling from general log-concave densities [@Chalkis_hmc:2023] and uniform sampling from spectrahedra [@Chalkis_spectra:2022]. + +Our implementation: +1. supports various sampling techniques based on geometric walks (roughly speaking, continuous versions of MCMC algorithms), such as the billiard walk, the Hamiltonian walk, and others, +2. gives the user the ability to sample from various distributions, such as uniform, exponential, Gaussian, and general log-concave densities, +3. allows the distributions to be constrained to various convex domains, such as hypercubes, zonotopes, general polytopes (defined either as a set of linear inequalities or as a convex hull of a pointset), and spectrahedra (feasible sets of semidefinite programs), and +4. can perform volume computations and integration, and solve problems from real-life applications in very high dimensions. + +# Impact + +`volesti` has been used extensively in various research and engineering projects coauthored by the authors of this paper.
+In particular, it has been used to sample the flux space of metabolic networks, +including the most complicated human metabolic network accessible today, Recon3D [@cftz-socg021], +to model financial crises [@ccef-crises-j], +to detect low-volatility anomalies in stock markets [@bcft-aistats-23], + to introduce randomized control in asset pricing and portfolio performance evaluation [@bcft-arxiv-24], and also to sample from (and compute the volume of) spectrahedra [@Chalkis_spectra:2022], the feasible regions of semidefinite programs. + +Moreover, `volesti` has been used by other research teams in conducting research in electric power systems [@Venzke:2019], for problems in probabilistic inference [@Spallitta:2024], +to perform resource analysis on programs [@pham-phd-2024]; +but also for more theoretical and mathematical challenges, like the computation of topological invariants [@co-alenex-2021] and persistent homology [@vm-fods-2022]. + +# Acknowledgements + +We would like to thank the contributors to the `volesti` library for their valuable contributions and feedback. +MP was partially supported by a Cornell University Fellowship, a grant from the A.G. Leventis Foundation, a grant from the Gerondelis Foundation, and a LinkedIn Ph.D. Fellowship. + +# References