diff --git a/joss_paper/README b/joss_paper/README new file mode 100644 index 000000000..46dcfe919 --- /dev/null +++ b/joss_paper/README @@ -0,0 +1,7 @@ +Compile it locally with + +`docker run --rm --volume $PWD/joss_paper:/data --user $(id -u):$(id -g) --env JOURNAL=joss openjournals/inara` + +More instructions on + +https://joss.readthedocs.io/en/latest/submitting.html#example-paper-and-bibliography \ No newline at end of file diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib new file mode 100644 index 000000000..b14a11504 --- /dev/null +++ b/joss_paper/paper.bib @@ -0,0 +1,360 @@ +@article{Chalkis:2021, + author = {Apostolos Chalkis and Vissarion Fisikopoulos}, + title = {{volesti}: Volume Approximation and Sampling for Convex + Polytopes in {R}}, + year = {2021}, + journal = {The R Journal}, + doi = {10.32614/RJ-2021-077}, + url = {https://doi.org/10.32614/RJ-2021-077}, + pages = {642--660}, + volume = {13}, + number = {2} +} + +@article{Chalkis_dingo:2023, + author = {Apostolos Chalkis and Vissarion Fisikopoulos and Elias Tsigaridas and Haris Zafeiropoulos}, + title = {{dingo}: a {Python} package for metabolic flux sampling}, + elocation-id = {2023.06.18.545486}, + year = {2023}, + doi = {10.1101/2023.06.18.545486}, + publisher = {Cold Spring Harbor Laboratory}, + URL = {https://www.biorxiv.org/content/early/2023/06/20/2023.06.18.545486}, + eprint = {https://www.biorxiv.org/content/early/2023/06/20/2023.06.18.545486.full.pdf}, + journal = {bioRxiv} +} + +@article{Iyengar:1988, + author = {S. Iyengar}, + journal = {SIAM Journal on Scientific and Statistical Computing}, + number = {3}, + pages = {418--423}, + title = {Evaluation of Normal Probabilities of Symmetric + Regions}, + volume = {9}, + year = {1988}, + doi = {10.1137/0909028}, +} + +@article{Somerville:1998, + author = {P. N. 
Somerville}, + journal = {Journal of Computational and Graphical Statistics}, + number = {4}, + pages = {529--544}, publisher = {Taylor \& Francis}, + title = {Numerical Computation of Multivariate Normal and + Multivariate-t Probabilities over Convex Regions}, + volume = {7}, + year = {1998}, + doi = {10.1080/10618600.1998.10474793}, +} + +@book{Genz:2009, + author = {Genz, A. and Bretz, F.}, + edition = {1st}, + publisher = {Springer Publishing Company, Incorporated}, + title = {Computation of Multivariate Normal and t + Probabilities}, + year = {2009}, + isbn = {364201688X, 9783642016882}, +} + +@article{Schellenberger:2009, + author = {J. Schellenberger and B. O. Palsson}, + journal = {The Journal of Biological Chemistry}, + pages = {5457--5461}, + title = {Use of Randomized Sampling for Analysis of Metabolic + Networks}, + volume = {284}, number = {9}, + year = {2009}, + doi = {10.1074/jbc.R800048200}, +} + +@article{Venzke:2019, + author = {A. Venzke and D. K. Molzahn and S. Chatzivasileiadis}, + journal = {Electric Power Systems Research}, + pages = {106614}, + title = {Efficient creation of datasets for data-driven power + system applications}, + volume = {190}, + year = {2021}, + issn = {0378-7796}, + doi = {10.1016/j.epsr.2020.106614}, +} + +@article{Dyer:1988, + author = {Dyer, M. and Frieze, A.}, + journal = {SIAM Journal on Computing}, + number = {5}, + pages = {967--974}, + title = {On the Complexity of Computing the Volume of a + Polyhedron}, + volume = {17}, + year = {1988}, + doi = {10.1137/0217060}, +} + +@article{Chen:2018, + author = {Y. Chen and R. Dwivedi and M. J. Wainwright and B. Yu}, + journal = {Journal of Machine Learning Research}, + number = {55}, + pages = {1--86}, + title = {Fast {MCMC} Sampling Algorithms on Polytopes}, + volume = {19}, + year = {2018}, + url = {http://jmlr.org/papers/v19/18-158.html}, +} + +@inproceedings{Lee:2018, + author = {Y. T. Lee and S. 
Vempala}, + booktitle = {Proceedings of the 50th Annual ACM SIGACT Symposium + on Theory of Computing}, + pages = {1115--1121}, + series = {STOC 2018}, + title = {Convergence Rate of {Riemannian} {Hamiltonian} {Monte + Carlo} and Faster Polytope Volume Computation}, + year = {2018}, + isbn = {978-1-4503-5559-9}, + doi = {10.1145/3188745.3188774}, +} + +@inproceedings{Mangoubi:2019, + author = {O. Mangoubi and N. K. Vishnoi}, + booktitle = {2019 IEEE 60th Annual Symposium on Foundations of + Computer Science (FOCS)}, + pages = {1338--1357}, + title = {Faster Polytope Rounding, Sampling, and Volume + Computation via a Sub-Linear Ball Walk}, + year = {2019}, + doi = {10.1109/FOCS.2019.00082}, +} + +@article{Lovasz:2006, + address = {Philadelphia, PA, USA}, + author = {Lov{\'a}sz, L. and Vempala, S.}, + journal = {SIAM Journal on Computing}, + number = {4}, + pages = {985--1005}, + publisher = {Society for Industrial and Applied Mathematics}, + title = {Hit-and-Run from a Corner}, + volume = {35}, + year = {2006}, + issn = {0097-5397}, + doi = {10.1137/S009753970544727X}, +} + +@article{Emiris:2014, + address = {New York, USA}, + author = {I. Z. Emiris and V. Fisikopoulos}, + journal = {ACM Transactions on Mathematical Software}, + number = {4}, + pages = {38:1--38:21}, + publisher = {ACM}, + title = {Practical Polytope Volume Approximation}, + volume = {44}, + year = {2018}, + issn = {0098-3500}, + doi = {10.1145/3194656}, +} + +@article{Cousins:2015, + address = {Berlin}, + author = {B. Cousins and S. Vempala}, + journal = {Mathematical Programming Computation}, + month = jun, + number = {2}, + publisher = {Springer-Verlag}, + title = {A Practical Volume Algorithm}, + volume = {8}, + year = {2016}, + doi = {10.1007/s12532-015-0097-z}, +} + +@article{Chalkis_volume:2023, +author = {Chalkis, Apostolos and Emiris, Ioannis Z. 
and Fisikopoulos, Vissarion}, +title = {A Practical Algorithm for Volume Estimation based on Billiard Trajectories and Simulated Annealing}, +year = {2023}, +issue_date = {December 2023}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {28}, +issn = {1084-6654}, +url = {https://doi.org/10.1145/3584182}, +doi = {10.1145/3584182}, +journal = {ACM Journal of Experimental Algorithmics}, +month = may, +articleno = {1.3}, +numpages = {34}, +keywords = {volume approximation, mathematical software, randomized algorithm, billiard trajectories, polytope representations, sampling, Random walk} +} + +@MISC{eigen, + author = {Ga{\"e}l Guennebaud and Beno{\^i}t Jacob and others}, + title = {{Eigen} v3}, + url = {http://eigen.tuxfamily.org}, + year = {2010} + } + +@MISC{mkl, + title = {{Intel} {Math Kernel Library} ({Intel} {MKL})}, + url = {https://software.intel.com/en-us/intel-mkl}, + year = {2024} + } + + @book{Ziegler:1995, + address = {New York}, + author = {Ziegler, G{\"u}nter M.}, + series = {Graduate Texts in Mathematics}, volume = {152}, + description = {Lectures on Polytopes}, + isbn = {0387943293 9780387943299 3540943293 9783540943297 038794365X 9780387943657 354094365X 9783540943655}, + publisher = {Springer-Verlag}, + title = {Lectures on Polytopes}, + year = 1995 +} + +@article{Ramana:1999, +author = {Ramana, Motakuri and Goldman, A.}, +year = {1995}, +number = {1}, +pages = {33--50}, +title = {Some Geometric Results in Semidefinite Programming}, +volume = {7}, +journal = {Journal of Global Optimization}, +doi = {10.1007/BF01100204} +} + +@misc{Spallitta:2024, + title={Enhancing {SMT}-based Weighted Model Integration by Structure Awareness}, + author={Giuseppe Spallitta and Gabriele Masina and Paolo Morettin and Andrea Passerini and Roberto Sebastiani}, + year={2024}, + eprint={2302.06188}, + archivePrefix={arXiv}, + primaryClass={cs.AI} +} + +@inproceedings{Kook:2022, + author = {Kook, Yunbum and Lee, Yin-Tat and Shen, Ruoqi and Vempala, 
Santosh}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh}, + pages = {31684--31696}, + publisher = {Curran Associates, Inc.}, + title = {Sampling with {Riemannian} {Hamiltonian} {Monte Carlo} in a Constrained Space}, + url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/cdaa7f07b0c5a7803927d20aa717132e-Paper-Conference.pdf}, + volume = {35}, + year = {2022} +} + +@article{Chalkis_hmc:2023, +author = {Chalkis, Apostolos and Fisikopoulos, Vissarion and Papachristou, Marios and Tsigaridas, Elias}, +title = {Truncated Log-concave Sampling for Convex Bodies with Reflective {Hamiltonian} {Monte Carlo}}, +year = {2023}, +issue_date = {June 2023}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {49}, +number = {2}, +issn = {0098-3500}, +url = {https://doi.org/10.1145/3589505}, +doi = {10.1145/3589505}, +abstract = {We introduce Reflective Hamiltonian Monte Carlo (ReHMC), an HMC-based algorithm to sample from a log-concave distribution restricted to a convex body. The random walk is based on incorporating reflections to the Hamiltonian dynamics such that the support of the target density is the convex body. We develop an efficient open source implementation of ReHMC and perform an experimental study on various high-dimensional datasets. The experiments suggest that ReHMC outperforms Hit-and-Run and Coordinate-Hit-and-Run regarding the time it needs to produce an independent sample, introducing practical truncated sampling in thousands of dimensions.}, +journal = {ACM Transactions on Mathematical 
Software}, +month = jun, +articleno = {16}, +numpages = {25}, +keywords = {Statistical software, truncated sampling, geometric random walks, experiments, mixing time} +} + +@article{Chalkis_spectra:2022, +title = {Efficient sampling in spectrahedra and volume approximation}, +journal = {Linear Algebra and its Applications}, +volume = {648}, +pages = {205--232}, +year = {2022}, +issn = {0024-3795}, +doi = {10.1016/j.laa.2022.04.002}, +url = {https://www.sciencedirect.com/science/article/pii/S0024379522001471}, +author = {Apostolos Chalkis and Ioannis Z. Emiris and Vissarion Fisikopoulos and Panagiotis Repouskos and Elias Tsigaridas}, +keywords = {Spectrahedron, Semidefinite-programming, Sampling, Random walk, Monte Carlo, Polynomial eigenvalue problem, Volume approximation, Optimization}, +abstract = {We present algorithmic, complexity, and implementation results on the problem of sampling points from a spectrahedron, that is, the feasible region of a semidefinite program. Our main tool is geometric random walks. We analyze the arithmetic and bit complexity of certain primitive geometric operations that are based on the algebraic properties of spectrahedra and the polynomial eigenvalue problem. This study leads to the implementation of a broad collection of random walks for sampling from spectrahedra that experimentally show faster mixing times than methods currently employed either in theoretical studies or in applications, including the popular family of Hit-and-Run walks. The different random walks offer a variety of advantages, thus allowing us to efficiently sample from general probability distributions, for example the family of log-concave distributions which arise in numerous applications. We focus on two major applications of independent interest: (i) approximate the volume of a spectrahedron, and (ii) compute the expectation of functions coming from robust optimal control. 
We exploit efficient linear algebra algorithms and implementations to address the aforementioned computations in very high dimension. In particular, we provide a C++ open source implementation of our methods that scales efficiently, for the first time, up to dimension 200. We illustrate its efficiency on various data sets.} +} + + + @inproceedings{cftz-socg021, + author = {Apostolos Chalkis and + Vissarion Fisikopoulos and + Elias P. Tsigaridas and + Haris Zafeiropoulos}, + editor = {Kevin Buchin and + {\'{E}}ric Colin de Verdi{\`{e}}re}, + title = {Geometric Algorithms for Sampling the Flux Space of Metabolic Networks}, + booktitle = {37th International Symposium on Computational Geometry, SoCG 2021, + June 7--11, 2021, Buffalo, NY, {USA} (Virtual Conference)}, + series = {LIPIcs}, + volume = {189}, + pages = {21:1--21:16}, + publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"{u}}r Informatik}, + year = {2021}, + url = {https://doi.org/10.4230/LIPIcs.SoCG.2021.21}, + doi = {10.4230/LIPIcs.SoCG.2021.21}, + +} + + +@article{ccef-crises-j, + title={Practical volume approximation of high-dimensional convex bodies, applied to modeling portfolio dependencies and financial crises}, + author={Cal{\`e}s, Ludovic and Chalkis, Apostolos and Emiris, Ioannis Z. and Fisikopoulos, Vissarion}, + journal={Computational Geometry}, + volume={109}, + pages={101916}, + year={2023}, + publisher={Elsevier} +} + + + + @inproceedings{bcft-aistats-23, + title={Randomized geometric tools for anomaly detection in stock markets}, + author={Bachelard, Cyril and Chalkis, Apostolos and Fisikopoulos, Vissarion and Tsigaridas, Elias}, + booktitle={International Conference on Artificial Intelligence and Statistics}, + pages={9400--9416}, + year={2023}, + publisher={PMLR} +} + + + @misc{bcft-arxiv-24, + title={Randomized Control in Performance Analysis and Empirical Asset Pricing}, + author={Bachelard, Cyril and Chalkis, Apostolos and Fisikopoulos, Vissarion and Tsigaridas, Elias}, + 
eprint={2403.00009}, archivePrefix={arXiv}, + year={2024} +} + +@inproceedings{co-alenex-2021, + title={Computation of large asymptotics of 3-manifold quantum invariants}, + author={Maria, Cl{\'e}ment and Rouill{\'e}, Owen}, + booktitle={2021 Proceedings of the Workshop on Algorithm Engineering and Experiments (ALENEX)}, + pages={193--206}, + year={2021}, + publisher={SIAM} +} + + +@article{vm-fods-2022, + title={Multiple hypothesis testing with persistent homology}, + author={Vejdemo-Johansson, Mikael and Mukherjee, Sayan}, + journal={Foundations of Data Science}, + volume={4}, + number={4}, + pages={667--705}, + year={2022}, + publisher={Foundations of Data Science} +} + + + + @PhdThesis{pham-phd-2024, + author = {Long Pham}, + title = {Hybrid Resource-Bound +Analyses of Programs}, + school = {Carnegie Mellon University}, + year = 2024, + note = {(PhD thesis proposal)}} + diff --git a/joss_paper/paper.md b/joss_paper/paper.md new file mode 100644 index 000000000..8fff2bd93 --- /dev/null +++ b/joss_paper/paper.md @@ -0,0 +1,106 @@ +--- +title: 'volesti: C++ library for sampling and volume computation on convex bodies' +tags: + - C++ + - geometry + - randomization + - Monte-Carlo methods + - convexity +authors: + - name: Apostolos Chalkis + orcid: 0000-0000-0000-0000 + equal-contrib: true + affiliation: "2, 4" # (Multiple affiliations must be quoted) + - name: Vissarion Fisikopoulos + corresponding: true # (This is how to denote the corresponding author) + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: "1, 4" + - name: Marios Papachristou + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: 5 + - name: Elias Tsigaridas + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: "3, 4" +affiliations: + - name: National & Kapodistrian University of Athens, Greece + index: 1 + - name: 
Quantagonia + index: 2 + - name: Inria Paris and IMJ-PRG, Sorbonne Université and Paris Université + index: 3 + - name: GeomScale + index: 4 + - name: Cornell University + index: 5 +date: 11 March 2024 +bibliography: paper.bib + +--- + +# Summary + +Sampling from high-dimensional distributions and volume approximation of convex +bodies are fundamental operations that appear in optimization, finance, +engineering, artificial intelligence, and machine learning. +In this paper, we present `volesti`, a C++ library that provides efficient, randomized algorithms for +volume estimation---a special case of integration---as well as general log-concave sampling and +rounding for convex bodies. +Since the implemented methods are Monte Carlo algorithms, the library also provides several +high-dimensional statistical tests. + +`volesti`'s focus is scalability in high dimensions, which can be on the order of hundreds or thousands +depending on the problem. +Another novelty of the library is the variety of inputs it handles. +`volesti` supports three different types of polyhedra [@Ziegler:1995], spectrahedra [@Ramana:1999] +and general non-linear convex objects. + +`volesti` relies on the `Eigen` library [@eigen] for linear algebra but also supports `MKL` optimizations [@mkl]. +There are R and Python interfaces to `volesti` available [@Chalkis:2021; @Chalkis_dingo:2023] +although not all C++ features are exposed in those interfaces. + +# Statement of need + +High-dimensional sampling from multivariate distributions with Markov Chain Monte Carlo (MCMC) +algorithms is a fundamental problem with many applications in science and engineering [@Iyengar:1988; +@Somerville:1998; @Genz:2009; @Schellenberger:2009]. +In particular, multivariate integration over a convex set and volume approximation of such sets +have attracted considerable effort over the last decades. +Nevertheless, those problems are computationally hard for general dimensions [@Dyer:1988]. 
+MCMC algorithms have made remarkable progress in efficiently solving the problems of sampling and +volume estimation of convex bodies while enjoying great theoretical guarantees [@Chen:2018; @Lee:2018; +@Mangoubi:2019]. However, theoretical algorithms cannot be applied efficiently as-is to real-life +computations. +Therefore, practical algorithms have been designed by relaxing the theoretical guarantees and +applying new algorithmic and statistical techniques to perform efficiently while at the same time +meeting the requirements for high-accuracy results [@Emiris:2014; @Cousins:2015; @Chalkis_volume:2023; +@Kook:2022]. +The volume method of @Cousins:2015 and the sampling method of @Kook:2022 are available as `MATLAB` +packages. +All the methods mentioned above are implemented in `volesti` as well as the sampling algorithm by +@Chalkis_hmc:2023 and the algorithms for spectrahedra by @Chalkis_spectra:2022. + +Q: what does the package offer you? + various sampling techniques based on geometric walks (Billiard, Hamiltonian, etc.), + for various distributions (uniform, log-concave, exponential, Gaussian) + constrained in various convex domains (hypercubes, zonotopes, general polytopes (in H and V representations), spectrahedra), + using these we can do volume, integration, and applications. 
+ + +The problems (theoretical and practical) that we solved using `volesti` + metabolic (sampling flux in the human body metabolic network) [@cftz-socg021], + finance (e.g., to model financial crises [@ccef-crises-j], + to detect low-volatility anomalies in stock markets [@bcft-aistats-23], + to introduce randomized control in asset pricing and portfolio performance evaluation [@bcft-arxiv-24]), spectrahedra+sdp [@Chalkis_spectra:2022] + +To our knowledge, `volesti` has been used in conducting research in electric power systems [@Venzke:2019], +in probabilistic inference [@Spallitta:2024], + and resource analysis on programs [@pham-phd-2024]; +but also for more theoretical and mathematical endeavours, like the computation of topological invariants [@co-alenex-2021] + and persistent homology [@vm-fods-2022]. + +# Acknowledgements + +We would like to thank the contributors to the `volesti` library for their valuable contributions and +feedback. + +# References \ No newline at end of file