Skip to content

Commit

Permalink
Merge pull request #32 from kyleniemeyer/patch-1
Browse files Browse the repository at this point in the history
Fixes for JOSS paper
  • Loading branch information
lorenzoschena authored Oct 17, 2024
2 parents b754355 + 97af41b commit afd7f90
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 27 deletions.
69 changes: 47 additions & 22 deletions paper/paper.bib
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,30 @@ @Article{Huang2001
publisher = {Wiley},
}

@article{sieber_paschereit_oberleithner_2016, title={Spectral proper orthogonal decomposition}, volume={792}, DOI={10.1017/jfm.2016.103}, journal={Journal of Fluid Mechanics}, publisher={Cambridge University Press}, author={Sieber, Moritz and Paschereit, C. Oliver and Oberleithner, Kilian}, year={2016}, pages={798–828}}
@article{sieber_paschereit_oberleithner_2016,
  author    = {Sieber, Moritz and Paschereit, C. Oliver and Oberleithner, Kilian},
  title     = {Spectral proper orthogonal decomposition},
  journal   = {Journal of Fluid Mechanics},
  publisher = {Cambridge University Press},
  volume    = {792},
  pages     = {798--828},
  year      = {2016},
  doi       = {10.1017/jfm.2016.103}
}

@article{berkooz_proper_1993,
author = {Ranc, Nicolas and Blanche, Antoine and Ryckelynck, D. and Chrysochoos, Andre},
year = {2014},
month = {04},
pages = {},
month = apr,
pages = {725--739},
title = {POD Preprocessing of IR Thermal Data to Assess Heat Source Distributions},
volume = {55},
journal = {Experimental Mechanics},
doi = {10.1007/s11340-014-9858-2}
}

@article{ninni_modulo_2020,
author = {Ninni, Davide and Mendez, Miguel},
author = {Ninni, Davide and Mendez, Miguel A.},
year = {2020},
month = {12},
pages = {100622},
Expand All @@ -37,23 +46,44 @@ @article{ninni_modulo_2020
doi = {10.1016/j.softx.2020.100622}
}

@article{mendez_balabane_buchlin_2019, title={Multi-scale proper orthogonal decomposition of complex fluid flows}, volume={870}, DOI={10.1017/jfm.2019.212}, journal={Journal of Fluid Mechanics}, publisher={Cambridge University Press}, author={Mendez, M. A. and Balabane, M. and Buchlin, J.-M.}, year={2019}, pages={988–1036}}
@article{mendez_balabane_buchlin_2019,
  author    = {Mendez, Miguel A. and Balabane, M. and Buchlin, J.-M.},
  title     = {Multi-scale proper orthogonal decomposition of complex fluid flows},
  journal   = {Journal of Fluid Mechanics},
  publisher = {Cambridge University Press},
  volume    = {870},
  pages     = {988--1036},
  year      = {2019},
  doi       = {10.1017/jfm.2019.212}
}

@article{schmid_2010,
title={Dynamic mode decomposition of numerical and experimental data},
volume={656},
DOI={10.1017/S0022112010001217},
journal={Journal of Fluid Mechanics},
publisher={Cambridge University Press},
author={Schmid, Peter J.}, year={2010}, pages={5–28}}
author={Schmid, Peter J.},
year={2010},
pages={5–28}
}

@article{Towne_2018, title={Spectral proper orthogonal decomposition and its relationship to dynamic mode decomposition and resolvent analysis}, volume={847}, DOI={10.1017/jfm.2018.283}, journal={Journal of Fluid Mechanics}, publisher={Cambridge University Press}, author={Towne, Aaron and Schmidt, Oliver T. and Colonius, Tim}, year={2018}, pages={821–867}}
@article{Towne_2018,
  author    = {Towne, Aaron and Schmidt, Oliver T. and Colonius, Tim},
  title     = {Spectral proper orthogonal decomposition and its relationship to dynamic mode decomposition and resolvent analysis},
  journal   = {Journal of Fluid Mechanics},
  publisher = {Cambridge University Press},
  volume    = {847},
  pages     = {821--867},
  year      = {2018},
  doi       = {10.1017/jfm.2018.283}
}

@article{mendez_2023,
author = {Mendez, Miguel},
author = {Mendez, Miguel A.},
year = {2023},
month = {01},
pages = {},
month = jan,
pages = {042001},
title = {Linear and Nonlinear Dimensionality Reduction from Fluid Mechanics to Machine Learning},
volume = {34},
journal = {Measurement Science and Technology},
Expand Down Expand Up @@ -93,15 +123,18 @@ @misc{py_POD
}


@article{Mengaldo2021, doi = {10.21105/joss.02862},
@article{Mengaldo2021,
doi = {10.21105/joss.02862},
url = {https://doi.org/10.21105/joss.02862},
year = {2021}, publisher = {The Open Journal},
year = {2021},
publisher = {The Open Journal},
volume = {6},
number = {60},
pages = {2862},
author = {Gianmarco Mengaldo and Romit Maulik},
title = {PySPOD: A Python package for Spectral Proper Orthogonal Decomposition (SPOD)},
journal = {Journal of Open Source Software} }
title = {{PySPOD: A Python package for Spectral Proper Orthogonal Decomposition (SPOD)}},
journal = {Journal of Open Source Software}
}
@misc{SpyOD,
title = {Spectral Proper Orthogonal Decomposition},
Expand All @@ -126,11 +159,3 @@ @article{rogowski2024unlocking
Program summary
Program Title: PySPOD CPC Library link to program files: https://doi.org/10.17632/jf5bf26jcj.1 Developer's repository link: https://github.com/MathEXLab/PySPOD Licensing provisions: MIT License Programming language: Python Nature of problem: Large spatio-temporal datasets may contain coherent patterns that can be leveraged to better understand, model, and possibly predict the behavior of complex dynamical systems. To this end, modal decomposition methods, such as the proper orthogonal decomposition (POD) and its spectral counterpart (SPOD), constitute powerful tools. The SPOD algorithm allows the systematic identification of space-time coherent patterns. This can be used to understand better the physics of the process of interest, and provide a path for mathematical modeling, including reduced order modeling. The SPOD algorithm has been successfully applied to fluid dynamics, geophysics and other domains. However, the existing open-source implementations are serial, and they prevent running on the increasingly large datasets that are becoming available, especially in computational physics. The inability to analyze via SPOD large dataset in turn prevents unlocking novel mechanisms and dynamical behaviors in complex systems. Solution method: We provide an open-source parallel (MPI distributed) code, namely PySPOD, that is able to run on large datasets (the ones considered in the present paper reach about 200 Terabytes). The code is built on the previous serial open-source code PySPOD that was published in https://joss.theoj.org/papers/10.21105/joss.02862.pdf. The new parallel implementation is able to scale on several nodes (we show both weak and strong scalability) and solve some of the bottlenecks that are commonly found at the I/O stage. The current parallel code allows running on datasets that was not easy or possible to analyze with serial SPOD algorithms, hence providing a path towards unlocking novel findings in computational physics. 
Additional comments including restrictions and unusual features: The code comes with a set of built-in postprocessing tools, for visualizing the results. It also comes with extensive continuous integration, documentation, and tutorials, as well as a dedicated website in addition to the associated GiHub repository. Within the package we also provide a parallel implementation of the proper orthogonal decomposition (POD), that leverages the I/O parallel capabilities of the SPOD algorithm.}
}

@article{Mendez_Balabane_Buchlin_2019,
title={Multi-scale proper orthogonal decomposition of complex fluid flows},
volume={870}, DOI={10.1017/jfm.2019.212},
journal={Journal of Fluid Mechanics},
author={Mendez, M. A. and Balabane, M. and Buchlin, J.-M.},
year={2019},
pages={988–1036}}
12 changes: 7 additions & 5 deletions paper/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,16 @@ bibliography: paper.bib

# Summary
Dimensionality reduction is an essential tool in processing large datasets, enabling data compression, pattern recognition, and reduced-order modeling. Many linear tools for dimensionality reduction have been developed in fluid mechanics, where they have been formulated to identify coherent structures and build reduced-order models of turbulent flows [@berkooz_proper_1993].
This work proposes a major upgrade of the software package MODULO (MODal mULtiscale pOd,[@ninni_modulo_2020]), which was designed to perform Multiscale Proper Orthogonal Decomposition (mPOD)[@mendez_balabane_buchlin_2019]. In addition to implementing the classic Fourier Transform (DFT) and Proper Orthogonal Decomposition (POD), MODULO now also allows for computing Dynamic Mode Decomposition (DMD) [@schmid_2010] as well as the Spectral POD by [@sieber_paschereit_oberleithner_2016], the Spectral POD by [@Towne_2018] and a generalized kernel-based decomposition akin to kernel PCA [@mendez_2023]. All algorithms are wrapped in a ‘SciKit’-like Python API, which allows computing all decompositions in one line of code. Documentation, exercises, and video tutorials are also provided to offer a primer on data drive modal analysis.
This work proposes a major upgrade of the software package MODULO (MODal mULtiscale pOd) [@ninni_modulo_2020], which was designed to perform Multiscale Proper Orthogonal Decomposition (mPOD) [@mendez_balabane_buchlin_2019]. In addition to implementing the classic Discrete Fourier Transform (DFT) and Proper Orthogonal Decomposition (POD), MODULO now also allows for computing Dynamic Mode Decomposition (DMD) [@schmid_2010] as well as the Spectral POD by @sieber_paschereit_oberleithner_2016, the Spectral POD by @Towne_2018, and a generalized kernel-based decomposition akin to kernel PCA [@mendez_2023]. All algorithms are wrapped in a ‘SciKit’-like Python API, which allows computing all decompositions in one line of code. Documentation, exercises, and video tutorials are also provided to offer a primer on data-driven modal analysis.

# Statement of Need
As extensively illustrated in recent reviews [@mendez_2023], [@Taira2020], all modal decompositions can be considered as special matrix factorizations. The matrix being factorized collects (many) snapshots (samples) of a high-dimensional variable. The factorization provides a basis for the matrix's column and row spaces to identify the most essential patterns (modes) according to a certain criterion. In what follows, we will refer to common terminologies in fluid dynamics. Nevertheless, it is worth stressing that these tools can be applied to any high-dimensional dataset to identify patterns and build reduced-order models [@Mendez_Balabane_Buchlin_2019]. In the common arrangement encountered in fluid dynamics, the basis for the column space is a set of ‘spatial structures’ while the basis for the row space is a set of `temporal structures'. These are paired by a scalar, which defines their relative importance. The POD, closely related to Principal Component Analysis, yields modes with the highest energy (variance) content and, in addition, guarantees their orthonormality by construction.
As extensively illustrated in recent reviews [@mendez_2023; @Taira2020], all modal decompositions can be considered as special matrix factorizations. The matrix being factorized collects (many) snapshots (samples) of a high-dimensional variable. The factorization provides a basis for the matrix's column and row spaces to identify the most essential patterns (modes) according to a certain criterion. In what follows, we will use terminology common in fluid dynamics. Nevertheless, it is worth stressing that these tools can be applied to any high-dimensional dataset to identify patterns and build reduced-order models [@mendez_balabane_buchlin_2019]. In the common arrangement encountered in fluid dynamics, the basis for the column space is a set of ‘spatial structures’ while the basis for the row space is a set of ‘temporal structures’. These are paired by a scalar, which defines their relative importance. The POD, closely related to Principal Component Analysis, yields modes with the highest energy (variance) content and, in addition, guarantees their orthonormality by construction.
In the DFT, as implemented in MODULO, modes are defined to evolve as orthonormal complex exponentials in time. This implies that the associated frequencies are integer multiples of a fundamental tone. The DMD generalizes the DFT by relaxing the orthogonality constraint and considering complex frequencies, i.e., modes that can vanish or explode.
Both the constraint of energy optimality and harmonic modes can lead to poor convergence and feature detection performances. This motivated the development of hybrid methods such as the Spectral POD by [@Towne_2018], Spectral POD by [@sieber_paschereit_oberleithner_2016], and Multiscale Proper Orthogonal Decomposition (mPOD)[@mendez_balabane_buchlin_2019]. The first can be seen as an optimally averaged DMD, while the second combines POD and DFT with a filtering operation. Both SPODs assume statistically stationary data and are designed to identify harmonic (or quasi-harmonic) modes. The mPOD combines POD with Multi-resolution Analysis (MRA) to provide optimal modes within a prescribed frequency band. The mPOD modes are thus spectrally less narrow than those obtained by the SPODs, but this allows for localizing them in time (i.e., potentially having compact support in time).
Finally, recent developments in nonlinear methods such as kernel PCA and their applications to fluid dynamics (see [@mendez_2023]) have motivated the interest in the connection between nonlinear methods and the most general Karhunen-Loeve expansion (KL). This generalizes the POD as the decomposition of data onto the eigenfunction of a kernel function (the POD being a KL for the case of linear kernel).
Both the constraint of energy optimality and harmonic modes can lead to poor convergence and feature detection performance. This motivated the development of hybrid methods such as the Spectral POD by @Towne_2018, the Spectral POD by @sieber_paschereit_oberleithner_2016, and the Multiscale Proper Orthogonal Decomposition (mPOD) [@mendez_balabane_buchlin_2019]. The first can be seen as an optimally averaged DMD, while the second combines POD and DFT with a filtering operation. Both SPODs assume statistically stationary data and are designed to identify harmonic (or quasi-harmonic) modes. The mPOD combines POD with Multi-resolution Analysis (MRA) to provide optimal modes within a prescribed frequency band. The mPOD modes are thus spectrally less narrow than those obtained by the SPODs, but this allows for localizing them in time (i.e., potentially having compact support in time).
Finally, recent developments in nonlinear methods such as kernel PCA and their applications to fluid dynamics (see @mendez_2023) have motivated interest in the connection between nonlinear methods and the most general Karhunen-Loève expansion (KL). This generalizes the POD as the decomposition of data onto the eigenfunctions of a kernel function (the POD being a KL for the case of a linear kernel).


MODULO provides a unified tool to carry out different decompositions with a shared API. This simplifies comparing different techniques and streamlines their application to a given dataset (problem). In addition, it is the only package that includes the mPOD and the generalized KL with kernel functions interfacing with SciKit-learn. For decomposition-specific packages, we refer the reader to many excellent Python APIs that are available to compute the POD, DMD, and both SPODs, for example [@py_DMD], [@Mengaldo2021], [@SpyOD], [@rogowski2024unlocking].
MODULO provides a unified tool to carry out different decompositions with a shared API. This simplifies comparing different techniques and streamlines their application to a given dataset (problem). In addition, it is the only package that includes the mPOD and the generalized KL with kernel functions interfacing with SciKit-learn. For decomposition-specific packages, we refer the reader to many excellent Python APIs that are available to compute the POD, DMD, and both SPODs, for example [@py_DMD; @Mengaldo2021; @SpyOD; @rogowski2024unlocking].


# New Features
Expand All @@ -68,3 +68,5 @@ MODULO is a versatile and user-friendly toolbox for data-driven modal decomposit

# Acknowledgements
R. Poletti and L. Schena are supported by Fonds Wetenschappelijk Onderzoek (FWO), grant numbers 1SD7823N and 1S75825N, respectively.

# References

0 comments on commit afd7f90

Please sign in to comment.