diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..73e3827
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: [wmayner]  # GitHub account(s) listed as funding targets for this repository
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
new file mode 100644
index 0000000..28c900d
--- /dev/null
+++ b/.github/workflows/build_wheels.yml
@@ -0,0 +1,27 @@
+name: Build wheels & run tests
+
+on: ['push', 'pull_request']  # run on every push and every pull request
+
+jobs:
+ build_wheels:
+ name: Build wheels on ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-20.04, windows-2019, macos-11]  # one job per listed runner image
+
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ fetch-depth: 0  # full history; presumably so setuptools_scm / `git describe` can see tags (confirm)
+
+ - name: Build wheels
+ uses: pypa/cibuildwheel@v2.12.0
+ with:
+ package-dir: .
+ output-dir: wheelhouse  # same directory the upload step below reads from
+ config-file: "{package}/pyproject.toml"  # build/test options live in [tool.cibuildwheel]
+
+ - uses: actions/upload-artifact@v3
+ with:
+ path: ./wheelhouse/*.whl  # publish every built wheel as a workflow artifact
diff --git a/.github/workflows/make_sdist.yml b/.github/workflows/make_sdist.yml
new file mode 100644
index 0000000..b74d383
--- /dev/null
+++ b/.github/workflows/make_sdist.yml
@@ -0,0 +1,19 @@
+name: Make source distribution
+
+on: ['push', 'pull_request']  # run on every push and every pull request
+
+jobs:
+ make_sdist:
+ name: Make source distribution
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ fetch-depth: 0  # full history; presumably so setuptools_scm can derive the version from tags (confirm)
+
+ - name: Build source distribution
+ run: pipx run build --sdist  # sdist only; wheels are produced by build_wheels.yml
+
+ - uses: actions/upload-artifact@v3
+ with:
+ path: dist/*.tar.gz  # publish the generated source tarball(s) as a workflow artifact
diff --git a/.gitignore b/.gitignore
index 4731ce8..7fa6859 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
__pycache__
.gitconfig
.cache
+.pytest_cache
.tox
.env
.ropeproject
@@ -8,6 +9,9 @@ __pycache__
*.pyc
MANIFEST
*.egg*
+src/pyemd/emd.cpp
+src/pyemd/_version.py
build
dist
-pyemd/emd.cpp
+wheelhouse
+ignore
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 025a4ad..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-sudo: false
-language: python
-python:
- - '2.7'
- - '3.4'
- - '3.5'
- - '3.6'
-install:
- - pip install -r dev_requirements.txt
- - make build
- - pip uninstall --yes -r dev_requirements.txt
- - pip install tox-travis
-script: tox
-notifications:
- email: false
- slack:
- rooms:
- secure: rxQsNRK9XBkBV0pdYuJG+tsN2tky+JUEF5ayDIUAzSaPeB//VVNNofJhcmfNgG1WiEEi6fe0dR/Y6UDsoVyQrbCHO2q2bIVQp6A/63vgz3DcVQzMahB/QVwte7gy02nLf6rS2g3VetVXrTW6OO4Cv7NQrQb58biVFx/yBtQ3qzI=
- on_success: never
- on_failure: always
diff --git a/LICENSE b/LICENSE
index 797a2c5..e3fb122 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2014-2017 Will Mayner
+Copyright (c) 2014-2023 Will Mayner
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/MANIFEST.in b/MANIFEST.in
index 966751d..09593c5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,4 @@
-graft pyemd
+graft src/pyemd
graft test
include README.rst
diff --git a/Makefile b/Makefile
index ddd77c9..8ba3cc0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,30 +1,49 @@
-.PHONY: default test build clean dist test-dist check-dist build-dist clean-dist
+.PHONY: default clean develop test dist-build-local dist-build-wheels dist-upload dist-test-upload dist-sign dist-check dist-clean dist-test-install
-src = pyemd
-dist_dir = dist
+src = src/pyemd
+test = test
+dist = dist
+wheelhouse = wheelhouse
-default: build
+default: test
-test: build
+test: develop
py.test
-build: clean
- python setup.py build_ext -b .
+develop: clean
+ python -m pip install -e ".[test,dist]"
clean:
- rm -f pyemd/*.so
+ rm -rf $(shell find . -name '__pycache__')
+ rm -rf $(shell find . -name '*.so')
+ rm -rf .eggs
+ rm -rf pyemd.egg-info
+ rm -rf build
-dist: build-dist check-dist
- twine upload $(dist_dir)/*
+dist-build-local:
+ python -m build
-test-dist: build-dist check-dist
- twine upload --repository-url https://test.pypi.org/legacy/ $(dist_dir)/*
+dist-build-wheels:
+ cibuildwheel --platform linux --config-file pyproject.toml
-check-dist:
- python setup.py check --restructuredtext --strict
+dist-upload: dist-sign
+ twine upload $(dist)/*
+ twine upload $(wheelhouse)/*
-build-dist: clean-dist
- python setup.py sdist bdist_wheel --dist-dir=$(dist_dir)
+dist-test-upload: dist-check
+	twine upload --repository-url https://test.pypi.org/legacy/ $(dist)/*
+	twine upload --repository-url https://test.pypi.org/legacy/ $(wheelhouse)/*
-clean-dist:
- rm -rf $(dist_dir)
+dist-sign: dist-check
+ gpg --detach-sign -a $(dist)/*.tar.gz
+ gpg --detach-sign -a $(wheelhouse)/*.whl
+
+dist-check:
+ twine check --strict $(dist)/*
+ twine check --strict $(wheelhouse)/*
+
+dist-clean:
+ rm -rf $(dist)
+
+dist-test-install:
+ pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pyemd
diff --git a/README.rst b/README.rst
index 9ea95d8..015d654 100644
--- a/README.rst
+++ b/README.rst
@@ -1,15 +1,16 @@
-.. image:: https://img.shields.io/travis/wmayner/pyemd/develop.svg?style=flat-square&maxAge=3600
- :target: https://travis-ci.org/wmayner/pyemd
+.. image:: https://img.shields.io/github/actions/workflow/status/wmayner/pyemd/build_wheels.yml?style=flat-square&maxAge=86400
+ :target: https://github.com/wmayner/pyemd/actions/workflows/build_wheels.yml
+ :alt: Build status badge
.. image:: https://img.shields.io/pypi/pyversions/pyemd.svg?style=flat-square&maxAge=86400
- :target: https://wiki.python.org/moin/Python2orPython3
+ :target: https://pypi.org/project/pyemd/
:alt: Python versions badge
PyEMD: Fast EMD for Python
==========================
PyEMD is a Python wrapper for `Ofir Pele and Michael Werman's implementation
-`_ of the `Earth Mover's
-Distance `_ that allows
+`_ of the `Earth Mover's
+Distance `_ that allows
it to be used with NumPy. **If you use this code, please cite the papers listed
at the end of this document.**
@@ -54,8 +55,9 @@ You can also calculate the EMD directly from two arrays of observations:
>>> emd_samples(first_array, second_array, bins=2)
0.5
-Documentation
--------------
+
+API Documentation
+-----------------
emd()
~~~~~
@@ -75,8 +77,8 @@ emd()
*N*.
- ``distance_matrix`` *(np.ndarray)*: A 2D array of ``np.float64,`` of size at
least *N* × *N*. This defines the underlying metric, or ground distance, by
- giving the pairwise distances between the histogram bins. It must represent a
- metric; there is no warning if it doesn't.
+ giving the pairwise distances between the histogram bins.
+ **NOTE: It must represent a metric; there is no warning if it doesn't.**
*Keyword Arguments:*
@@ -84,8 +86,8 @@ emd()
resulting distance to be a metric, it should be at least half the diameter of
the space (maximum possible distance between any two points). If you want
partial matching you can set it to zero (but then the resulting distance is
- not guaranteed to be a metric). The default value is ``-1.0``, which means the
- maximum value in the distance matrix is used.
+ not guaranteed to be a metric). The default value is ``-1.0``, which means
+ the maximum value in the distance matrix is used.
*Returns:* *(float)* The EMD value.
@@ -123,18 +125,18 @@ emd_samples()
*Arguments:*
-- ``first_array`` *(Iterable)*: A 1D array of samples used to generate a
+- ``first_array`` *(Iterable)*: An array of samples used to generate a
histogram.
-- ``second_array`` *(Iterable)*: A 1D array of samples used to generate a
+- ``second_array`` *(Iterable)*: An array of samples used to generate a
histogram.
*Keyword Arguments:*
- ``extra_mass_penalty`` *(float)*: Same as for ``emd()``.
- ``distance`` *(string or function)*: A string or function implementing
- a metric on a 1D ``np.ndarray``. Defaults to the Euclidean distance. Currently
- limited to 'euclidean' or your own function, which must take a 1D array and
- return a square 2D array of pairwise distances.
+ a metric on a 1D ``np.ndarray``. Defaults to the Euclidean distance.
+ Currently limited to 'euclidean' or your own function, which must take
+ a 1D array and return a square 2D array of pairwise distances.
- ``normalized`` (*boolean*): If true (default), treat histograms as fractions
of the dataset. If false, treat histograms as counts. In the latter case the
EMD will vary greatly by array length.
@@ -147,11 +149,12 @@ emd_samples()
``first_array`` and ``second_array``. Note: if the given range is not a
superset of the default range, no warning will be given.
-*Returns:* *(float)* The EMD value between the histograms of ``first_array`` and
-``second_array``.
+*Returns:* *(float)* The EMD value between the histograms of ``first_array``
+and ``second_array``.
----
+
Limitations and Caveats
-----------------------
@@ -163,8 +166,8 @@ Limitations and Caveats
- The histograms and distance matrix must be numpy arrays of type
``np.float64``. The original C++ template function can accept any numerical
C++ type, but this wrapper only instantiates the template with ``double``
- (Cython converts ``np.float64`` to ``double``). If there's demand, I can add
- support for other types.
+ (Cython converts ``np.float64`` to ``double``). If there's demand, I can
+ add support for other types.
- ``emd_with_flow()``:
@@ -172,57 +175,27 @@ Limitations and Caveats
- ``emd_samples()``:
- - Using the default ``bins='auto'`` results in an extra call to
- ``np.histogram()`` to determine the bin lengths, since `the NumPy
- bin-selectors are not exposed in the public API
+ - With ``numpy < 1.15.0``, using the default ``bins='auto'`` results in an
+ extra call to ``np.histogram()`` to determine the bin lengths, since `the
+ NumPy bin-selectors are not exposed in the public API
`_. For performance, you may
- want to set the bins yourself.
-
-
-Contributing
-------------
-
-To help develop PyEMD, fork the project on GitHub and install the requirements
-with ``pip install -r requirements.txt``.
-
-The ``Makefile`` defines some tasks to help with development:
-
-- ``test``: Run the test suite
-- ``build`` Generate and compile the Cython extension
-- ``clean``: Remove the compiled Cython extension
-- ``default``: Run ``build``
-
-Tests for different Python environments can be run with ``tox``.
+ want to set the bins yourself. If ``numpy >= 1.15`` is available,
+ ``np.histogram_bin_edges()`` is called instead, which is more efficient.
Credit
------
- All credit for the actual algorithm and implementation goes to `Ofir Pele
- `_ and `Michael Werman
- `_. See the `relevant paper
- `_.
+ `_ and `Michael Werman
+ `_. See the `relevant paper
+ `_.
- Thanks to the Cython developers for making this kind of wrapper relatively
easy to write.
Please cite these papers if you use this code:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Ofir Pele and Michael Werman. A linear time histogram metric for improved SIFT
-matching. *Computer Vision - ECCV 2008*, Marseille, France, 2008, pp. 495-508.
-
-.. code-block:: latex
-
- @INPROCEEDINGS{pele2008,
- title={A linear time histogram metric for improved sift matching},
- author={Pele, Ofir and Werman, Michael},
- booktitle={Computer Vision--ECCV 2008},
- pages={495--508},
- year={2008},
- month={October},
- publisher={Springer}
- }
-
Ofir Pele and Michael Werman. Fast and robust earth mover's distances. *Proc.
2009 IEEE 12th Int. Conf. on Computer Vision*, Kyoto, Japan, 2009, pp. 460-467.
@@ -237,3 +210,18 @@ Ofir Pele and Michael Werman. Fast and robust earth mover's distances. *Proc.
month={September},
organization={IEEE}
}
+
+Ofir Pele and Michael Werman. A linear time histogram metric for improved SIFT
+matching. *Computer Vision - ECCV 2008*, Marseille, France, 2008, pp. 495-508.
+
+.. code-block:: latex
+
+ @INPROCEEDINGS{pele2008,
+ title={A linear time histogram metric for improved sift matching},
+ author={Pele, Ofir and Werman, Michael},
+ booktitle={Computer Vision--ECCV 2008},
+ pages={495--508},
+ year={2008},
+ month={October},
+ publisher={Springer}
+ }
diff --git a/conftest.py b/conftest.py
deleted file mode 100644
index 24326d9..0000000
--- a/conftest.py
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# conftest.py
-
-
-collect_ignore = ["setup.py", "build", "dist"]
diff --git a/dev_requirements.txt b/dev_requirements.txt
deleted file mode 100644
index 142a744..0000000
--- a/dev_requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-Cython >=0.20.2
diff --git a/dist_requirements.txt b/dist_requirements.txt
deleted file mode 100644
index 8409743..0000000
--- a/dist_requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-docutils
-pygments
-twine
-wheel
diff --git a/pyemd/__about__.py b/pyemd/__about__.py
deleted file mode 100644
index 40bffc2..0000000
--- a/pyemd/__about__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# __about__.py
-
-"""PyEMD metadata"""
-
-__title__ = 'pyemd'
-__version__ = '0.5.1'
-__description__ = ("A Python wrapper for Ofir Pele and Michael Werman's "
- "implementation of the Earth Mover's Distance.")
-__author__ = 'Will Mayner'
-__author_email__ = 'wmayner@gmail.com'
-__author_website__ = 'http://willmayner.com'
-__license__ = 'MIT'
-__copyright__ = 'Copyright (c) 2014-2017 Will Mayner'
-__url__ = 'http://github.com/wmayner/pyemd'
-
-__all__ = ['__title__', '__version__', '__description__', '__author__',
- '__author_email__', '__author_website__', '__license__',
- '__copyright__', '__url__']
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f779140
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,47 @@
+[build-system]
+# `packaging` is listed explicitly because setup.py imports it at module level.
+requires = [
+    "cython",
+    "oldest-supported-numpy",
+    "packaging",
+    "setuptools >= 45",
+    "setuptools_scm",
+    "wheel",
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pyemd"
+license = { file = "LICENSE" }
+description = "A Python wrapper for Ofir Pele and Michael Werman's implementation of the Earth Mover's Distance."
+authors = [{ name = "Will Mayner", email = "wmayner@gmail.com" }]
+requires-python = ">=3.7"
+dependencies = ["numpy >= 1.9.0"]
+readme = "README.rst"
+classifiers = [
+ 'Development Status :: 5 - Production/Stable',
+ 'Intended Audience :: Developers',
+ 'Natural Language :: English',
+ 'License :: OSI Approved :: MIT License',
+ 'Programming Language :: Python :: 3',
+]
+dynamic = ['version']
+
+[project.optional-dependencies]
+test = ['pytest']
+dist = ['build', 'cibuildwheel', 'setuptools_scm', 'twine']
+
+[project.urls]
+"Homepage" = "https://github.com/wmayner/pyemd"
+"Bug Tracker" = "https://github.com/wmayner/pyemd/issues"
+
+[tool.setuptools_scm]
+write_to = "src/pyemd/_version.py"  # generated version module; git-ignored and imported by pyemd/__init__.py
+local_scheme = "no-local-version"  # NOTE(review): presumably drops the local "+..." suffix so uploads are accepted; confirm
+
+[tool.cibuildwheel]
+skip = ["cp36*", "pp*"]  # build selectors to skip; requires-python is >=3.7
+build-verbosity = 2
+before-build = "cd {project} && git describe"  # NOTE(review): looks like a check that git tags are visible to the build; confirm
+test-requires = ["pytest"]
+test-command = "py.test {project}"  # run the project's test suite as the wheel test step
diff --git a/pytest.ini b/pytest.ini
index facd2f7..0254159 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,2 +1,15 @@
[pytest]
-addopts = --color=yes --tb=auto --doctest-glob='*.rst' --doctest-modules -vv
+addopts =
+ --color=yes
+ --tb=auto
+ --doctest-glob='*.rst'
+ --doctest-modules -vv
+ --ignore setup.py
+norecursedirs =
+ src
+ dist
+ build
+ wheelhouse
+ ignore
+ .git
+ .tox
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index e0a2d12..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
--e .
--r test_requirements.txt
--r dev_requirements.txt
--r dist_requirements.txt
diff --git a/setup.py b/setup.py
index 65bc605..3fe6fd4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,43 +1,57 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-import io
import os
+import platform
import sys
-from warnings import warn
+from distutils.sysconfig import get_config_var
from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext as _build_ext
-from setuptools.command.sdist import sdist as _sdist
+from packaging.version import Version, parse as parse_version
-# Alias ModuleNotFound for Python <= 3.5
-if (sys.version_info[0] < 3 or
- (sys.version_info[0] == 3 and sys.version_info[1] < 6)):
- ModuleNotFoundError = ImportError
+def is_platform_mac():
+ return sys.platform == "darwin"
+
+
+# For macOS, ensure extensions are built for macOS 10.9 when compiling on a
+# 10.9 system or above, overriding distutils behaviour which is to target
+# the version that Python was built for. This may be overridden by setting
+# MACOSX_DEPLOYMENT_TARGET before calling setup.py
+if is_platform_mac() and "MACOSX_DEPLOYMENT_TARGET" not in os.environ:
+ current_system = parse_version(platform.mac_ver()[0])
+ python_target = parse_version(get_config_var("MACOSX_DEPLOYMENT_TARGET"))
+ mac_deployment_target = Version("10.9")
+ if (
+ python_target < mac_deployment_target
+ and current_system >= mac_deployment_target
+ ):
+ os.environ["MACOSX_DEPLOYMENT_TARGET"] = str(mac_deployment_target)
try:
from Cython.Build import cythonize as _cythonize
+
USE_CYTHON = True
except (ImportError, ModuleNotFoundError):
USE_CYTHON = False
-def cythonize(extensions, **_ignore):
+def cythonize(extensions, **kwargs):
# Attempt to use Cython
if USE_CYTHON:
- return _cythonize(extensions)
+ return _cythonize(extensions, **kwargs)
# Cython is not available
for extension in extensions:
sources = []
for sfile in extension.sources:
path, ext = os.path.splitext(sfile)
- if ext in ('.pyx', '.py'):
- if extension.language == 'c++':
- ext = '.cpp'
+ if ext in (".pyx", ".py"):
+ if extension.language == "c++":
+ ext = ".cpp"
else:
- ext = '.c'
+ ext = ".c"
sfile = path + ext
sources.append(sfile)
extension.sources[:] = sources
@@ -45,79 +59,43 @@ def cythonize(extensions, **_ignore):
EXTENSIONS = [
- Extension('pyemd.emd',
- sources=['pyemd/emd.pyx'],
- language="c++")
+ Extension(
+ "pyemd.emd",
+ sources=["src/pyemd/emd.pyx"],
+ language="c++",
+ )
]
-EXT_MODULES = cythonize(EXTENSIONS)
+EXT_MODULES = cythonize(EXTENSIONS, language_level=3)
-class sdist(_sdist):
- def run(self):
- # Make sure the compiled Cython files in the distribution are up-to-date
- if USE_CYTHON:
- _cythonize(EXTENSIONS)
- else:
- warn('\n\n\033[91m\033[1m WARNING: '
- 'IF YOU A PREPARING A DISTRIBUTION: Cython is not available! '
- 'The cythonized `*.cpp` files may be out of date. Please '
- 'install Cython and run `sdist` again.'
- '\033[0m\n')
- _sdist.run(self)
-
-
-# See http://stackoverflow.com/a/21621689/1085344
+# See https://stackoverflow.com/a/21621689/1085344
class build_ext(_build_ext):
def finalize_options(self):
_build_ext.finalize_options(self)
# Prevent numpy from thinking it is still in its setup process:
- if hasattr(__builtins__, '__NUMPY_SETUP__'):
+ if hasattr(__builtins__, "__NUMPY_SETUP__"):
__builtins__.__NUMPY_SETUP__ = False
import numpy
- self.include_dirs.append(numpy.get_include())
-
-
-CMDCLASS = {
- 'sdist': sdist,
- 'build_ext': build_ext
-}
+ self.include_dirs.append(numpy.get_include())
-with io.open('README.rst', encoding='utf-8') as f:
- README = f.read()
-ABOUT = {}
-with open('./pyemd/__about__.py') as f:
- exec(f.read(), ABOUT)
+CMDCLASS = {"build_ext": build_ext}
-REQUIRES = [
- 'numpy >=1.9.0, <2.0.0'
-]
+SETUP_REQUIRES = ["setuptools_scm", "packaging"]
setup(
- name=ABOUT['__title__'],
- version=ABOUT['__version__'],
- description=ABOUT['__description__'],
- long_description=README,
- author=ABOUT['__author__'],
- author_email=ABOUT['__author_email__'],
- url=ABOUT['__url__'],
- license=ABOUT['__license__'],
- packages=['pyemd'],
- install_requires=REQUIRES,
- cmdclass=CMDCLASS,
- setup_requires=REQUIRES,
+ name="pyemd",
+ packages=["pyemd", "pyemd.lib"],
+ package_dir={
+ "pyemd": "src/pyemd",
+ "pyemd.lib": "src/pyemd/lib",
+ },
+ include_package_data=True,
ext_modules=EXT_MODULES,
- classifiers=[
- 'Development Status :: 3 - Alpha',
- 'Intended Audience :: Developers',
- 'Natural Language :: English',
- 'License :: OSI Approved :: MIT License',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- ],
+ cmdclass=CMDCLASS,
+ setup_requires=SETUP_REQUIRES,
+ use_scm_version=True,
)
diff --git a/pyemd/__init__.py b/src/pyemd/__init__.py
similarity index 77%
rename from pyemd/__init__.py
rename to src/pyemd/__init__.py
index 1957848..713ecb0 100644
--- a/pyemd/__init__.py
+++ b/src/pyemd/__init__.py
@@ -6,10 +6,10 @@
PyEMD
=====
-PyEMD is a Python wrapper for `Ofir Pele and Michael Werman's implementation of
-the Earth Mover's Distance
-`_ that allows it to be
-used with NumPy.
+PyEMD is a Python wrapper for `Ofir Pele and Michael Werman's implementation
+`_ of the `Earth Mover's
+Distance `_ that allows
+it to be used with NumPy.
**If you use this code, please cite the papers listed at the end of the
README.**
@@ -60,10 +60,10 @@
~~~~~~
- All credit for the actual algorithm and implementation goes to `Ofir Pele
- `_ and `Michael Werman
- `_. See the `relevant paper
- `_.
-- Thanks to the Cython devlopers for making this kind of wrapper relatively
+ `_ and `Michael Werman
+ `_. See the `relevant paper
+ `_.
+- Thanks to the Cython developers for making this kind of wrapper relatively
easy to write.
@@ -71,5 +71,9 @@
:license: See the LICENSE file.
"""
-from .__about__ import *
from .emd import emd, emd_with_flow, emd_samples
+
+try:
+ from ._version import version as __version__
+except ImportError:
+ __version__ = "unknown version"
diff --git a/pyemd/emd.pyx b/src/pyemd/emd.pyx
similarity index 94%
rename from pyemd/emd.pyx
rename to src/pyemd/emd.pyx
index 398503a..4cfff41 100644
--- a/pyemd/emd.pyx
+++ b/src/pyemd/emd.pyx
@@ -3,6 +3,8 @@
# distutils: language = c++
# emd.pyx
+from pkg_resources import parse_version
+
from libcpp.pair cimport pair
from libcpp.vector cimport vector
import cython
@@ -139,6 +141,16 @@ def euclidean_pairwise_distance_matrix(x):
return distance_matrix.reshape(len(x), len(x))
+# Use `np.histogram_bin_edges` when this NumPy provides it (added in 1.15.0)
+if hasattr(np, 'histogram_bin_edges'):
+    get_bins = np.histogram_bin_edges
+else:
+    def get_bins(a, bins=10, **kwargs):  # fallback for older NumPy
+        if isinstance(bins, str):  # named estimator such as 'auto'
+            _, bins = np.histogram(a, bins=bins, **kwargs)
+        return bins  # non-string `bins` is handed back unchanged
+
+
def emd_samples(first_array,
second_array,
extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY,
@@ -154,9 +166,9 @@ def emd_samples(first_array,
Pairwise ground distances are taken from the center of the bins.
Arguments:
- first_array (Iterable): A 1D array of samples used to generate a
+ first_array (Iterable): An array of samples used to generate a
histogram.
- second_array (Iterable): A 1D array of samples used to generate a
+ second_array (Iterable): An array of samples used to generate a
histogram.
Keyword Arguments:
@@ -196,14 +208,10 @@ def emd_samples(first_array,
if range is None:
range = (min(np.min(first_array), np.min(second_array)),
max(np.max(first_array), np.max(second_array)))
- # Use automatic binning from `np.histogram()`
- # TODO: Use `np.histogram_bin_edges()` when it's available;
- # see https://github.com/numpy/numpy/issues/10183
- if isinstance(bins, str):
- hist, _ = np.histogram(np.concatenate([first_array, second_array]),
- range=range,
- bins=bins)
- bins = len(hist)
+ # Get bin edges using both arrays
+ bins = get_bins(np.concatenate([first_array, second_array]),
+ range=range,
+ bins=bins)
# Compute histograms
first_histogram, bin_edges = np.histogram(first_array,
range=range,
diff --git a/pyemd/lib/EMD_DEFS.hpp b/src/pyemd/lib/EMD_DEFS.hpp
similarity index 99%
rename from pyemd/lib/EMD_DEFS.hpp
rename to src/pyemd/lib/EMD_DEFS.hpp
index 27ae75f..b118379 100644
--- a/pyemd/lib/EMD_DEFS.hpp
+++ b/src/pyemd/lib/EMD_DEFS.hpp
@@ -7,14 +7,14 @@
typedef int NODE_T;
//------------------------------------------------------------------------------
-#endif
+#endif
// Copyright (c) 2009-2012, Ofir Pele
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
-// met:
+// met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
diff --git a/pyemd/lib/emd_hat.hpp b/src/pyemd/lib/emd_hat.hpp
similarity index 100%
rename from pyemd/lib/emd_hat.hpp
rename to src/pyemd/lib/emd_hat.hpp
diff --git a/pyemd/lib/emd_hat_impl.hpp b/src/pyemd/lib/emd_hat_impl.hpp
similarity index 95%
rename from pyemd/lib/emd_hat_impl.hpp
rename to src/pyemd/lib/emd_hat_impl.hpp
index 9e49770..d265cf9 100644
--- a/pyemd/lib/emd_hat_impl.hpp
+++ b/src/pyemd/lib/emd_hat_impl.hpp
@@ -20,8 +20,8 @@ void fillFWithZeros(std::vector< std::vector >& F) {
}
}
}
-
-// Forward declarations
+
+// Forward declarations
template struct emd_hat_impl;
template
@@ -31,12 +31,12 @@ NUM_T emd_hat_gd_metric::operator()(const std::vector& P
std::vector< std::vector >* F) {
if (FLOW_TYPE!=NO_FLOW) fillFWithZeros(*F);
-
+
assert( (F!=NULL) || (FLOW_TYPE==NO_FLOW) );
-
+
std::vector P= Pc;
std::vector Q= Qc;
-
+
// Assuming metric property we can pre-flow 0-cost edges
{for (NODE_T i=0; i::operator()(const std::vector& P
}}
return emd_hat_impl()(Pc,Qc,P,Q,C,extra_mass_penalty,F);
-
+
} // emd_hat_gd_metric
template
@@ -77,7 +77,7 @@ NUM_T emd_hat::operator()(const std::vector& P, const st
// Blocking instantiation for a non-overloaded template param
template
struct emd_hat_impl {
-
+
}; // emd_hat_impl
@@ -125,8 +125,8 @@ struct emd_hat_impl_integral_types {
abs_diff_sum_P_sum_Q= sum_P-sum_Q;
}
//if (needToSwapFlow) cout << "needToSwapFlow" << endl;
-
- // creating the b vector that contains all vertexes
+
+ // creating the b vector that contains all vertices
std::vector b(2*N+2);
const NODE_T THRESHOLD_NODE= 2*N;
const NODE_T ARTIFICIAL_NODE= 2*N+1; // need to be last !
@@ -136,16 +136,16 @@ struct emd_hat_impl_integral_types {
{for (NODE_T i=N; i<2*N; ++i) {
b[i]= (Q[i-N]);
}}
-
+
// remark*) I put here a deficit of the extra mass, as mass that flows to the threshold node
// can be absorbed from all sources with cost zero (this is in reverse order from the paper,
// where incoming edges to the threshold node had the cost of the threshold and outgoing
// edges had the cost of zero)
// This also makes sum of b zero.
- b[THRESHOLD_NODE]= -abs_diff_sum_P_sum_Q;
+ b[THRESHOLD_NODE]= -abs_diff_sum_P_sum_Q;
b[ARTIFICIAL_NODE]= 0;
//-------------------------------------------------------
-
+
//-------------------------------------------------------
NUM_T maxC= 0;
{for (NODE_T i=0; i sources_that_flow_not_only_to_thresh;
- std::set< NODE_T > sinks_that_get_flow_not_only_from_thresh;
+ std::set< NODE_T > sources_that_flow_not_only_to_thresh;
+ std::set< NODE_T > sinks_that_get_flow_not_only_from_thresh;
NUM_T pre_flow_cost= 0;
//=============================================================
-
+
//=============================================================
// regular edges between sinks and sources without threshold edges
std::vector< std::list< edge > > c(b.size());
@@ -193,8 +193,8 @@ struct emd_hat_impl_integral_types {
{for (NODE_T i=N; i<2*N; ++i) {
b[i]= -b[i];
}}
-
-
+
+
// add edges from/to threshold node,
// note that costs are reversed to the paper (see also remark* above)
// It is important that it will be this way because of remark* above.
@@ -203,8 +203,8 @@ struct emd_hat_impl_integral_types {
}}
{for (NODE_T j=0; j(j+N, maxC) );
- }}
-
+ }}
+
// artificial arcs - Note the restriction that only one edge i,j is artificial so I ignore it...
{for (NODE_T i=0; i(ARTIFICIAL_NODE, maxC + 1 ) );
@@ -212,15 +212,15 @@ struct emd_hat_impl_integral_types {
}}
//=============================================================
-
-
-
- //====================================================
+
+
+
+ //====================================================
// remove nodes with supply demand of 0
- // and vertexes that are connected only to the
+ // and vertices that are connected only to the
// threshold vertex
- //====================================================
+ //====================================================
NODE_T current_node_name= 0;
// Note here it should be vector and not vector
// as I'm using -1 as a special flag !!!
@@ -230,7 +230,7 @@ struct emd_hat_impl_integral_types {
nodes_old_names.reserve(b.size());
{for (NODE_T i=0; i=N)
- }
+ }
}
}} //i
nodes_new_names[THRESHOLD_NODE]= current_node_name;
@@ -258,7 +258,7 @@ struct emd_hat_impl_integral_types {
++j;
}
}}
-
+
std::vector< std::list< edge > > cc(bb.size());
{for (NODE_T i=0; i mcf;
-
+
NUM_T my_dist;
-
+
std::vector< std::list< edge0 > > flows(bb.size());
//std::cout << bb.size() << std::endl;
@@ -301,7 +301,7 @@ struct emd_hat_impl_integral_types {
bool reverseEdge= it->_to_to]-N;
+ j= nodes_old_names[it->_to]-N;
} else {
i= nodes_old_names[it->_to];
j= nodes_old_names[new_name_from]-N;
@@ -318,18 +318,18 @@ struct emd_hat_impl_integral_types {
}
}
}
-
+
if (FLOW_TYPE==WITHOUT_EXTRA_MASS_FLOW) transform_flow_to_regular(*F,POrig,QOrig);
-
+
my_dist=
pre_flow_cost + // pre-flowing on cases where it was possible
mcf_dist + // solution of the transportation problem
(abs_diff_sum_P_sum_Q*extra_mass_penalty); // emd-hat extra mass penalty
-
+
return my_dist;
//-------------------------------------------------------
-
+
} // emd_hat_impl_integral_types (main implementation) operator()
};
//=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@@ -342,7 +342,7 @@ template
struct emd_hat_impl {
typedef int NUM_T;
-
+
NUM_T operator()(
const std::vector& POrig, const std::vector& QOrig,
const std::vector& P, const std::vector& Q,
@@ -351,14 +351,14 @@ struct emd_hat_impl {
std::vector< std::vector >* F) {
return emd_hat_impl_integral_types()(POrig,QOrig,P,Q,C,extra_mass_penalty,F);
}
-
+
}; // emd_hat_impl
template
struct emd_hat_impl {
typedef long int NUM_T;
-
+
NUM_T operator()(
const std::vector& POrig, const std::vector& QOrig,
const std::vector& P, const std::vector& Q,
@@ -368,14 +368,14 @@ struct emd_hat_impl {
return emd_hat_impl_integral_types()(POrig,QOrig,P,Q,C,extra_mass_penalty,F);
}
-
+
}; // emd_hat_impl
template
struct emd_hat_impl {
typedef long long int NUM_T;
-
+
NUM_T operator()(
const std::vector& POrig, const std::vector& QOrig,
const std::vector& P, const std::vector& Q,
@@ -384,7 +384,7 @@ struct emd_hat_impl {
std::vector< std::vector >* F) {
return emd_hat_impl_integral_types()(POrig,QOrig,P,Q,C,extra_mass_penalty,F);
}
-
+
}; // emd_hat_impl
//----------------------------------------------------------------------------------------
@@ -397,23 +397,23 @@ struct emd_hat_impl {
typedef double NUM_T;
typedef long long int CONVERT_TO_T;
-
+
NUM_T operator()(
const std::vector& POrig, const std::vector& QOrig,
const std::vector& P, const std::vector& Q,
const std::vector< std::vector >& C,
NUM_T extra_mass_penalty,
std::vector< std::vector >* F) {
-
+
// TODO: static assert
assert(sizeof(CONVERT_TO_T)>=8);
-
+
// This condition should hold:
// ( 2^(sizeof(CONVERT_TO_T*8)) >= ( MULT_FACTOR^2 )
// Note that it can be problematic to check it because
// of overflow problems. I simply checked it with Linux calc
// which has arbitrary precision.
- const double MULT_FACTOR= 1000000;
+ const double MULT_FACTOR= 1000000;
// Constructing the input
const NODE_T N= P.size();
@@ -457,11 +457,11 @@ struct emd_hat_impl {
// unnormalize
dist= dist/PQnormFactor;
dist= dist/CnormFactor;
-
+
// adding extra mass penalty
if (extra_mass_penalty==-1) extra_mass_penalty= maxC;
dist+= (maxSum-minSum)*extra_mass_penalty;
-
+
// converting flow to double
if (FLOW_TYPE!=NO_FLOW) {
for (NODE_T i= 0; i {
}
}
}
-
+
return dist;
}
-
+
}; // emd_hat_impl
//----------------------------------------------------------------------------------------
#endif
@@ -483,7 +483,7 @@ struct emd_hat_impl {
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
-// met:
+// met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
diff --git a/pyemd/lib/emd_hat_signatures_interface.hpp b/src/pyemd/lib/emd_hat_signatures_interface.hpp
similarity index 97%
rename from pyemd/lib/emd_hat_signatures_interface.hpp
rename to src/pyemd/lib/emd_hat_signatures_interface.hpp
index d42b87f..1e08a9c 100644
--- a/pyemd/lib/emd_hat_signatures_interface.hpp
+++ b/src/pyemd/lib/emd_hat_signatures_interface.hpp
@@ -6,15 +6,15 @@
//=============================================================================
// This interface is similar to Rubner's interface. See:
-// http://www.cs.duke.edu/~tomasi/software/emd.htm
+// https://www.cs.duke.edu/~tomasi/software/emd.htm
// With the following changes:
// 1. Weights of signature should be of type NUM_T (see emd_hat.hpp)
-// 2. Return value of the distance function (func) should be of type NUM_T
-// 3. Return value of the emd_hat_signature_interface function is NUM_T
+// 2. Return value of the distance function (func) should be of type NUM_T
+// 3. Return value of the emd_hat_signature_interface function is NUM_T
// 4. The function does not return a flow (I may add this in the future, if needed)
// 5. The function also gets the penalty for extra mass - if you want the metric property, it
// should be at least half the diameter of the space (maximum possible distance
-// between any two points). In Rubner's code this is implicitly 0.
+// between any two points). In Rubner's code this is implicitly 0.
// 6. The result is not normalized with the flow.
//
// To get the same results as Rubner's code you should set extra_mass_penalty to 0,
@@ -59,16 +59,16 @@ template
NUM_T emd_hat_signature_interface(signature_tt* Signature1, signature_tt* Signature2,
NUM_T (*func)(feature_tt*, feature_tt*),
NUM_T extra_mass_penalty) {
-
+
std::vector P(Signature1->n + Signature2->n , 0);
- std::vector Q(Signature1->n + Signature2->n , 0);
+ std::vector Q(Signature1->n + Signature2->n , 0);
for (int i=0; in; ++i) {
P[i]= Signature1->Weights[i];
}
for (int j=0; jn; ++j) {
Q[j+Signature1->n]= Signature2->Weights[j];
}
-
+
std::vector< std::vector > C(P.size(), std::vector(P.size(), 0) );
{for (int i=0; in; ++i) {
{for (int j=0; jn; ++j) {
@@ -90,7 +90,7 @@ NUM_T emd_hat_signature_interface(signature_tt* Signature1, signature_tt<
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
-// met:
+// met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
diff --git a/pyemd/lib/flow_utils.hpp b/src/pyemd/lib/flow_utils.hpp
similarity index 99%
rename from pyemd/lib/flow_utils.hpp
rename to src/pyemd/lib/flow_utils.hpp
index b056dd9..d9dba6a 100644
--- a/pyemd/lib/flow_utils.hpp
+++ b/src/pyemd/lib/flow_utils.hpp
@@ -46,7 +46,7 @@ void transform_flow_to_regular(std::vector< std::vector >& F,
return_flow_from_to_transhipment_vertex(F,P,Q,
flow_from_P_to_transhipment,
flow_from_transhipment_to_Q);
-
+
NODE_T i= 0;
NODE_T j= 0;
while( true ) {
@@ -54,7 +54,7 @@ void transform_flow_to_regular(std::vector< std::vector >& F,
while (i >& F,
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
-// met:
+// met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
diff --git a/pyemd/lib/min_cost_flow.hpp b/src/pyemd/lib/min_cost_flow.hpp
similarity index 95%
rename from pyemd/lib/min_cost_flow.hpp
rename to src/pyemd/lib/min_cost_flow.hpp
index 57bc037..39fef40 100644
--- a/pyemd/lib/min_cost_flow.hpp
+++ b/src/pyemd/lib/min_cost_flow.hpp
@@ -84,13 +84,13 @@ class min_cost_flow {
//for (NODE_T i=0; i >::const_iterator it= c[from].begin(); it!=c[from].end(); ++it) {
@@ -98,7 +98,7 @@ class min_cost_flow {
x[it->_to].push_back( edge0 (from, -it->_cost,0) );
}} // it
}} // from
-
+
// reduced costs for forward edges (c[i,j]-pi[i]+pi[j])
// Note that for forward edges the residual capacity is infinity
std::vector< std::list< edge1 > > r_cost_forward(_num_nodes);
@@ -107,7 +107,7 @@ class min_cost_flow {
r_cost_forward[from].push_back( edge1(it->_to,it->_cost) );
}}
}}
-
+
// reduced costs and capacity for backward edges (c[j,i]-pi[j]+pi[i])
// Since the flow at the beginning is 0, the residual capacity is also zero
std::vector< std::list< edge2 > > r_cost_cap_backward(_num_nodes);
@@ -116,7 +116,7 @@ class min_cost_flow {
r_cost_cap_backward[ it->_to ].push_back( edge2(from,-it->_cost,0) );
}} // it
}} // from
-
+
// Max supply TODO:demand?, given U?, optimization-> min out of demand,supply
NUM_T U= 0;
{for (NODE_T i=0; i<_num_nodes; ++i) {
@@ -124,26 +124,26 @@ class min_cost_flow {
}}
NUM_T delta= static_cast(pow(2.0l,ceil(log(static_cast(U))/log(2.0))));
-
+
std::vector< NUM_T > d(_num_nodes);
std::vector< NODE_T > prev(_num_nodes);
delta= 1;
//while (delta>=1) {
-
+
// delta-scaling phase
//cout << "delta==" << delta << endl;
-
+
//tictoc_while_true.tic();
while (true) { //until we break when S or T is empty
-
+
NUM_T maxSupply= 0;
NODE_T k=0;
for (NODE_T i=0; i<_num_nodes; ++i) {
if (e[i]>0) {
if (maxSupply >::iterator itccb= r_cost_cap_backward[from].begin();
while ( (itccb!=r_cost_cap_backward[from].end()) && (itccb->_to!=to) ) {
@@ -174,7 +174,7 @@ class min_cost_flow {
if (itccb!=r_cost_cap_backward[from].end()) {
if (itccb->_residual_capacity_residual_capacity;
}
-
+
to= from;
} while (to!=k);
//---------------------------------------------------------------
@@ -185,14 +185,14 @@ class min_cost_flow {
do {
NODE_T from= prev[to];
assert(from!=to);
-
+
// TODO - this lookup is O(n) here; it can be done in O(1)
typename std::list< edge0 >::iterator itx= x[from].begin();
while (itx->_to!=to) {
++itx;
}
itx->_flow+= delta;
-
+
// update residual for backward edges
typename std::list< edge2 >::iterator itccb= r_cost_cap_backward[to].begin();
while ( (itccb!=r_cost_cap_backward[to].end()) && (itccb->_to!=from) ) {
@@ -212,21 +212,21 @@ class min_cost_flow {
// update e
e[to]+= delta;
e[from]-= delta;
-
+
to= from;
} while (to!=k);
//---------------------------------------------------------------------------------
-
-
+
+
} // while true (until we break when S or T is empty)
//tictoc_while_true.toc();
//cout << "while true== " << tictoc_while_true.totalTimeSec() << endl;
-
+
//delta= delta/2;
//} // (delta-scaling phase)
-
-
+
+
// compute distance from x
//cout << endl << endl;
NUM_T dist= 0;
@@ -236,8 +236,8 @@ class min_cost_flow {
dist+= (it->_cost*it->_flow);
}} // it
}} // from
-
-
+
+
//tictoc_all_function.toc();
//cout << "operator() time==" << tictoc_all_function.totalTimeSec() << endl;
//cout << "compute_shortest_path_time==" << tictoc_shortest_path.totalTimeSec() << endl;
@@ -260,24 +260,24 @@ class min_cost_flow {
void compute_shortest_path(std::vector< NUM_T >& d,
std::vector< NODE_T >& prev,
-
+
NODE_T from,
std::vector< std::list< edge1 > >& cost_forward,
std::vector< std::list< edge2 > >& cost_backward,
const std::vector& e,
NODE_T& l) {
-
-
+
+
//----------------------------------------------------------------
// Making heap (all inf except 0, so we are saving comparisons...)
//----------------------------------------------------------------
std::vector< edge3 > Q(_num_nodes);
-
+
Q[0]._to= from;
_nodes_to_Q[from]= 0;
- Q[0]._dist= 0;
-
+ Q[0]._dist= 0;
+
NODE_T j=1;
// TODO: both of these into a function?
{for (NODE_T i=0; i finalNodesFlg(_num_nodes, false);
do {
NODE_T u= Q[0]._to;
-
+
d[u]= Q[0]._dist; // final distance
finalNodesFlg[u]= true;
if (e[u]<0) {
l= u;
break;
}
-
+
heap_remove_first(Q, _nodes_to_Q);
-
-
- // neighbors of u
+
+
+ // neighbors of u
{for (typename std::list< edge1 >::const_iterator it= cost_forward[u].begin(); it!=cost_forward[u].end(); ++it) {
assert (it->_reduced_cost>=0);
NUM_T alt= d[u]+it->_reduced_cost;
@@ -339,7 +339,7 @@ class min_cost_flow {
} while (!Q.empty());
-
+
//tmp_tic_toc.tic();
//---------------------------------------------------------------------------------
// reduced costs for forward edges (c[i,j]-pi[i]+pi[j])
@@ -354,7 +354,7 @@ class min_cost_flow {
}
} }
}}
-
+
// reduced costs and capacity for backward edges (c[j,i]-pi[j]+pi[i])
{for (NODE_T from=0; from<_num_nodes; ++from) {
{ for (typename std::list< edge2 >::iterator it= cost_backward[from].begin();
@@ -365,15 +365,15 @@ class min_cost_flow {
if (finalNodesFlg[it->_to]) {
it->_reduced_cost-= d[it->_to] - d[l];
}
-
+
} }// it
}}
//---------------------------------------------------------------------------------
//tmp_tic_toc.toc();
-
+
//----------------------------------------------------------------
-
-
+
+
} // compute_shortest_path
void heap_decrease_key(std::vector< edge3 >& Q, std::vector& nodes_to_Q,
@@ -385,7 +385,7 @@ class min_cost_flow {
i= PARENT(i);
}
} // heap_decrease_key
-
+
void heap_remove_first(std::vector< edge3 >& Q, std::vector& nodes_to_Q) {
swap_heap(Q, nodes_to_Q, 0, Q.size()-1);
Q.pop_back();
@@ -415,9 +415,9 @@ class min_cost_flow {
swap_heap(Q, nodes_to_Q, i, smallest);
i= smallest;
-
+
} while (true);
-
+
} // end heapify
@@ -430,10 +430,10 @@ class min_cost_flow {
nodes_to_Q[ Q[j]._to ]= j;
nodes_to_Q[ Q[i]._to ]= i;
} // swap_heapify
-
+
NODE_T LEFT(NODE_T i) {
return 2*(i+1)-1;
- }
+ }
NODE_T RIGHT(NODE_T i) {
return 2*(i+1); // 2*(i+1)+1-1
@@ -442,7 +442,7 @@ class min_cost_flow {
NODE_T PARENT(NODE_T i) {
return (i-1)/2;
}
-
+
}; // end min_cost_flow
@@ -453,7 +453,7 @@ class min_cost_flow {
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
-// met:
+// met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
diff --git a/test/test_pyemd.py b/test/test_pyemd.py
index 96c7ce2..b3f211b 100644
--- a/test/test_pyemd.py
+++ b/test/test_pyemd.py
@@ -8,6 +8,7 @@
from pyemd import emd, emd_samples, emd_with_flow
+
EMD_PRECISION = 5
FLOW_PRECISION = 4
@@ -30,60 +31,52 @@ def emd_flow_assert(got, expected):
def test_emd_1():
first_signature = np.array([0.0, 1.0])
second_signature = np.array([5.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5],
- [0.5, 0.0]])
- emd_assert(
- emd(first_signature, second_signature, distance_matrix),
- 3.5
- )
+ distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
+ emd_assert(emd(first_signature, second_signature, distance_matrix), 3.5)
def test_emd_2():
first_signature = np.array([1.0, 1.0])
second_signature = np.array([1.0, 1.0])
- distance_matrix = np.array([[0.0, 1.0],
- [1.0, 0.0]])
- emd_assert(
- emd(first_signature, second_signature, distance_matrix),
- 0.0
- )
+ distance_matrix = np.array([[0.0, 1.0], [1.0, 0.0]])
+ emd_assert(emd(first_signature, second_signature, distance_matrix), 0.0)
def test_emd_3():
first_signature = np.array([6.0, 1.0])
second_signature = np.array([1.0, 7.0])
- distance_matrix = np.array([[0.0, 0.0],
- [0.0, 0.0]])
- emd_assert(
- emd(first_signature, second_signature, distance_matrix),
- 0.0
- )
+ distance_matrix = np.array([[0.0, 0.0], [0.0, 0.0]])
+ emd_assert(emd(first_signature, second_signature, distance_matrix), 0.0)
def test_emd_4():
first_signature = np.array([1.0, 2.0, 1.0, 2.0])
second_signature = np.array([2.0, 1.0, 2.0, 1.0])
- distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
- [1.0, 0.0, 2.0, 1.0],
- [1.0, 2.0, 0.0, 1.0],
- [2.0, 1.0, 1.0, 0.0]])
- emd_assert(
- emd(first_signature, second_signature, distance_matrix),
- 2.0
+ distance_matrix = np.array(
+ [
+ [0.0, 1.0, 1.0, 2.0],
+ [1.0, 0.0, 2.0, 1.0],
+ [1.0, 2.0, 0.0, 1.0],
+ [2.0, 1.0, 1.0, 0.0],
+ ]
)
+ emd_assert(emd(first_signature, second_signature, distance_matrix), 2.0)
def test_emd_extra_mass_penalty():
first_signature = np.array([0.0, 2.0, 1.0, 2.0])
second_signature = np.array([2.0, 1.0, 2.0, 1.0])
- distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
- [1.0, 0.0, 2.0, 1.0],
- [1.0, 2.0, 0.0, 1.0],
- [2.0, 1.0, 1.0, 0.0]])
+ distance_matrix = np.array(
+ [
+ [0.0, 1.0, 1.0, 2.0],
+ [1.0, 0.0, 2.0, 1.0],
+ [1.0, 2.0, 0.0, 1.0],
+ [2.0, 1.0, 1.0, 0.0],
+ ]
+ )
emd_assert(
- emd(first_signature, second_signature, distance_matrix,
- extra_mass_penalty=2.5),
- 4.5
+ emd(first_signature, second_signature, distance_matrix, extra_mass_penalty=2.5),
+ 4.5,
)
@@ -93,8 +86,7 @@ def test_emd_extra_mass_penalty():
def test_emd_validate_larger_signatures_1():
first_signature = np.array([0.0, 1.0, 2.0])
second_signature = np.array([5.0, 3.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
with pytest.raises(ValueError):
emd(first_signature, second_signature, distance_matrix)
@@ -102,8 +94,7 @@ def test_emd_validate_larger_signatures_1():
def test_emd_validate_larger_signatures_2():
first_signature = np.array([0.0, 1.0, 2.0])
second_signature = np.array([5.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
# emd() must reject a first signature longer than the 2x2 distance matrix.
with pytest.raises(ValueError):
emd(first_signature, second_signature, distance_matrix)
@@ -119,9 +110,7 @@ def test_emd_validate_larger_signatures_3():
def test_emd_validate_different_signature_dims():
first_signature = np.array([0.0, 1.0])
second_signature = np.array([5.0, 3.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5, 0.0],
- [0.5, 0.0, 0.0],
- [0.5, 0.0, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0]])
with pytest.raises(ValueError):
emd(first_signature, second_signature, distance_matrix)
@@ -129,8 +118,7 @@ def test_emd_validate_different_signature_dims():
def test_emd_validate_symmetric_distance_matrix():
first_signature = np.array([0.0, 1.0])
second_signature = np.array([5.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5, 3.0],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5, 3.0], [0.5, 0.0]], dtype=object)
with pytest.raises(ValueError):
emd(first_signature, second_signature, distance_matrix)
@@ -142,93 +130,102 @@ def test_emd_validate_symmetric_distance_matrix():
def test_emd_with_flow_1():
first_signature = np.array([0.0, 1.0])
second_signature = np.array([5.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
emd_flow_assert(
emd_with_flow(first_signature, second_signature, distance_matrix),
- (3.5, [[0.0, 0.0],
- [0.0, 1.0]])
+ (3.5, [[0.0, 0.0], [0.0, 1.0]]),
)
def test_emd_with_flow_2():
first_signature = np.array([1.0, 1.0])
second_signature = np.array([1.0, 1.0])
- distance_matrix = np.array([[0.0, 1.0],
- [1.0, 0.0]])
+ distance_matrix = np.array([[0.0, 1.0], [1.0, 0.0]])
emd_flow_assert(
emd_with_flow(first_signature, second_signature, distance_matrix),
- (0.0, [[1.0, 0.0],
- [0.0, 1.0]])
+ (0.0, [[1.0, 0.0], [0.0, 1.0]]),
)
def test_emd_with_flow_3():
first_signature = np.array([6.0, 1.0])
second_signature = np.array([1.0, 7.0])
- distance_matrix = np.array([[0.0, 0.0],
- [0.0, 0.0]])
+ distance_matrix = np.array([[0.0, 0.0], [0.0, 0.0]])
emd_flow_assert(
emd_with_flow(first_signature, second_signature, distance_matrix),
- (0.0, [[1.0, 5.0],
- [0.0, 1.0]])
+ (0.0, [[1.0, 5.0], [0.0, 1.0]]),
)
def test_emd_with_flow_4():
first_signature = np.array([1.0, 7.0])
second_signature = np.array([6.0, 1.0])
- distance_matrix = np.array([[0.0, 0.0],
- [0.0, 0.0]])
+ distance_matrix = np.array([[0.0, 0.0], [0.0, 0.0]])
emd_flow_assert(
emd_with_flow(first_signature, second_signature, distance_matrix),
- (0.0, [[1.0, 0.0],
- [5.0, 1.0]])
+ (0.0, [[1.0, 0.0], [5.0, 1.0]]),
)
def test_emd_with_flow_5():
first_signature = np.array([3.0, 5.0])
second_signature = np.array([6.0, 2.0])
- distance_matrix = np.array([[0.0, 0.0],
- [0.0, 0.0]])
+ distance_matrix = np.array([[0.0, 0.0], [0.0, 0.0]])
emd_flow_assert(
emd_with_flow(first_signature, second_signature, distance_matrix),
- (0.0, [[3.0, 0.0],
- [3.0, 2.0]])
+ (0.0, [[3.0, 0.0], [3.0, 2.0]]),
)
def test_emd_with_flow_6():
first_signature = np.array([1.0, 2.0, 1.0, 2.0])
second_signature = np.array([2.0, 1.0, 2.0, 1.0])
- distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
- [1.0, 0.0, 2.0, 1.0],
- [1.0, 2.0, 0.0, 1.0],
- [2.0, 1.0, 1.0, 0.0]])
+ distance_matrix = np.array(
+ [
+ [0.0, 1.0, 1.0, 2.0],
+ [1.0, 0.0, 2.0, 1.0],
+ [1.0, 2.0, 0.0, 1.0],
+ [2.0, 1.0, 1.0, 0.0],
+ ]
+ )
emd_flow_assert(
emd_with_flow(first_signature, second_signature, distance_matrix),
- (2.0, [[1.0, 0.0, 0.0, 0.0],
- [1.0, 1.0, 0.0, 0.0],
- [0.0, 0.0, 1.0, 0.0],
- [0.0, 0.0, 1.0, 1.0]])
+ (
+ 2.0,
+ [
+ [1.0, 0.0, 0.0, 0.0],
+ [1.0, 1.0, 0.0, 0.0],
+ [0.0, 0.0, 1.0, 0.0],
+ [0.0, 0.0, 1.0, 1.0],
+ ],
+ ),
)
def test_emd_with_flow_extra_mass_penalty():
first_signature = np.array([0.0, 2.0, 1.0, 2.0])
second_signature = np.array([2.0, 1.0, 2.0, 1.0])
- distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
- [1.0, 0.0, 2.0, 1.0],
- [1.0, 2.0, 0.0, 1.0],
- [2.0, 1.0, 1.0, 0.0]])
+ distance_matrix = np.array(
+ [
+ [0.0, 1.0, 1.0, 2.0],
+ [1.0, 0.0, 2.0, 1.0],
+ [1.0, 2.0, 0.0, 1.0],
+ [2.0, 1.0, 1.0, 0.0],
+ ]
+ )
emd_flow_assert(
- emd_with_flow(first_signature, second_signature, distance_matrix,
- extra_mass_penalty=2.5),
- (4.5, [[0.0, 0.0, 0.0, 0.0],
- [1.0, 1.0, 0.0, 0.0],
- [0.0, 0.0, 1.0, 0.0],
- [0.0, 0.0, 1.0, 1.0]])
+ emd_with_flow(
+ first_signature, second_signature, distance_matrix, extra_mass_penalty=2.5
+ ),
+ (
+ 4.5,
+ [
+ [0.0, 0.0, 0.0, 0.0],
+ [1.0, 1.0, 0.0, 0.0],
+ [0.0, 0.0, 1.0, 0.0],
+ [0.0, 0.0, 1.0, 1.0],
+ ],
+ ),
)
@@ -238,8 +235,7 @@ def test_emd_with_flow_extra_mass_penalty():
def test_emd_with_flow_validate_larger_signatures_1():
first_signature = np.array([0.0, 1.0, 2.0])
second_signature = np.array([5.0, 3.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
with pytest.raises(ValueError):
emd_with_flow(first_signature, second_signature, distance_matrix)
@@ -247,8 +243,7 @@ def test_emd_with_flow_validate_larger_signatures_1():
def test_emd_with_flow_validate_larger_signatures_2():
first_signature = np.array([0.0, 1.0, 2.0])
second_signature = np.array([5.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
# emd_with_flow() must reject a first signature longer than the 2x2 distance matrix.
with pytest.raises(ValueError):
emd_with_flow(first_signature, second_signature, distance_matrix)
@@ -256,8 +251,7 @@ def test_emd_with_flow_validate_larger_signatures_2():
def test_emd_with_flow_validate_larger_signatures_3():
first_signature = np.array([0.0, 1.0])
second_signature = np.array([5.0, 3.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
with pytest.raises(ValueError):
emd_with_flow(first_signature, second_signature, distance_matrix)
@@ -265,9 +259,7 @@ def test_emd_with_flow_validate_larger_signatures_3():
def test_emd_with_flow_validate_different_signature_dims():
first_signature = np.array([0.0, 1.0])
second_signature = np.array([5.0, 3.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5, 0.0],
- [0.5, 0.0, 0.0],
- [0.5, 0.0, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0]])
with pytest.raises(ValueError):
emd_with_flow(first_signature, second_signature, distance_matrix)
@@ -275,8 +267,7 @@ def test_emd_with_flow_validate_different_signature_dims():
def test_emd_with_flow_validate_square_distance_matrix():
first_signature = np.array([0.0, 1.0])
second_signature = np.array([5.0, 3.0])
- distance_matrix = np.array([[0.0, 0.5, 3.0],
- [0.5, 0.0]])
+ distance_matrix = np.array([[0.0, 0.5, 3.0], [0.5, 0.0]], dtype=object)
with pytest.raises(ValueError):
emd_with_flow(first_signature, second_signature, distance_matrix)
@@ -310,7 +301,9 @@ def test_emd_samples_1_not_normalized():
def test_emd_samples_1_custom_distance():
- dist = lambda x: np.array([[0.0 if i == j else 1.0 for i in x] for j in x])
+ def dist(x):
+ return np.array([[0.0 if i == j else 1.0 for i in x] for j in x])
+
first_array = [1, 2, 3, 4]
second_array = [2, 3, 4, 5]
emd_assert(emd_samples(first_array, second_array, distance=dist), 0.25)
@@ -318,18 +311,21 @@ def test_emd_samples_1_custom_distance():
def test_emd_samples_all_kwargs():
# Regression only; not checked by hand
- dist = lambda x: [
- [(i - j)**3 for i in range(len(x))] for j in range(len(x))
- ]
+ def dist(x):
+ return [[(i - j) ** 3 for i in range(len(x))] for j in range(len(x))]
+
first_array = [1, 2, 3, 4, 5]
second_array = [2, 3, 4, 5]
emd_assert(
- emd_samples(first_array, second_array,
- bins=30,
- normalized=False,
- range=(-5, 15),
- distance=dist),
- 24389.0
+ emd_samples(
+ first_array,
+ second_array,
+ bins=30,
+ normalized=False,
+ range=(-5, 15),
+ distance=dist,
+ ),
+ 24389.0,
)
@@ -368,7 +364,9 @@ def test_emd_samples_validate_empty():
def test_emd_samples_validate_distance_matrix_square():
- dist = lambda x: [[1, 2, 3]]
+ def dist(x):
+ return [[1, 2, 3]]
+
first_array = [1, 2, 3]
second_array = [1, 2, 3]
with pytest.raises(ValueError):
@@ -376,8 +374,9 @@ def test_emd_samples_validate_distance_matrix_square():
def test_emd_samples_validate_distance_matrix_size():
- dist = lambda x: [[0, 1],
- [1, 0]]
+ def dist(x):
+ return [[0, 1], [1, 0]]
+
first_array = [1, 2, 3, 4]
second_array = [1, 2, 3, 4]
with pytest.raises(ValueError):
diff --git a/test_requirements.txt b/test_requirements.txt
deleted file mode 100644
index 5bbae8f..0000000
--- a/test_requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pytest
-tox
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index b035c93..0000000
--- a/tox.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[tox]
-envlist = py{27,34,35,36}
-
-[testenv]
-deps = -r{toxinidir}/test_requirements.txt
-commands = make test
-whitelist_externals = make