Initial commit

analyticsinmotion · Nov 19, 2023 · 484d22a · 484d22a
1 parent e5df5ed
commit 484d22a
Show file tree

Hide file tree

Showing 13 changed files with 97 additions and 129 deletions.
diff --git a/meson.build b/meson.build
@@ -0,0 +1,20 @@
+project(
+    'werpy_test_v4',
+    'c',
+    version : '0.0.1',
+    default_options : ['warning_level=3'])
+
+# pure: false -> the package ships a compiled extension, so it must be
+# installed into the platform-specific site-packages directory.
+py = import('python').find_installation(pure: false)
+
+# Translate the Cython source into a C file at build time.
+cython = find_program('cython')
+pyx_files = files('werpy_test_v4/metrics.pyx')
+c_files = custom_target('cythonize',
+                        output : 'metrics.c',
+                        input : pyx_files,
+                        command : [cython, '-3', '--fast-fail', '-o', '@OUTPUT@', '@INPUT@'])
+
+# Cython derives the module init symbol from the .pyx file name
+# (PyInit_metrics), so the extension has to be called 'metrics'; naming it
+# after the package would produce a shared object that cannot be imported.
+# `subdir` installs it inside the package as werpy_test_v4.metrics.
+# NOTE(review): metrics.pyx uses `cimport numpy`, which normally requires the
+# NumPy headers on the include path - confirm and add include_directories
+# (and NumPy as a build requirement) if the build fails on arrayobject.h.
+py.extension_module(
+    'metrics',
+    c_files,
+    subdir : 'werpy_test_v4',
+    install : true
+)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,17 +1,18 @@
 [build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
+build-backend = 'mesonpy'
+requires = ['meson-python', 'wheel', 'Cython']
+#requires = ['meson-python', 'wheel', 'Cython', 'numpy']
 
 [project]
-name = "werpy"
-version = "1.1.2"
+name = 'werpy_test_v4'
+version = '0.0.1'
+description = 'A powerful yet lightweight Python package to calculate and analyze the Word Error Rate (WER).'
+readme = 'README.md'
+requires-python = '>=3.8'
+license = {file = 'LICENSE'}
 authors = [
-  { name="Ross Armstrong", email="ross.armstrong@analyticsinmotion.com" },
+  {name = 'Ross Armstrong', email = 'ross.armstrong@analyticsinmotion.com'},
 ]
-description = "A powerful yet lightweight Python package to calculate and analyze the Word Error Rate (WER)."
-readme = "README.md"
-license = "BSD-3-Clause"
-requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Python :: 3",
     "License :: OSI Approved :: BSD License",
@@ -25,7 +26,6 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Scientific/Engineering :: Mathematics"
 ]
-
 keywords = [
     "wer",
     "word error rate",
@@ -37,7 +37,6 @@ keywords = [
     "nlp",
     "metrics"
 ]
-
 dependencies = [
   "numpy>=1.21.6; python_version<'3.11'",
   "numpy>=1.23.2; python_version>='3.11'",

diff --git a/werpy/metrics.py b/werpy/metrics.py
diff --git a/werpy/__init__.py → werpy_test_v4/__init__.py b/werpy/__init__.py → werpy_test_v4/__init__.py
diff --git a/werpy/errorhandler.py → werpy_test_v4/errorhandler.py b/werpy/errorhandler.py → werpy_test_v4/errorhandler.py
diff --git a/werpy_test_v4/metrics.pyx b/werpy_test_v4/metrics.pyx
@@ -0,0 +1,67 @@
+# metrics.pyx
+import numpy as np
+cimport numpy as np
+
+# Add cimport cython here
+cimport cython
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cpdef np.ndarray calculations(object reference, object hypothesis):
+    """
+    Compare one reference string with one hypothesis string, word by word.
+
+    Both inputs are split on whitespace. A word-level Levenshtein matrix is
+    filled in, then walked backwards from the bottom-right corner to
+    classify each edit as an insertion, deletion or substitution.
+
+    Parameters
+    ----------
+    reference : object
+        Value whose ``split()`` yields the reference words.
+    hypothesis : object
+        Value whose ``split()`` yields the hypothesis words.
+
+    Returns
+    -------
+    np.ndarray
+        Object array holding, in order: word error rate, edit distance,
+        number of reference words, insertion count, deletion count,
+        substitution count, inserted words, deleted words, and substituted
+        ``(reference_word, hypothesis_word)`` pairs.
+
+    Notes
+    -----
+    The error rate is the distance divided by the number of reference
+    words, so a reference without any words leads to a division by zero.
+    """
+    cdef list ref_tokens = reference.split()
+    cdef list hyp_tokens = hypothesis.split()
+
+    cdef Py_ssize_t ref_len, hyp_len, row, col, cost, distance
+    cdef Py_ssize_t ins_count, del_count, sub_count
+    cdef list ins_words, del_words, sub_pairs
+
+    ref_len = len(ref_tokens)
+    hyp_len = len(hyp_tokens)
+    table = [[0] * (hyp_len + 1) for _ in range(ref_len + 1)]
+
+    # First row: turning an empty reference into the first `col` hypothesis
+    # words costs `col` insertions.
+    for col in range(1, hyp_len + 1):
+        table[0][col] = col
+
+    for row in range(1, ref_len + 1):
+        # First column: dropping the first `row` reference words.
+        table[row][0] = row
+        for col in range(1, hyp_len + 1):
+            if ref_tokens[row - 1] == hyp_tokens[col - 1]:
+                cost = 0
+            else:
+                cost = 1
+            table[row][col] = min(
+                table[row - 1][col] + 1,  # Deletion
+                table[row][col - 1] + 1,  # Insertion
+                table[row - 1][col - 1] + cost  # Substitution
+            )
+
+    distance = table[ref_len][hyp_len]
+    error_rate = distance / ref_len
+
+    ins_count = 0
+    del_count = 0
+    sub_count = 0
+    ins_words = []
+    del_words = []
+    sub_pairs = []
+
+    # Walk back from the bottom-right cell. The branch order matters: a
+    # matching word is consumed first, then substitution is preferred over
+    # insertion, and insertion over deletion.
+    row = ref_len
+    col = hyp_len
+    while row > 0 or col > 0:
+        if row > 0 and col > 0 and ref_tokens[row - 1] == hyp_tokens[col - 1]:
+            row -= 1
+            col -= 1
+        elif row > 0 and col > 0 and table[row][col] == table[row - 1][col - 1] + 1:
+            sub_count += 1
+            sub_pairs.append((ref_tokens[row - 1], hyp_tokens[col - 1]))
+            row -= 1
+            col -= 1
+        elif col > 0 and table[row][col] == table[row][col - 1] + 1:
+            ins_count += 1
+            ins_words.append(hyp_tokens[col - 1])
+            col -= 1
+        elif row > 0 and table[row][col] == table[row - 1][col] + 1:
+            del_count += 1
+            del_words.append(ref_tokens[row - 1])
+            row -= 1
+
+    # The words were gathered back to front; restore sentence order.
+    ins_words.reverse()
+    del_words.reverse()
+    sub_pairs.reverse()
+
+    summary = [
+        error_rate, distance, ref_len,
+        ins_count, del_count, sub_count,
+        ins_words, del_words, sub_pairs,
+    ]
+    return np.array(summary, dtype=object)
+
+def metrics(reference, hypothesis):
+    """
+    Run ``calculations`` over the inputs through ``np.vectorize``.
+
+    Parameters
+    ----------
+    reference, hypothesis
+        Passed unchanged to the vectorized ``calculations`` callable.
+
+    Returns
+    -------
+    Whatever the vectorized call produces for the given inputs.
+    """
+    elementwise = np.vectorize(calculations)
+    return elementwise(reference, hypothesis)
diff --git a/werpy/normalize.py → werpy_test_v4/normalize.py b/werpy/normalize.py → werpy_test_v4/normalize.py
diff --git a/werpy/summary.py → werpy_test_v4/summary.py b/werpy/summary.py → werpy_test_v4/summary.py
diff --git a/werpy/summaryp.py → werpy_test_v4/summaryp.py b/werpy/summaryp.py → werpy_test_v4/summaryp.py
diff --git a/werpy/wer.py → werpy_test_v4/wer.py b/werpy/wer.py → werpy_test_v4/wer.py
diff --git a/werpy/werp.py → werpy_test_v4/werp.py b/werpy/werp.py → werpy_test_v4/werp.py
diff --git a/werpy/werps.py → werpy_test_v4/werps.py b/werpy/werps.py → werpy_test_v4/werps.py
diff --git a/werpy/wers.py → werpy_test_v4/wers.py b/werpy/wers.py → werpy_test_v4/wers.py