Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
rossarmstrong committed Nov 19, 2023
1 parent e5df5ed commit 484d22a
Show file tree
Hide file tree
Showing 13 changed files with 97 additions and 129 deletions.
20 changes: 20 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Meson build definition for the werpy_test_v4 package.
# It translates werpy_test_v4/metrics.pyx to C with Cython and compiles the
# result into a Python extension module.
project(
  'werpy_test_v4',
  'c',
  version : '0.0.1',
  default_options : ['warning_level=3'])

# pure: false -> the package contains a compiled extension, not only .py files.
py = import('python').find_installation(pure: false)

# Step 1: Cython (.pyx) -> C source, generated inside the build directory.
cython = find_program('cython')
pyx_files = files('werpy_test_v4/metrics.pyx')
c_files = custom_target('cythonize',
  output : 'metrics.c',
  input : pyx_files,
  command : [cython, '-3', '--fast-fail', '-o', '@OUTPUT@', '@INPUT@'])

# Step 2: C source -> extension module.
# The extension name has to match the init symbol Cython derives from the
# .pyx file name (PyInit_metrics); naming it after the project would yield a
# shared object that fails on import. `subdir` installs it inside the
# package so it is importable as `werpy_test_v4.metrics`.
# NOTE(review): metrics.pyx does `cimport numpy`, so the generated C
# presumably needs NumPy's headers on the include path - TODO confirm and add
# an include directory / build requirement if the compile step fails.
py.extension_module(
  'metrics',
  c_files,
  subdir : 'werpy_test_v4',
  install : true
)
21 changes: 10 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
build-backend = 'mesonpy'
requires = ['meson-python', 'wheel', 'Cython']
#requires = ['meson-python', 'wheel', 'Cython', 'numpy']

[project]
name = "werpy"
version = "1.1.2"
name = 'werpy_test_v4'
version = '0.0.1'
description = 'A powerful yet lightweight Python package to calculate and analyze the Word Error Rate (WER).'
readme = 'README.md'
requires-python = '>=3.8'
license = {file = 'LICENSE'}
authors = [
{ name="Ross Armstrong", email="ross.armstrong@analyticsinmotion.com" },
{name = 'Ross Armstrong', email = 'ross.armstrong@analyticsinmotion.com'},
]
description = "A powerful yet lightweight Python package to calculate and analyze the Word Error Rate (WER)."
readme = "README.md"
license = "BSD-3-Clause"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: BSD License",
Expand All @@ -25,7 +26,6 @@ classifiers = [
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Mathematics"
]

keywords = [
"wer",
"word error rate",
Expand All @@ -37,7 +37,6 @@ keywords = [
"nlp",
"metrics"
]

dependencies = [
"numpy>=1.21.6; python_version<'3.11'",
"numpy>=1.23.2; python_version>='3.11'",
Expand Down
118 changes: 0 additions & 118 deletions werpy/metrics.py

This file was deleted.

File renamed without changes.
File renamed without changes.
67 changes: 67 additions & 0 deletions werpy_test_v4/metrics.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# metrics.pyx
import numpy as np
cimport numpy as np

# Add cimport cython here
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray calculations(object reference, object hypothesis):
    """Compute word-level edit-distance statistics for one text pair.

    Both inputs are split on whitespace with ``.split()``; a Levenshtein
    distance matrix is filled over the two word lists and then walked
    backwards from the bottom-right corner to classify every edit.

    Parameters
    ----------
    reference : object
        The reference text; must provide a ``split()`` method.
    hypothesis : object
        The hypothesis text; must provide a ``split()`` method.

    Returns
    -------
    np.ndarray
        One-dimensional ``dtype=object`` array with nine entries, in order:
        ``wer`` (distance / number of reference words), ``ld`` (the
        distance), ``m`` (number of reference words), ``insertions``,
        ``deletions``, ``substitutions``, ``inserted_words``,
        ``deleted_words`` and ``substituted_words`` (a list of
        ``(reference_word, hypothesis_word)`` tuples). The word lists are in
        left-to-right order.

    Raises
    ------
    AttributeError
        If either input has no ``split()`` method.
    ZeroDivisionError
        If ``reference`` contains no words, because the rate divides by the
        reference word count.
    """
    cdef list reference_word = reference.split()
    cdef list hypothesis_word = hypothesis.split()

    cdef Py_ssize_t m, n, i, j, substitution_cost, ld, insertions, deletions, substitutions
    cdef list inserted_words, deleted_words, substituted_words
    cdef list ldm, row, prev_row
    cdef object ref_word
    cdef double wer

    m, n = len(reference_word), len(hypothesis_word)
    ldm = [[0] * (n + 1) for _ in range(m + 1)]

    # Boundary cells: turning an empty prefix into j words costs j insertions.
    row = ldm[0]
    for j in range(n + 1):
        row[j] = j

    for i in range(1, m + 1):
        row = ldm[i]
        prev_row = ldm[i - 1]
        # Boundary cell: turning i words into an empty prefix costs i deletions.
        row[0] = i
        ref_word = reference_word[i - 1]
        for j in range(1, n + 1):
            substitution_cost = 0 if ref_word == hypothesis_word[j - 1] else 1
            row[j] = min(
                prev_row[j] + 1,  # Deletion
                row[j - 1] + 1,  # Insertion
                prev_row[j - 1] + substitution_cost  # Substitution
            )

    ld = ldm[m][n]
    # Raises ZeroDivisionError for an empty reference; kept as-is because
    # callers may rely on catching that exception.
    wer = ld / m

    insertions, deletions, substitutions = 0, 0, 0
    inserted_words, deleted_words, substituted_words = [], [], []

    # Walk back from the bottom-right corner. Tie-break order when several
    # predecessors explain the cell: match, substitution, insertion, deletion.
    i, j = m, n
    while i > 0 or j > 0:
        if i > 0 and j > 0 and reference_word[i - 1] == hypothesis_word[j - 1]:
            i -= 1
            j -= 1
        elif i > 0 and j > 0 and ldm[i][j] == ldm[i - 1][j - 1] + 1:
            substitutions += 1
            substituted_words.append((reference_word[i - 1], hypothesis_word[j - 1]))
            i -= 1
            j -= 1
        elif j > 0 and ldm[i][j] == ldm[i][j - 1] + 1:
            insertions += 1
            inserted_words.append(hypothesis_word[j - 1])
            j -= 1
        elif i > 0 and ldm[i][j] == ldm[i - 1][j] + 1:
            deletions += 1
            deleted_words.append(reference_word[i - 1])
            i -= 1

    # The walk collected words right-to-left; restore reading order.
    inserted_words.reverse()
    deleted_words.reverse()
    substituted_words.reverse()

    return np.array(
        [wer, ld, m, insertions, deletions, substitutions, inserted_words, deleted_words, substituted_words],
        dtype=object)

def metrics(reference, hypothesis):
    """Run ``calculations`` element-wise over ``reference`` and ``hypothesis``.

    The function is wrapped with ``np.vectorize`` and the wrapper is called
    on the two arguments, so NumPy's broadcasting rules pair up the inputs.
    Returns whatever the vectorized call produces.
    """
    return np.vectorize(calculations)(reference, hypothesis)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 484d22a

Please sign in to comment.