Merge pull request #340 from claritychallenge/339-icassp-2024-cadenza…

…-baseline Baseline for ICASSP 2024 Cadenza Challenge
claritychallenge · Sep 15, 2023 · b005f41 · b005f41
2 parents 1663f32 + 92b444f
commit b005f41
Show file tree

Hide file tree

Showing 37 changed files with 2,778 additions and 25 deletions.
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@
 [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/claritychallenge/clarity/main.svg)](https://results.pre-commit.ci/latest/github/claritychallenge/clarity/main)
 [![Downloads](https://pepy.tech/badge/pyclarity)](https://pepy.tech/project/pyclarity)
 
-[![PyPI](https://img.shields.io/static/v1?label=CAD1%20and%20CPC2%20Challenges%20-%20pypi&message=v0.3.3&color=orange)](https://pypi.org/project/pyclarity/0.3.3/)
+[![PyPI](https://img.shields.io/static/v1?label=CAD1%20and%20CPC2%20Challenges%20-%20pypi&message=v0.4.0&color=orange)](https://pypi.org/project/pyclarity/0.4.0/)
 [![PyPI](https://img.shields.io/static/v1?label=ICASSP%202023%20Challenge%20-%20pypi&message=v0.2.1&color=orange)](https://pypi.org/project/pyclarity/0.2.1/)
 [![PyPI](https://img.shields.io/static/v1?label=CEC2%20Challenge%20-%20pypi&message=v0.1.1&color=orange)](https://pypi.org/project/pyclarity/0.1.1/)
 [![ORDA](https://img.shields.io/badge/ORDA--DOI-10.15131%2Fshef.data.23230694.v.1-lightgrey)](https://figshare.shef.ac.uk/articles/software/clarity/23230694/1)
@@ -34,10 +34,11 @@ In this repository, you will find code to support all Clarity and Cadenza Challe
 
 ## Current Events
 
-- The first Cadenza Challenge (CAD1) is now open. :fire::fire:
+- The ICASSP 2024 Cadenza Challenge (CAD_ICASSP_2024) is now open. :fire::fire:
   - Join the [Cadenza Challenge Group](https://groups.google.com/g/cadenza-challenge) to keep up-to-date on developments.
   - Visit the Cadenza Challenge [website](https://cadenzachallenge.org/) for more details.
-  - Evaluation tools and a baseline system are available in the [CAD1 recipes](./recipes/cad1) folder. :new:
+- The first Cadenza Challenge (CAD1) is closed.
+  - Subjective Evaluation is underway. :new:
 - The 2nd Clarity Prediction Challenge (CPC2) is now open.   :fire::fire:
   - Join the [Clarity Challenge Group](https://groups.google.com/g/clarity-challenge) to keep up-to-date on developments.
   - Visit the Clarity Challenge [website](https://claritychallenge.org/) for more details.
@@ -86,11 +87,12 @@ pip install -e git+https://github.com/claritychallenge/clarity.git@main
 
 Current challenge
 
-- [The 1st Cadenza Challenge (CAD1)](./recipes/cad1)
-- [The 2nd Clarity Prediction Challenge (CPC2)](./recipes/cpc2)
+- [The ICASSP 2024 Cadenza Challenge](./recipes/cad_icassp_2024)
 
 Previous challenges
 
+- [The 1st Cadenza Challenge (CAD1)](./recipes/cad1)
+- [The 2nd Clarity Prediction Challenge (CPC2)](./recipes/cpc2)
 - [The ICASSP 2023 Enhancement Challenge](./recipes/icassp_2023)
 - [The 2nd Clarity Enhancement Challenge (CEC2)](./recipes/cec2)
 - [The 1st Clarity Prediction Challenge (CPC1)](./recipes/cpc1)

diff --git a/clarity/evaluator/haaqi/haaqi.py b/clarity/evaluator/haaqi/haaqi.py
@@ -178,8 +178,9 @@ def haaqi_v1(
 def compute_haaqi(
     processed_signal: ndarray,
     reference_signal: ndarray,
+    processed_sample_rate: float,
+    reference_sample_rate: float,
     audiogram: Audiogram,
-    sample_rate: float,
     equalisation: int = 1,
     level1: float = 65.0,
 ) -> float:
@@ -191,8 +192,9 @@ def compute_haaqi(
         reference_signal (np.ndarray): Input reference speech signal with no noise
             or distortion. If a hearing loss is specified, NAL-R equalization
             is optional
+        processed_sample_rate (float): Sampling rate in Hz for processed signal.
+        reference_sample_rate (float): Sampling rate in Hz for reference signal.
         audiogram (Audiogram): Audiogram object.
-        sample_rate (int): Sample rate in Hz.
         equalisation (int): hearing loss equalization mode for reference signal:
             1 = no EQ has been provided, the function will add NAL-R
             2 = NAL-R EQ has already been added to the reference signal
@@ -211,9 +213,9 @@ def compute_haaqi(
 
     score, _, _, _ = haaqi_v1(
         reference=reference_signal,
-        reference_freq=sample_rate,
+        reference_freq=reference_sample_rate,
         processed=processed_signal,
-        processed_freq=sample_rate,
+        processed_freq=processed_sample_rate,
         audiogram=audiogram,
         equalisation=equalisation,
         level1=level1,

diff --git a/clarity/utils/flac_encoder.py b/clarity/utils/flac_encoder.py
@@ -0,0 +1,262 @@
+"""
+Class for encoding and decoding audio signals
+    using flac compression.
+"""
+from __future__ import annotations
+
+import logging
+import tempfile
+
+# pylint: disable=import-error, protected-access
+from pathlib import Path
+
+import numpy as np
+import pyflac as pf
+import soundfile as sf
+
+logger = logging.getLogger(__name__)
+
+
+class WavEncoder(pf.encoder._Encoder):
+    """
+    Class offers an adaptation of the pyflac.encoder.FileEncoder
+    to work directly with WAV signals as input.
+
+    """
+
+    def __init__(
+        self,
+        signal: np.ndarray,
+        sample_rate: int,
+        output_file: str | Path | None = None,
+        compression_level: int = 5,
+        blocksize: int = 0,
+        streamable_subset: bool = True,
+        verify: bool = False,
+    ) -> None:
+        """
+        Initialise the encoder.
+
+        Args:
+            signal (np.ndarray): The raw audio data to be encoded.
+            sample_rate (int): The sample rate of the audio data.
+            output_file (str | Path | None): Path to the output FLAC file,
+                a temporary file will be created if unspecified.
+            compression_level (int): The compression level parameter that
+                varies from 0 (fastest) to 8 (slowest). The default setting
+                is 5, see https://en.wikipedia.org/wiki/FLAC for more details.
+            blocksize (int): The size of the block to be returned in the
+                callback. The default is 0 which allows libFLAC to determine
+                the best block size.
+            streamable_subset (bool): Whether to use the streamable subset for encoding.
+                If true the encoder will check settings for compatibility. If false, the
+                settings may take advantage of the full range that the format allows.
+            verify (bool): If `True`, the encoder will verify its own
+                encoded output by feeding it through an internal decoder and
+                comparing the original signal against the decoded signal.
+                If a mismatch occurs, the `process` method will raise a
+                `EncoderProcessException`.  Note that this will slow the
+                encoding process by the extra time required for decoding and comparison.
+        """
+        super().__init__()
+
+        self.__raw_audio = signal
+        self._sample_rate = sample_rate
+
+        if output_file:
+            self.__output_file = (
+                Path(output_file) if isinstance(output_file, str) else output_file
+            )
+        else:
+            with tempfile.NamedTemporaryFile(suffix=".flac") as ofile:
+                self.__output_file = Path(ofile.name)  # file is deleted; name reused
+
+        self._blocksize = blocksize
+        self._compression_level = compression_level
+        self._streamable_subset = streamable_subset
+        self._verify = verify
+        self._initialised = False
+
+    def _init(self):
+        """
+        Initialise the encoder to write to a file.
+
+        Raises:
+            EncoderInitException: if initialisation fails.
+        """
+        c_output_filename = pf.encoder._ffi.new(
+            "char[]", str(self.__output_file).encode("utf-8")
+        )
+        rc = pf.encoder._lib.FLAC__stream_encoder_init_file(
+            self._encoder,
+            c_output_filename,
+            pf.encoder._lib._progress_callback,
+            self._encoder_handle,
+        )
+        pf.encoder._ffi.release(c_output_filename)
+        if rc != pf.encoder._lib.FLAC__STREAM_ENCODER_INIT_STATUS_OK:
+            raise pf.EncoderInitException(rc)
+
+        self._initialised = True
+
+    def process(self) -> bytes:
+        """
+        Encode the signal given at construction and read back the FLAC file.
+
+        Returns:
+            (bytes): The FLAC encoded bytes.
+
+        Raises:
+            EncoderProcessException: if an error occurs when processing the samples
+        """
+        super().process(self.__raw_audio)
+        self.finish()
+        with open(self.__output_file, "rb") as f:
+            return f.read()
+
+
+class FileDecoder(pf.decoder.FileDecoder):
+    def process(self) -> tuple[np.ndarray, int]:
+        """
+        Overridden version of the process method from the pyflac decoder.
+        Original process returns stereo signals in float64 format.
+
+        In this version, the data is returned using the original number
+        of channels and in int16 format.
+
+        Returns:
+            (tuple): A tuple of the decoded numpy audio array, and the sample rate
+                of the audio data.
+
+        Raises:
+            DecoderProcessException: if any fatal read, write, or memory allocation
+                error occurred (meaning decoding must stop)
+        """
+        result = pf.decoder._lib.FLAC__stream_decoder_process_until_end_of_stream(
+            self._decoder
+        )
+        if self.state != pf.decoder.DecoderState.END_OF_STREAM and not result:
+            raise pf.DecoderProcessException(str(self.state))
+
+        self.finish()
+        self.__output.close()  # resolves to self._FileDecoder__output
+        return sf.read(str(self.__output_file), always_2d=False, dtype="int16")
+
+
+class FlacEncoder:
+    """
+    Class for encoding and decoding audio signals using FLAC
+
+    It uses the pyflac library to encode and decode the audio data,
+    and offers convenient methods for encoding and decoding audio data.
+    """
+
+    def __init__(self, compression_level: int = 5) -> None:
+        """
+        Initialise the compressor.
+
+        Args:
+            compression_level (int): The compression level parameter that
+                varies from 0 (fastest) to 8 (slowest). The default setting
+                is 5, see https://en.wikipedia.org/wiki/FLAC for more details.
+        """
+        self.compression_level = compression_level
+
+    def encode(
+        self,
+        signal: np.ndarray,
+        sample_rate: int,
+        output_file: str | Path | None = None,
+    ) -> bytes:
+        """
+        Method to encode the audio data using FLAC compressor.
+
+        It creates a WavEncoder object and uses it to encode the audio data.
+
+        Args:
+            signal (np.ndarray): The raw audio data to be compressed.
+            sample_rate (int): The sample rate of the audio data.
+            output_file (str | Path | None): Path to where to
+                save the output FLAC file. If not specified, a temporary file
+                will be created.
+
+        Returns:
+            (bytes): The FLAC encoded audio signal.
+
+        Raises:
+            ValueError: If the audio signal is not in `np.int16` format.
+        """
+        if signal.dtype != np.int16:
+            logger.error(
+                f"FLAC encoder only supports 16-bit integer signals, "
+                f"but got {signal.dtype}"
+            )
+            raise ValueError(
+                f"FLAC encoder only supports 16-bit integer signals, "
+                f"but got {signal.dtype}"
+            )
+
+        wav_encoder = WavEncoder(
+            signal=signal,
+            sample_rate=sample_rate,
+            compression_level=self.compression_level,
+            output_file=output_file,
+        )
+        return wav_encoder.process()
+
+    @staticmethod
+    def decode(input_filename: Path | str) -> tuple[np.ndarray, float]:
+        """
+        Method to decode a flac file to wav audio data.
+
+        It uses the pyflac library to decode the flac file.
+
+        Args:
+            input_filename (pathlib.Path | str): Path to the input FLAC file.
+
+        Returns:
+            (tuple): The decoded audio data and its sample rate as a float.
+
+        Raises:
+            FileNotFoundError: If the flac file to decode does not exist.
+        """
+        input_filename = (
+            Path(input_filename) if isinstance(input_filename, str) else input_filename
+        )
+
+        if not input_filename.exists():
+            logger.error(f"File {input_filename} not found.")
+            raise FileNotFoundError(f"File {input_filename} not found.")
+
+        decoder = FileDecoder(input_filename)
+        signal, sample_rate = decoder.process()
+
+        return signal, float(sample_rate)
+
+
+def read_flac_signal(filename: Path) -> tuple[np.ndarray, float]:
+    """Read a FLAC file as float32, applying the sidecar .txt scale if present
+
+    Args:
+        filename (Path): The path to the FLAC file to read.
+
+    Returns:
+        signal (np.ndarray): The decoded signal.
+        sample_rate (float): The sample rate of the signal.
+    """
+    # Create encoder object
+    flac_encoder = FlacEncoder()
+
+    # Decode FLAC file
+    signal, sample_rate = flac_encoder.decode(
+        filename,
+    )
+    signal = (signal / 32768.0).astype(np.float32)  # int16 -> [-1, 1)
+
+    # Load scale factor from the same-named .txt file, if there is one
+    if filename.with_suffix(".txt").exists():
+        with open(filename.with_suffix(".txt"), encoding="utf-8") as fp:
+            max_value = float(fp.read())
+            # Scale signal
+            signal *= max_value
+    return signal, sample_rate
diff --git a/clarity/utils/results_support.py b/clarity/utils/results_support.py
@@ -0,0 +1,63 @@
+"""Dataclass to save challenges results to a CSV file."""
+from __future__ import annotations
+
+# pylint: disable=import-error
+import csv
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class ResultsFile:
+    """A utility class for writing results to a CSV file.
+
+    Attributes:
+        file_name (str | Path): The name of the file to write results to.
+        header_columns (list[str]): The columns to write to the CSV file.
+        append_results (bool): Whether to append results to an existing file.
+            If False, a new file will be created and the header row will be written.
+            Defaults to False.
+    """
+
+    file_name: str | Path
+    header_columns: list[str]
+    append_results: bool = False
+
+    def __post_init__(self):
+        """Convert file_name to a Path, then write the header unless appending."""
+        if isinstance(self.file_name, str):
+            self.file_name = Path(self.file_name)
+
+        if self.append_results:
+            if not Path(self.file_name).exists():
+                raise FileNotFoundError(
+                    "Cannot append results to non-existent file "
+                    f"{self.file_name.as_posix()}"
+                    " - please set append_results=False"
+                )
+        else:
+            with open(self.file_name, "w", encoding="utf-8", newline="") as csv_file:
+                csv_writer = csv.writer(
+                    csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
+                )
+                csv_writer.writerow(self.header_columns)
+
+    def add_result(
+        self,
+        row_values: dict[str, str | float],
+    ):
+        """Append one row to the CSV file, ordered by header_columns.
+
+        Args:
+            row_values (dict[str, str | float]): Values keyed by header column.
+        """
+
+        with open(self.file_name, "a", encoding="utf-8", newline="") as csv_file:
+            csv_writer = csv.writer(
+                csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
+            )
+            row = []
+            for column in self.header_columns:
+                row.append(row_values[column])  # KeyError if column is missing
+
+            csv_writer.writerow(row)