Skip to content

Commit

Permalink
Merge pull request #340 from claritychallenge/339-icassp-2024-cadenza…
Browse files Browse the repository at this point in the history
…-baseline

Baseline for ICASSP 2024 Cadenza Challenge
  • Loading branch information
groadabike authored Sep 15, 2023
2 parents 1663f32 + 92b444f commit b005f41
Show file tree
Hide file tree
Showing 37 changed files with 2,778 additions and 25 deletions.
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/claritychallenge/clarity/main.svg)](https://results.pre-commit.ci/latest/github/claritychallenge/clarity/main)
[![Downloads](https://pepy.tech/badge/pyclarity)](https://pepy.tech/project/pyclarity)

[![PyPI](https://img.shields.io/static/v1?label=CAD1%20and%20CPC2%20Challenges%20-%20pypi&message=v0.3.3&color=orange)](https://pypi.org/project/pyclarity/0.3.3/)
[![PyPI](https://img.shields.io/static/v1?label=CAD1%20and%20CPC2%20Challenges%20-%20pypi&message=v0.4.0&color=orange)](https://pypi.org/project/pyclarity/0.4.0/)
[![PyPI](https://img.shields.io/static/v1?label=ICASSP%202023%20Challenge%20-%20pypi&message=v0.2.1&color=orange)](https://pypi.org/project/pyclarity/0.2.1/)
[![PyPI](https://img.shields.io/static/v1?label=CEC2%20Challenge%20-%20pypi&message=v0.1.1&color=orange)](https://pypi.org/project/pyclarity/0.1.1/)
[![ORDA](https://img.shields.io/badge/ORDA--DOI-10.15131%2Fshef.data.23230694.v.1-lightgrey)](https://figshare.shef.ac.uk/articles/software/clarity/23230694/1)
Expand All @@ -34,10 +34,11 @@ In this repository, you will find code to support all Clarity and Cadenza Challe

## Current Events

- The first Cadenza Challenge (CAD1) is now open. :fire::fire:
- The ICASSP 2024 Cadenza Challenge (CAD_ICASSP_2024) is now open. :fire::fire:
- Join the [Cadenza Challenge Group](https://groups.google.com/g/cadenza-challenge) to keep up-to-date on developments.
- Visit the Cadenza Challenge [website](https://cadenzachallenge.org/) for more details.
- Evaluation tools and a baseline system are available in the [CAD1 recipes](./recipes/cad1) folder. :new:
- The first Cadenza Challenge (CAD1) is closed.
- Subjective Evaluation is underway. :new:
- The 2nd Clarity Prediction Challenge (CPC2) is now open. :fire::fire:
- Join the [Clarity Challenge Group](https://groups.google.com/g/clarity-challenge) to keep up-to-date on developments.
- Visit the Clarity Challenge [website](https://claritychallenge.org/) for more details.
Expand Down Expand Up @@ -86,11 +87,12 @@ pip install -e git+https://github.com/claritychallenge/clarity.git@main

Current challenge

- [The 1st Cadenza Challenge (CAD1)](./recipes/cad1)
- [The 2nd Clarity Prediction Challenge (CPC2)](./recipes/cpc2)
- [The ICASSP 2024 Cadenza Challenge](./recipes/cad_icassp_2024)

Previous challenges

- [The 1st Cadenza Challenge (CAD1)](./recipes/cad1)
- [The 2nd Clarity Prediction Challenge (CPC2)](./recipes/cpc2)
- [The ICASSP 2023 Enhancement Challenge](./recipes/icassp_2023)
- [The 2nd Clarity Enhancement Challenge (CEC2)](./recipes/cec2)
- [The 1st Clarity Prediction Challenge (CPC1)](./recipes/cpc1)
Expand Down
10 changes: 6 additions & 4 deletions clarity/evaluator/haaqi/haaqi.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,9 @@ def haaqi_v1(
def compute_haaqi(
processed_signal: ndarray,
reference_signal: ndarray,
processed_sample_rate: float,
reference_sample_rate: float,
audiogram: Audiogram,
sample_rate: float,
equalisation: int = 1,
level1: float = 65.0,
) -> float:
Expand All @@ -191,8 +192,9 @@ def compute_haaqi(
reference_signal (np.ndarray): Input reference speech signal with no noise
or distortion. If a hearing loss is specified, NAL-R equalization
is optional
processed_sample_rate (float): Sampling rate in Hz for processed signal.
reference_sample_rate (float): Sampling rate in Hz for reference signal.
audiogram (Audiogram): Audiogram object.
sample_rate (int): Sample rate in Hz.
equalisation (int): hearing loss equalization mode for reference signal:
1 = no EQ has been provided, the function will add NAL-R
2 = NAL-R EQ has already been added to the reference signal
Expand All @@ -211,9 +213,9 @@ def compute_haaqi(

score, _, _, _ = haaqi_v1(
reference=reference_signal,
reference_freq=sample_rate,
reference_freq=reference_sample_rate,
processed=processed_signal,
processed_freq=sample_rate,
processed_freq=processed_sample_rate,
audiogram=audiogram,
equalisation=equalisation,
level1=level1,
Expand Down
262 changes: 262 additions & 0 deletions clarity/utils/flac_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
"""
Class for encoding and decoding audio signals
using flac compression.
"""
from __future__ import annotations

import logging
import tempfile

# pylint: disable=import-error, protected-access
from pathlib import Path

import numpy as np
import pyflac as pf
import soundfile as sf

logger = logging.getLogger(__name__)


class WavEncoder(pf.encoder._Encoder):
    """
    Adaptation of the pyflac.encoder.FileEncoder that works directly with
    an in-memory audio signal as input instead of a WAV file.

    The encoded stream is written to ``output_file``. When no output file is
    given, a temporary ``.flac`` file is used instead and is removed again
    once `process` has read the encoded bytes back.
    """

    def __init__(
        self,
        signal: np.ndarray,
        sample_rate: int,
        output_file: str | Path | None = None,
        compression_level: int = 5,
        blocksize: int = 0,
        streamable_subset: bool = True,
        verify: bool = False,
    ) -> None:
        """
        Initialise the encoder.

        Args:
            signal (np.ndarray): The raw audio data to be encoded.
            sample_rate (int): The sample rate of the audio data.
            output_file (str | Path | None): Path to the output FLAC file,
                a temporary file will be created if unspecified.
            compression_level (int): The compression level parameter that
                varies from 0 (fastest) to 8 (slowest). The default setting
                is 5, see https://en.wikipedia.org/wiki/FLAC for more details.
            blocksize (int): The size of the block to be returned in the
                callback. The default is 0 which allows libFLAC to determine
                the best block size.
            streamable_subset (bool): Whether to use the streamable subset
                for encoding. If true the encoder will check settings for
                compatibility. If false, the settings may take advantage of
                the full range that the format allows.
            verify (bool): If `True`, the encoder will verify its own
                encoded output by feeding it through an internal decoder and
                comparing the original signal against the decoded signal.
                If a mismatch occurs, the `process` method will raise a
                `EncoderProcessException`. Note that this will slow the
                encoding process by the extra time required for decoding
                and comparison.
        """
        super().__init__()

        self.__raw_audio = signal
        self._sample_rate = sample_rate

        if output_file:
            self.__output_file = (
                Path(output_file) if isinstance(output_file, str) else output_file
            )
            self.__output_is_temporary = False
        else:
            # delete=False keeps the reserved name on disk after the handle is
            # closed, so no other process can be handed the same path before
            # the encoder opens it.
            # NOTE(review): relies on libFLAC overwriting the existing empty
            # file when the encoder is initialised - confirm against libFLAC.
            with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as ofile:
                self.__output_file = Path(ofile.name)
            self.__output_is_temporary = True

        self._blocksize = blocksize
        self._compression_level = compression_level
        self._streamable_subset = streamable_subset
        self._verify = verify
        self._initialised = False

    def _init(self):
        """
        Initialise the encoder to write to a file.

        Raises:
            EncoderInitException: if initialisation fails.
        """
        c_output_filename = pf.encoder._ffi.new(
            "char[]", str(self.__output_file).encode("utf-8")
        )
        rc = pf.encoder._lib.FLAC__stream_encoder_init_file(
            self._encoder,
            c_output_filename,
            pf.encoder._lib._progress_callback,
            self._encoder_handle,
        )
        # The C string is only needed for the init call itself.
        pf.encoder._ffi.release(c_output_filename)
        if rc != pf.encoder._lib.FLAC__STREAM_ENCODER_INIT_STATUS_OK:
            raise pf.EncoderInitException(rc)

        self._initialised = True

    def process(self) -> bytes:
        """
        Encode the signal given at construction and return the FLAC bytes.

        The signal is pushed through the parent encoder, the stream is
        finished, and the resulting file is read back. A temporary output
        file (created when no ``output_file`` was given) is removed
        afterwards, whether or not encoding succeeded.

        Returns:
            (bytes): The FLAC encoded bytes.

        Raises:
            EncoderProcessException: if an error occurs when processing
                the samples
        """
        try:
            super().process(self.__raw_audio)
            self.finish()
            with open(self.__output_file, "rb") as f:
                return f.read()
        finally:
            if self.__output_is_temporary:
                # Best-effort clean-up: a failure to delete must not mask the
                # encoding result or an exception already in flight.
                try:
                    self.__output_file.unlink()
                except OSError as err:
                    logger.warning(
                        "Could not remove temporary file %s: %s",
                        self.__output_file,
                        err,
                    )


class FileDecoder(pf.decoder.FileDecoder):
    """
    pyflac file decoder whose `process` returns the decoded audio as int16
    with its original channel layout.
    """

    def process(self) -> tuple[np.ndarray, int]:
        """
        Overwritten version of the process method from the pyflac decoder.

        Original process returns stereo signals in float64 format.
        In this version, the data is returned using the original number
        of channels and in int16 format.

        Returns:
            (tuple): A tuple of the decoded numpy audio array, and the
                sample rate of the audio data.

        Raises:
            DecoderProcessException: if any fatal read, write, or memory
                allocation error occurred (meaning decoding must stop)
        """
        decoded_ok = pf.decoder._lib.FLAC__stream_decoder_process_until_end_of_stream(
            self._decoder
        )
        reached_end = self.state == pf.decoder.DecoderState.END_OF_STREAM
        if not (reached_end or decoded_ok):
            raise pf.DecoderProcessException(str(self.state))

        self.finish()
        # NOTE(review): `__output` and `__output_file` are name-mangled to
        # `_FileDecoder__...`; they presumably resolve to the parent's private
        # attributes only because the parent class is also called
        # `FileDecoder` - confirm before renaming this class.
        self.__output.close()
        decoded_path = str(self.__output_file)
        return sf.read(decoded_path, always_2d=False, dtype="int16")


class FlacEncoder:
    """
    Convenience wrapper for FLAC encoding and decoding of audio signals.

    Encoding is delegated to `WavEncoder` and decoding to `FileDecoder`,
    both built on the pyflac library.
    """

    def __init__(self, compression_level: int = 5) -> None:
        """
        Store the compression level used by `encode`.

        Args:
            compression_level (int): The compression level parameter that
                varies from 0 (fastest) to 8 (slowest). The default setting
                is 5, see https://en.wikipedia.org/wiki/FLAC for more details.
        """
        self.compression_level = compression_level

    def encode(
        self,
        signal: np.ndarray,
        sample_rate: int,
        output_file: str | Path | None = None,
    ) -> bytes:
        """
        Compress an int16 audio signal to FLAC through a `WavEncoder`.

        Args:
            signal (np.ndarray): The raw audio data to be compressed.
            sample_rate (int): The sample rate of the audio data.
            output_file (str | Path | None): Where to save the output FLAC
                file. If not specified, a temporary file will be created.

        Returns:
            (bytes): The FLAC encoded audio signal.

        Raises:
            ValueError: If the audio signal is not in `np.int16` format.
        """
        if signal.dtype == np.int16:
            encoder = WavEncoder(
                signal=signal,
                sample_rate=sample_rate,
                compression_level=self.compression_level,
                output_file=output_file,
            )
            return encoder.process()

        # Anything other than int16 is rejected; log and raise the same text.
        message = (
            f"FLAC encoder only supports 16-bit integer signals, "
            f"but got {signal.dtype}"
        )
        logger.error(message)
        raise ValueError(message)

    @staticmethod
    def decode(input_filename: Path | str) -> tuple[np.ndarray, float]:
        """
        Decode a FLAC file into raw audio data using `FileDecoder`.

        Args:
            input_filename (pathlib.Path | str): Path to the input FLAC file.

        Returns:
            (tuple): The decoded audio data and its sample rate as a float.

        Raises:
            FileNotFoundError: If the flac file to decode does not exist.
        """
        flac_path = input_filename
        if isinstance(flac_path, str):
            flac_path = Path(flac_path)

        if not flac_path.exists():
            logger.error(f"File {flac_path} not found.")
            raise FileNotFoundError(f"File {flac_path} not found.")

        signal, sample_rate = FileDecoder(flac_path).process()
        return signal, float(sample_rate)


def read_flac_signal(filename: Path | str) -> tuple[np.ndarray, float]:
    """Read a FLAC signal and return it as a float32 numpy array.

    The file is decoded with `FlacEncoder.decode`, and the samples are
    divided by 32768 and cast to float32. If a file with the same name but a
    ``.txt`` suffix exists next to the FLAC file, its content is parsed as a
    single float and the signal is multiplied by it.

    Args:
        filename (Path | str): The path to the FLAC file to read.

    Returns:
        signal (np.ndarray): The decoded (and optionally rescaled) signal.
        sample_rate (float): The sample rate of the signal.

    Raises:
        FileNotFoundError: If the FLAC file does not exist (raised by
            `FlacEncoder.decode`).
    """
    # Normalise up front: `decode` accepts a str, but `with_suffix` below
    # only exists on Path objects.
    filename = Path(filename)

    # Decode FLAC file
    signal, sample_rate = FlacEncoder.decode(filename)
    signal = (signal / 32768.0).astype(np.float32)

    # Load the scale factor, if one was stored alongside the FLAC file
    scale_file = filename.with_suffix(".txt")
    if scale_file.exists():
        with open(scale_file, encoding="utf-8") as fp:
            max_value = float(fp.read())
        # Scale signal
        signal *= max_value
    return signal, sample_rate
63 changes: 63 additions & 0 deletions clarity/utils/results_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Dataclass to save challenges results to a CSV file."""
from __future__ import annotations

# pylint: disable=import-error
import csv
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ResultsFile:
    """Helper that records challenge results as rows of a CSV file.

    Attributes:
        file_name (str | Path): The file to write results to. A string is
            converted to a Path on construction.
        header_columns (list[str]): Column names; they form the header row
            and define the order in which values are written.
        append_results (bool): If True, rows are appended to a file that
            must already exist. If False, the file is (re)created and the
            header row is written. Defaults to False.
    """

    file_name: str | Path
    header_columns: list[str]
    append_results: bool = False

    def __post_init__(self):
        """Prepare the CSV file: write the header, or check it exists.

        Raises:
            FileNotFoundError: If append_results is True and the file does
                not exist.
        """
        if isinstance(self.file_name, str):
            self.file_name = Path(self.file_name)

        if not self.append_results:
            # Fresh file: truncate and start with the header row.
            with open(self.file_name, "w", encoding="utf-8", newline="") as handle:
                writer = csv.writer(
                    handle, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
                )
                writer.writerow(self.header_columns)
            return

        if Path(self.file_name).exists():
            return

        raise FileNotFoundError(
            "Cannot append results to non-existent file "
            f"{self.file_name.as_posix()}"
            " - please set append_results=False"
        )

    def add_result(
        self,
        row_values: dict[str, str | float],
    ):
        """Append one result row to the CSV file.

        Values are looked up by column name in header order; keys that are
        not header columns are ignored.

        Args:
            row_values (dict[str, str | float]): Mapping from column name to
                the value to write.

        Raises:
            KeyError: If a header column is missing from row_values.
        """
        with open(self.file_name, "a", encoding="utf-8", newline="") as handle:
            writer = csv.writer(
                handle, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
            )
            ordered_values = [row_values[name] for name in self.header_columns]
            writer.writerow(ordered_values)
Loading

0 comments on commit b005f41

Please sign in to comment.