recipes for ICASSP #120

Merged · 12 commits · Nov 29, 2022
6 changes: 5 additions & 1 deletion .vscode/settings.json
@@ -1,3 +1,7 @@
 {
-    "python.formatting.provider": "black"
+    "python.formatting.provider": "black",
+    "cSpell.words": [
+        "haspi",
+        "hasqi"
+    ]
 }
9 changes: 5 additions & 4 deletions clarity/evaluator/haspi/eb.py
@@ -1598,16 +1598,17 @@ def ave_covary2(sigcov, sigMSx, thr):
     sigLinear = 10 ** (sigRMS / 20)  # Linear amplitude (specific loudness)
     xsum = np.sum(sigLinear, 0) / nchan  # Intensity averaged over frequency bands
     xsum = 20 * np.log10(xsum)  # Convert back to dB (loudness in phons)
-    index = np.argwhere(
-        xsum > thr
-    ).T.squeeze()  # Identify those segments above threshold
+    index = np.argwhere(xsum > thr).T  # Identify those segments above threshold
+    if index.size != 1:
+        index = index.squeeze()
     nseg = index.shape[0]  # Number of segments above threshold

     # Exit if not enough segments above zero
     if nseg <= 1:
         print("Function eb.AveCovary: Ave signal below threshold, outputs set to 0.")
         avecov = 0
-        syncov = 0
+        # syncov = 0
+        syncov = [0] * 6
         return avecov, syncov

     # Remove the silent segments
4 changes: 2 additions & 2 deletions clarity/evaluator/hasqi/__init__.py
@@ -1,3 +1,3 @@
-from clarity.evaluator.hasqi.hasqi import hasqi_v2
+from clarity.evaluator.hasqi.hasqi import hasqi_v2, hasqi_v2_be

-__all__ = ["hasqi_v2"]
+__all__ = ["hasqi_v2", "hasqi_v2_be"]
7 changes: 3 additions & 4 deletions clarity/evaluator/hasqi/hasqi.py
@@ -1,7 +1,7 @@
 from clarity.evaluator.haspi import eb


-def hasqi_v2(x, fx, y, fy, HL, eq, level1=65):
+def hasqi_v2(x, fx, y, fy, HL, eq=1, level1=65):
     """
     Function to compute the HASQI version 2 quality index using the
     auditory model followed by computing the envelope cepstral
@@ -89,7 +89,6 @@ def hasqi_v2(x, fx, y, fy, HL, eq, level1=65):
     non_lin = (
         cep_corr**2
     ) * bm_sync5  # Combined envelope and temporal fine structure
-
     # Linear model
     linear = 0.579 * d_loud + 0.421 * d_slope  # Linear fit

@@ -136,7 +135,7 @@ def hasqi_v2_be(
         audiogram_r[i] for i in range(len(audiogram_cfs)) if audiogram_cfs[i] in aud
     ]

-    score_l, _, _, _ = hasqi_v2(xl, fs_signal, yl, fs_signal, hl_l, level)
-    score_r, _, _, _ = hasqi_v2(xr, fs_signal, yr, fs_signal, hl_r, level)
+    score_l, _, _, _ = hasqi_v2(xl, fs_signal, yl, fs_signal, hl_l, eq=1, level1=level)
+    score_r, _, _, _ = hasqi_v2(xr, fs_signal, yr, fs_signal, hl_r, eq=1, level1=level)

     return max(score_l, score_r)
153 changes: 153 additions & 0 deletions recipes/icassp_2023/README.md
@@ -0,0 +1,153 @@
# The ICASSP 2023 Clarity Enhancement Challenge (CEC_ICASSP2023)

Clarity challenge code for the ICASSP 2023 Clarity Enhancement Challenge.

For more information please visit the [challenge website](https://claritychallenge.org/docs/icassp2023/icassp2023_intro).

Clarity tutorials are [now available](https://claritychallenge.github.io/clarity_CC_doc/tutorials). The tutorials introduce the Clarity installation, how to interact with Clarity metadata, and also provide examples of baseline systems and evaluation tools.

## Data structure

The ICASSP 2023 Clarity Enhancement Challenge is using the Clarity CEC2 dataset. To download data, please visit [here](https://mab.to/KjXsa3EskhQuU). The data is split into three packages: `clarity_CEC2_core.v1_1.tgz` [28 GB], `clarity_CEC2_train.v1_1.tgz` [69 GB] and `clarity_CEC2_hoairs.v1_0.tgz` [144 GB].

Unpack packages under the same root directory using

```bash
tar -xvzf <PACKAGE_NAME>
```

**Core** contains metadata and development set signals, which can be used to validate existing systems

```text
clarity_data
| hrir/HRIRs_MAT 167M
|
└───dev
| └───rooms
| | | ac 20M
| | | rpf 79M
| |
| └───interferers
| | | music 5.8G
| | | noise 587M
| | | speech 1.4G
| |
| └───scenes 39G
| |
| └───targets 1.3G
| |
| └───speaker_adapt 20M
|
└───metadata
| scenes.train.json
| scenes.dev.json
| rooms.train.json
| rooms.dev.json
| masker_music_list.json
| masker_nonspeech_list.json
| masker_speech_list.json
| target_speech_list.json
| hrir_data.json
| listeners.json
| scenes_listeners.dev.json
| ...

```
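
The metadata files are plain JSON. For example, `scenes_listeners.dev.json` pairs each development scene with the listeners to be evaluated on it, and can be inspected with a few lines of Python (a sketch assuming the file maps scene IDs to lists of listener IDs; see the challenge website for the full schema):

```python
import json

# Peek at the first few scene-listener assignments
# (assumes the file maps scene IDs to lists of listener IDs)
with open("clarity_data/metadata/scenes_listeners.dev.json", encoding="utf-8") as fp:
    scenes_listeners = json.load(fp)

for scene, listeners in list(scenes_listeners.items())[:3]:
    print(scene, listeners)
```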

**Train** contains the training set, which can be used to optimise a system

```text
clarity_data
└───train
└───rooms
| | ac 48M
| | rpf 190M
|
└───interferers
| | music 16G
| | noise 3.9G
| | speech 4.5G
|
└───scenes 89G
|
└───targets 2.8G

```

**HOA_IRs** contains impulse responses for reproducing the scenes or for rendering additional training scenes.

```text
clarity_data
└───train/rooms/HOA_IRs 117G
|
└───dev/rooms/HOA_IRs 49G
```

## Baseline

In the `baseline/` folder, we provide code for running the baseline enhancement system and performing the objective evaluation.

### Enhancement

The baseline enhancement simply takes the 6-channel hearing aid inputs and reduces them to a stereo hearing aid output by passing through the 'front' microphone signal of the left and right ear.
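
In essence, the baseline amounts to the following (a simplified sketch of `baseline/enhance.py`; the scene and listener IDs below are placeholders):

```python
import numpy as np
from scipy.io import wavfile

# CH1 is the front microphone pair, so the stereo mix is passed straight through
fs, signal = wavfile.read("S06001_mix_CH1.wav")  # placeholder scene ID
signal = (signal / 32768.0).astype(np.float32)  # 16-bit PCM -> float in [-1, 1)
wavfile.write("S06001_L0001_enhanced.wav", fs, signal)  # placeholder listener ID
```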

To run the baseline enhancement system, first specify `path.root` in `config.yaml` to point to where you have installed the Clarity data. You can also define your own `path.exp_folder` to store enhanced signals and evaluation results.

Then run:

```bash
python enhance.py
```

Alternatively, you can provide `path.root` on the command line, e.g.,

```bash
python enhance.py path.root=/Volumes/data/clarity_CEC2_data
```

The folder `enhanced_signals` will appear in the `exp` folder.

### Evaluation

The `evaluate.py` script first passes signals through a provided hearing aid amplification stage, using NAL-R [[1](#references)] fitting and a simple automatic gain compressor. The amplification is determined by the audiograms defined by the scene-listener pairs in `clarity_data/metadata/scenes_listeners.dev.json` for the development set. After amplification, the evaluation function computes the better-ear HASPI [[2](#references)] and better-ear HASQI [[3](#references)] scores. The average of these two is computed and returned for each signal.
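
The amplification stage looks roughly as follows (a sketch assuming the `NALR` and `Compressor` helpers from the clarity package; the call signatures shown are assumptions, not the recipe's verbatim code):

```python
import numpy as np
from clarity.enhancer.compressor import Compressor
from clarity.enhancer.nalr import NALR

# Parameter values taken from baseline/config.yaml
nalr = NALR(nfir=220, fs=44100)
compressor = Compressor(
    threshold=0.35, attenuation=0.1, attack=50, release=1000, rms_buffer_size=0.064
)


def amplify(signal: np.ndarray, audiogram: np.ndarray, cfs: np.ndarray) -> np.ndarray:
    """NAL-R fitting, compression, then soft clipping (assumed signatures)."""
    nalr_fir, _ = nalr.build(audiogram, cfs)  # fit a linear NAL-R filter
    out = nalr.apply(nalr_fir, signal)  # amplify one ear's signal
    out, _, _ = compressor.process(out)  # simple automatic gain compression
    return np.tanh(out)  # soft clip, as enabled by `soft_clip: True`
```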

To run the evaluation stage, make sure that `path.root` is set in the `config.yaml` file and then run

```bash
python evaluate.py
```

The full evaluation set comprises 7500 scene-listener pairs and will take a long time to run. A standard small set, which uses 1/15 of the data, has been defined and can be run with

```bash
python evaluate.py evaluate.small_test=True
```

A CSV file containing the HASPI, HASQI and combined scores will be generated in the `path.exp_folder`.

When computing HASPI and HASQI, the `_target_anechoic_CH1.wav` signal is used as the reference, with its level normalised to match that of the corresponding `_target_CH1.wav`.
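
That normalisation is effectively RMS matching (a minimal sketch; the recipe's own implementation may differ in detail):

```python
import numpy as np


def match_level(reference: np.ndarray, target: np.ndarray) -> np.ndarray:
    """Scale `reference` so its RMS matches that of `target`."""
    gain = np.sqrt(np.mean(target**2) / np.mean(reference**2))
    return reference * gain
```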

### Reporting results

Once the evaluation script has completed, the final result can be reported with

```bash
python report_score.py
```

Or, if you have run the small evaluation:

```bash
python report_score.py evaluate.small_test=True
```

The score for the baseline enhancement is 0.185 overall (0.239 HASPI; 0.132 HASQI).

Please note: HASPI and HASQI employ random thresholding noise, so you will not get identical scores unless the random seed is set (in the given recipe, the random seed for each signal is set to the last eight digits of the scene's MD5 hash). However, if the seed is not set, the differences between runs should be small (on the order of 1e-6).
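
A per-scene seed of the following form makes repeated runs reproducible (an illustrative sketch of the scheme described above, not the recipe's verbatim code):

```python
import hashlib

import numpy as np


def set_scene_seed(scene: str) -> None:
    """Seed numpy with the last eight decimal digits of the scene name's MD5."""
    np.random.seed(int(hashlib.md5(scene.encode("utf-8")).hexdigest(), 16) % 10**8)
```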

## References

* [1] Byrne, D. and Dillon, H. (1986). "The National Acoustic Laboratories' (NAL) new procedure for selecting the gain and frequency response of a hearing aid". Ear and Hearing, 7(4), 257-265.
* [2] Kates, J.M. and Arehart, K.H. (2014). "The hearing-aid speech perception index (HASPI)". Speech Communication, 65, 75-93.
* [3] Kates, J.M. and Arehart, K.H. (2014). "The hearing-aid speech quality index (HASQI) version 2". Journal of the Audio Engineering Society, 62(3), 99-117.
Empty file added recipes/icassp_2023/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions recipes/icassp_2023/baseline/config.yaml
@@ -0,0 +1,30 @@
path:
  root: ???
  metadata_dir: ${path.root}/clarity_data/metadata
  scenes_listeners_file: ${path.metadata_dir}/scenes_listeners.dev.json
  listeners_file: ${path.metadata_dir}/listeners.json
  scenes_folder: ${path.root}/clarity_data/dev/scenes
  exp_folder: ./exp  # folder to store enhanced signals and final results

nalr:
  nfir: 220
  fs: 44100

compressor:
  threshold: 0.35
  attenuation: 0.1
  attack: 50
  release: 1000
  rms_buffer_size: 0.064

soft_clip: True

evaluate:
  set_random_seed: True
  small_test: False

# hydra config
hydra:
  run:
    dir: ${path.exp_folder}
63 changes: 63 additions & 0 deletions recipes/icassp_2023/baseline/enhance.py
@@ -0,0 +1,63 @@
""" Run the dummy enhancement. """
import json
import logging
import pathlib

import hydra
import numpy as np
from evaluate import make_scene_listener_list
from omegaconf import DictConfig
from scipy.io import wavfile
from tqdm import tqdm

logger = logging.getLogger(__name__)


@hydra.main(config_path=".", config_name="config")
def enhance(cfg: DictConfig) -> None:
"""Run the dummy enhancement."""

enhanced_folder = pathlib.Path("enhanced_signals")
enhanced_folder.mkdir(parents=True, exist_ok=True)

with open(cfg.path.scenes_listeners_file, "r", encoding="utf-8") as fp:
scenes_listeners = json.load(fp)

with open(cfg.path.listeners_file, "r", encoding="utf-8") as fp:
listener_audiograms = json.load(fp) # noqa: F841

# Make list of all scene listener pairs that will be run
scene_listener_pairs = make_scene_listener_list(
scenes_listeners, cfg.evaluate.small_test
)

for scene, listener in tqdm(scene_listener_pairs):
sample_freq, signal = wavfile.read(
pathlib.Path(cfg.path.scenes_folder) / f"{scene}_mix_CH1.wav"
)

# Convert to 32-bit floating point scaled between -1 and 1
signal = (signal / 32768.0).astype(np.float32)

# # Audiograms can read like this, but they are not needed for the baseline
#
# cfs = np.array(listener_audiograms[listener]["audiogram_cfs"])
#
# audiogram_left = np.array(
# listener_audiograms[listener]["audiogram_levels_l"]
# )
# audiogram_right = np.array(
# listener_audiograms[listener]["audiogram_levels_r"]
# )

# Baseline just reads the signal from the front microphone pair
# and write it out as the enhanced signal
#

wavfile.write(
enhanced_folder / f"{scene}_{listener}_enhanced.wav", sample_freq, signal
)


if __name__ == "__main__":
enhance()