diff --git a/01_train.sh b/01_train_2023t2.sh similarity index 100% rename from 01_train.sh rename to 01_train_2023t2.sh diff --git a/01_train_2024t2.sh b/01_train_2024t2.sh new file mode 100644 index 0000000..896a68a --- /dev/null +++ b/01_train_2024t2.sh @@ -0,0 +1,40 @@ +TZ=JST-9 date + +echo $0 $* + +dev_eval=$1 +echo -e "\tdev_eval = '$dev_eval'" +echo + +# check args +if [ "${dev_eval}" != "-d" ] \ + && [ "${dev_eval}" != "-e" ] \ + && [ "${dev_eval}" != "--dev" ] \ + && [ "${dev_eval}" != "--eval" ] +then + echo "$0: argument error" + echo -e "usage\t: $0 ['-d' | '--dev' | '-e' | '--eval']" + echo -e "\tinvalid choice '$dev_eval'" + echo -e "\tchoice from ['-d' | '--dev' | '-e' | '--eval']." + echo -e "\t\t-d, --dev\t: Using Development dataset. " + echo -e "\t\t-e, --eval\t: Using Additional training dataset and Evaluation dataset. " + echo -e "example\t: $ bash $0 -d" + exit 1 +fi + +# main process +base_job="bash" +job="train_ae.sh" + +if [ "${dev_eval}" = "-d" ] || [ "${dev_eval}" = "--dev" ] +then + dataset_list="DCASE2024T2bearing DCASE2024T2fan DCASE2024T2gearbox DCASE2024T2slider DCASE2024T2ToyCar DCASE2024T2ToyTrain DCASE2024T2valve" +elif [ "${dev_eval}" = "-e" ] || [ "${dev_eval}" = "--eval" ] +then + echo eval data has not been published yet. + exit 1 +fi + +for dataset in $dataset_list; do + ${base_job} ${job} ${dataset} ${dev_eval} 0 +done diff --git a/02a_test.sh b/02a_test_2023t2.sh similarity index 100% rename from 02a_test.sh rename to 02a_test_2023t2.sh diff --git a/02a_test_2024t2.sh b/02a_test_2024t2.sh new file mode 100644 index 0000000..4617b0c --- /dev/null +++ b/02a_test_2024t2.sh @@ -0,0 +1,40 @@ +TZ=JST-9 date + +echo $0 $* + +dev_eval=$1 +echo -e "\tdev_eval = '$dev_eval'" +echo + +# check args +if [ "${dev_eval}" != "-d" ] \ + && [ "${dev_eval}" != "-e" ] \ + && [ "${dev_eval}" != "--dev" ] \ + && [ "${dev_eval}" != "--eval" ] +then + echo "$0: argument error" + echo -e "usage\t: $0 ['-d' | '--dev' | '-e' | '--eval']" + echo -e "\tinvalid choice '$dev_eval'" + echo -e "\tchoice from ['-d' | '--dev' | '-e' | '--eval']." + echo -e "\t\t-d, --dev\t: Using Development dataset. " + echo -e "\t\t-e, --eval\t: Using Additional training dataset and Evaluation dataset. " + echo -e "example\t: $ bash $0 -d" + exit 1 +fi + +# main process +base_job="bash" +job="test_ae.sh" + +if [ "${dev_eval}" = "-d" ] || [ "${dev_eval}" = "--dev" ] +then + dataset_list="DCASE2024T2bearing DCASE2024T2fan DCASE2024T2gearbox DCASE2024T2slider DCASE2024T2ToyCar DCASE2024T2ToyTrain DCASE2024T2valve" +elif [ "${dev_eval}" = "-e" ] || [ "${dev_eval}" = "--eval" ] +then + echo eval data has not been published yet. + exit 1 +fi + +for dataset in $dataset_list; do + ${base_job} ${job} ${dataset} ${dev_eval} "MSE" 0 +done \ No newline at end of file diff --git a/02b_test.sh b/02b_test_2023t2.sh similarity index 100% rename from 02b_test.sh rename to 02b_test_2023t2.sh diff --git a/02b_test_2024t2.sh b/02b_test_2024t2.sh new file mode 100644 index 0000000..0909488 --- /dev/null +++ b/02b_test_2024t2.sh @@ -0,0 +1,40 @@ +TZ=JST-9 date + +echo $0 $* + +dev_eval=$1 +echo -e "\tdev_eval = '$dev_eval'" +echo + +# check args +if [ "${dev_eval}" != "-d" ] \ + && [ "${dev_eval}" != "-e" ] \ + && [ "${dev_eval}" != "--dev" ] \ + && [ "${dev_eval}" != "--eval" ] +then + echo "$0: argument error" + echo -e "usage\t: $0 ['-d' | '--dev' | '-e' | '--eval']" + echo -e "\tinvalid choice '$dev_eval'" + echo -e "\tchoice from ['-d' | '--dev' | '-e' | '--eval']." 
+ echo -e "\t\t-d, --dev\t: Using Development dataset. " + echo -e "\t\t-e, --eval\t: Using Additional training dataset and Evaluation dataset. " + echo -e "example\t: $ bash $0 -d" + exit 1 +fi + +# main process +base_job="bash" +job="test_ae.sh" + +if [ "${dev_eval}" = "-d" ] || [ "${dev_eval}" = "--dev" ] +then + dataset_list="DCASE2024T2bearing DCASE2024T2fan DCASE2024T2gearbox DCASE2024T2slider DCASE2024T2ToyCar DCASE2024T2ToyTrain DCASE2024T2valve" +elif [ "${dev_eval}" = "-e" ] || [ "${dev_eval}" = "--eval" ] +then + echo eval data has not been published yet. + exit 1 +fi + +for dataset in $dataset_list; do + ${base_job} ${job} ${dataset} ${dev_eval} "MAHALA" 0 +done \ No newline at end of file diff --git a/03_summarize_results.sh b/03_summarize_results.sh index 107353e..bdea1c0 100644 --- a/03_summarize_results.sh +++ b/03_summarize_results.sh @@ -14,7 +14,8 @@ args_flag_dataset=0 if [ "${dataset}" != "DCASE2020T2" ] \ && [ "${dataset}" != "DCASE2021T2" ] \ && [ "${dataset}" != "DCASE2022T2" ] \ - && [ "${dataset}" != "DCASE2023T2" ] + && [ "${dataset}" != "DCASE2023T2" ] \ + && [ "${dataset}" != "DCASE2024T2" ] then args_flag=1 args_flag_dataset=1 @@ -33,16 +34,17 @@ fi if [ $args_flag -eq 1 ] then echo "$0: argument error" - echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2' ] ['-d' | '--dev' | '-e' | '--eval']" + echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2' | 'DCASE2024T2' ] ['-d' | '--dev' | '-e' | '--eval']" if [ $args_flag_dataset -eq 1 ] then echo -e "\tdataset: invalid choice '$dataset'" - echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2' ]." + echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2' | 'DCASE2024T2' ]." echo -e "\t\tDCASE2020T2\t: Use DCASE2020 Task2 datasets. " echo -e "\t\tDCASE2021T2\t: Use DCASE2021 Task2 datasets. " echo -e "\t\tDCASE2022T2\t: Use DCASE2022 Task2 datasets. " echo -e "\t\tDCASE2023T2\t: Use DCASE2023 Task2 datasets. " + echo -e "\t\tDCASE2023T2\t: Use DCASE2024 Task2 datasets. " echo fi diff --git a/README.md b/README.md index 927641e..ad4ce30 100644 --- a/README.md +++ b/README.md @@ -1,64 +1,48 @@ # Anomalous Sound Detection -## DCASE 2023 Challenge Task 2 Baseline Auto Encoder: dcase2023\_task2\_baseline\_ae +## DCASE 2024 Challenge Task 2 and DCASE 2023 Challenge Task 2 Baseline Auto Encoder: dcase2023\_task2\_baseline\_ae -This is an autoencoder-based baseline for the [DCASE2023 Challenge Task 2](https://dcase.community/challenge2023/). - -This source code is an example implementation of the baseline Auto Encoder of DCASE 2023 Challenge Task 2: First-Shot Unsupervised Anomalous Sound Detection for Machine Condition Monitoring. +This is an autoencoder-based baseline for the [DCASE2024 Challenge Task 2 (DCASE2024T2)](https://dcase.community/challenge2024/) and the [DCASE2023 Challenge Task 2 (DCASE2023T2)](https://dcase.community/challenge2023/). +This source code is an example implementation of the baseline Auto Encoder of DCASE2024T2 and DCASE2023T2: First-Shot Unsupervised Anomalous Sound Detection for Machine Condition Monitoring. This baseline implementation is based on the previous baseline, dcase2022\_baseline\_ae. The model parameter settings of this baseline AE are almost equivalent to those of the dcase2022\_task2\_baseline\_ae. 
Differences between the previous dcase2022\_baseline\_ae and this version are as follows: - The dcase2022\_baseline\_ae was implemented with Keras; however, this version is written in PyTorch. -- Data folder structure is updated to support DCASE 2023 Challenge Task 2 data sets. +- Data folder structure is updated to support DCASE2024T2 and DCASE2023T2 data sets. - The system uses the MSE loss as a loss function for training, but for testing, two score functions depend on the testing modes (i.e., MSE for the Simple Autoencoder mode and Mahalanobis distance for the Selective Mahalanobis mode). ## Description -This system consists of three main scripts (01_train.sh, 02a_test.sh, and 02b_test.sh) with some helper scripts: +This system consists of three main scripts (01_train.sh, 02a_test.sh, and 02b_test.sh) with some helper scripts for DCASE2024T2 (For DCASE2023T2, see [README_legacy](README_legacy.md)): -- Helper scripts - - data\_download\_2023dev.sh +- Helper scripts for DCASE2024T2 + - data\_download\_2024dev.sh - "Development dataset": - - This script downloads development data files and puts them into "data/dcase2023t2/dev\_data/raw/train/" and "data/dcase2023t2/dev\_data/raw/test/". - - data\_download\_2023add.sh **Newly added!!** - - "Additional train dataset for Evaluation": - - This script downloads evaluation data files and puts them into "data/dcase2023t2/eval\_data/raw/train". - - data\_download\_2023eval.sh **Newly added!!** - - "Additional test dataset for Evaluation" - - This script downloads evaluation data files and puts them into "data/dcase2023t2/eval\_data/raw/test". - - Add reference labels to test data in "data/dcase2023t2/eval\_data/raw/test" and put them into "data/dcase2023t2/eval\_data/raw/test\_rename". **Newly added!!** - -- 01_train.sh + - This script downloads development data files and puts them into "data/dcase2024t2/dev\_data/raw/train/" and "data/dcase2024t2/dev\_data/raw/test/". **Newly added!!** + +- 01_train_2024t2.sh - "Development" mode: - - This script trains a model for each machine type for each section ID by using the directory `data/dcase2023t2/dev_data/raw//train/` - - "Evaluation" mode: - - This script trains a model for each machine type for each section ID by using the directory `data/dcase2023t2/eval_data/raw//train/`. - -- 02a_test.sh (Use MSE as a score function for the Simple Autoencoder mode) + - This script trains a model for each machine type for each section ID by using the directory `data/dcase2024t2/dev_data/raw//train/`. **Newly added!!** + +- 02a_test_2024t2.sh (Use MSE as a score function for the Simple Autoencoder mode) - "Development" mode: - - This script makes a CSV file for each section, including the anomaly scores for each WAV file in the directories `data/dcase2023t2/dev_data/raw//test/`. + - This script makes a CSV file for each section, including the anomaly scores for each WAV file in the directories `data/dcase2024t2/dev_data/raw//test/`. **Newly added!!** - The CSV files will be stored in the directory `results/`. - It also makes a csv file including AUC, pAUC, precision, recall, and F1-score for each section. - - "Evaluation" mode: - - This script makes a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2023t2/eval_data/raw//test/`. (These directories will be made available with the "evaluation dataset".) - - The CSV files are stored in the directory `results/`. 
- - If you have `data/dcase2023t2/eval_data/raw//test_rename/`, It also makes a CSV file including AUC, pAUC, precision, recall, and F1-score for each section. -- 02b_test.sh (Use Mahalanobis distance as a score function for the Selective Mahalanobis mode) +- 02b_test_2024t2.sh (Use Mahalanobis distance as a score function for the Selective Mahalanobis mode) - "Development" mode: - - This script makes a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2023t2/dev_data/raw//test/`. + - This script makes a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2024t2/dev_data/raw//test/`. **Newly added!!** - The CSV files will be stored in the directory `results/`. - It also makes a csv file including AUC, pAUC, precision, recall, and F1-score for each section. - - "Evaluation" mode: - - This script makes a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2023t2/eval_data/raw//test/`. (These directories will be made available with the "evaluation dataset".) - - The CSV files are stored in the directory `results/`. - - If you have `data/dcase2023t2/eval_data/raw//test_rename/`, It also makes a CSV file including AUC, pAUC, precision, recall, and F1-score for each section. - - 03_summarize_results.sh - This script summarizes results into a CSV file. +C.f., for DCASE2023T2, see [README_legacy](README_legacy.md). + + ## Usage ### 1. Clone repository @@ -69,22 +53,25 @@ Clone this repository from GitHub. We will launch the datasets in three stages. Therefore, please download the datasets in each stage: - + "Development dataset" - + ~~Download dev\_data_.zip from [https://zenodo.org/record/7690148](https://zenodo.org/record/7690148).~~ - + ~~Download "dev\_data_.zip" from [https://zenodo.org/record/7690157](https://zenodo.org/record/7690157). **Updated on 2023/04/15**~~ - + Download "dev\_data_.zip" from [https://zenodo.org/record/7882613](https://zenodo.org/record/7882613). **Updated on 2023/05/01** + + DCASE 2024 Challenge Task 2 + + "Development Dataset" **New! (2024/04/01)** + + Download "dev\_data_.zip" from +[https://zenodo.org/records/10902294](https://zenodo.org/records/10902294). + + + For DCASE 2023 Challenge Task 2 + (C.f., for DCASE2023T2, see [README_legacy](README_legacy.md)) + + "Development Dataset" + + Download "dev\_data_.zip" from [https://zenodo.org/record/7882613](https://zenodo.org/record/7882613). + + "Additional Training Dataset", i.e., the evaluation dataset for training + + Download "eval\_data__train.zip" from [https://zenodo.org/record/7830345](https://zenodo.org/record/7830345). + + "Evaluation Dataset", i.e., the evaluation dataset for test + + Download "eval\_data__test.zip" from [https://zenodo.org/record/7860847](https://zenodo.org/record/7860847). - + "Additional training dataset", i.e., the evaluation dataset for training **Updated on 2023/04/15** - + After April 15, 2023, download the additional training dataset - + Download "eval\_data__train.zip" from [https://zenodo.org/record/7830345](https://zenodo.org/record/7830345). - + "Evaluation dataset", i.e., the evaluation dataset for test **New! (2023/05/01)** - + After May 1, 2023, download the evaluation dataset. - + Download "eval\_data__test.zip" from [https://zenodo.org/record/7860847](https://zenodo.org/record/7860847). ### 3. 
Unzip the downloaded files and make the directory structure as follows: + dcase2023\_task2\_baseline\_ae - + data/dcase2023t2/dev\_data/raw/ + + data/dcase2024t2/dev\_data/raw/ + fan/ + train (only normal clips) + section\_00\_source\_train\_normal\_0000\_.wav @@ -108,42 +95,7 @@ We will launch the datasets in three stages. Therefore, please download the data + section\_00\_target\_test\_anomaly\_0049\_.wav + attributes\_00.csv (attributes CSV for section 00) + gearbox/ (The other machine types have the same directory structure as fan.) - + data/dcase2023t2/eval\_data/raw/ - + \/ - + train/ (after launch of the additional training dataset) - + section\_00\_source\_train\_normal\_0000\_.wav - + ... - + section\_00\_source\_train\_normal\_0989\_.wav - + section\_00\_target\_train\_normal\_0000\_.wav - + ... - + section\_00\_target\_train\_normal\_0009\_.wav - + test/ (after launch of the evaluation dataset) - + section\_00\_test\_0000.wav - + ... - + section\_00\_test\_0199.wav - + test_rename/ (convert from test directory using `tools/rename.py`) - + section\_00\_source\_test\_normal\_0000\_.wav - + ... - + section\_00\_source\_test\_normal\_0049\_.wav - + section\_00\_source\_test\_anomaly\_0000\_.wav - + ... - + section\_00\_source\_test\_anomaly\_0049\_.wav - + section\_00\_target\_test\_normal\_0000\_.wav - + ... - + section\_00\_target\_test\_normal\_0049\_.wav - + section\_00\_target\_test\_anomaly\_0000\_.wav - + ... - + section\_00\_target\_test\_anomaly\_0049\_.wav - + attributes\_00.csv (attributes CSV for section 00) - + \ (The other machine types have the same directory structure as \/.) - -#### 3.1. Add reference labels to the test data in the eval\_data -Run the `tools/rename_eval_wav.py`. This script adds reference labels to test data in "data/dcase2023t2/eval\_data/raw/test" and puts them into "data/dcase2023t2/eval\_data/raw/test\_rename". - -```dotnetcli -$ python tools/rename_eval_wev.py --dataset_parent_dir=data --dataset_type=DCASE2023T2 -``` -Note that this script is also used in `data_download_2023eval.sh` + + data/dcase2024t2/eval\_data/raw/ ### 4. Change parameters @@ -152,48 +104,53 @@ Note that if values are specified in the command line option, it will overwrite ### 4.1. Enable Auto-download dataset -If you haven't yet downloaded the dataset yourself nor you have not run the download script (example, `data_download_2023dev.sh`) then you may want to use the auto download. +If you have not yet downloaded the dataset yourself and have not run the download script (for example, `data_download_2024dev.sh`), you may want to use the auto-download. To enable the auto-downloading, set the parameter `--is_auto_download` (default: `False`) `True` in `baseline.yaml`. If `--is_auto_download` is `True`, then auto-download is executed. ### 5. Run the training script (for the development dataset) -Run the training script 01_train.sh. Use the option -d for the development dataset `data/dcase2023t2/dev_data//raw/train/`. +Run the training script `01_train_2024t2.sh`. Use the option -d for the development dataset `data/dcase2024t2/dev_data//raw/train/`. +`01_train_2024t2.sh` differs from `01_train_2023t2.sh` only in dataset; ```dotnetcli -$ 01_train.sh -d +# using DCASE2024 Task 2 Datasets +$ 01_train_2024t2.sh -d ``` - -The two operating modes of this baseline implementation, the simple Autoencoder, and the Selective Mahalanobis AE modes, share the common training process.
By running the script `01_train.sh`, all the model parameters for the simple Autoencoder and the selective Mahalanobis AE will be trained at the same time. +The two operating modes of this baseline implementation, the simple Autoencoder, and the Selective Mahalanobis AE modes, share the common training process. By running the script `01_train_2024t2.sh`, all the model parameters for the simple Autoencoder and the selective Mahalanobis AE will be trained at the same time. After the parameter update of the Autoencoder at the last epoch specified by either the yaml file or the command line option, the covariance matrixes for the Mahalanobis distance calculation will be set. ### 6. Run the test script (for the development dataset) ### 6.1. Testing with the Simple Autoencoder mode -Run the test script `02a_test.sh`. Use the option `-d` for the development dataset `data/dcase2023t2/dev_data//raw/test/`. +Run the test script `02a_test_2024t2.sh`. Use the option `-d` for the development dataset `data/dcase2024t2/dev_data//raw/test/`. +`02a_test_2024t2.sh` differs from `02a_test_2023t2.sh` only in dataset; ```dotnetcli -$ 02a_test.sh -d +# using DCASE2024 Task 2 Datasets +$ 02a_test_2024t2.sh -d ``` -The `02a_test.sh` options are the same as those for `01_train.sh`. `02a_test.sh` calculates an anomaly score for each wav file in the directories `data/dcase2023t2/dev_data/raw//test/` or `data/dcase2023t2/dev_data/raw//source_test/` and `data/dcase2023t2/dev_data/raw//target_test/`. +The `02a_test_2024t2.sh` options are the same as those for `01_train_2024t2.sh`. `02a_test_2024t2.sh` calculates an anomaly score for each wav file in the directories `data/dcase2024t2/dev_data/raw//test/` or `data/dcase2024t2/dev_data/raw//source_test/` and `data/dcase2024t2/dev_data/raw//target_test/`. A CSV file for each section, including the anomaly scores, will be stored in the directory `results/`. If the mode is "development", the script also outputs another CSV file, including AUC, pAUC, precision, recall, and F1-score for each section. ### 6.2. Testing with the Selective Mahalanobis mode -Run the test script `02b_test.sh`. Use the option `-d` for the development dataset `data/dcase2023t2/dev_data//raw/test/`. +Run the test script `02b_test_2024t2.sh`. Use the option `-d` for the development dataset `data/dcase2024t2/dev_data//raw/test/`. +`02b_test_2024t2.sh` differs from `02b_test_2023t2.sh` only in dataset; ```dotnetcli -$ 02b_test.sh -d +# using DCASE2024 Task 2 Datasets +$ 02b_test_2024t2.sh -d ``` -The `02b_test.sh` options are the same as those for `01_train.sh`. `02b_test.sh` calculates an anomaly score for each wav file in the directories `data/dcase2023t2/dev_data/raw//test/` or `data/dcase2023t2/dev_data/raw//source_test/` and `data/dcase2023t2/dev_data/raw//target_test/`. +The `02b_test_2024t2.sh` options are the same as those for `01_train_2024t2.sh`. `02b_test_2024t2.sh` calculates an anomaly score for each wav file in the directories `data/dcase2024t2/dev_data/raw//test/` or `data/dcase2024t2/dev_data/raw//source_test/` and `data/dcase2024t2/dev_data/raw//target_test/`. A CSV file for each section, including the anomaly scores, will be stored in the directory `results/`. If the mode is "development", the script also outputs another CSV file, including AUC, pAUC, precision, recall, and F1-score for each section. ### 7. Check results You can check the anomaly scores in the CSV files `anomaly_score__section__test.csv` in the directory `results/`. 
-Each anomaly score corresponds to a wav file in the directories `data/dcase2023t2/dev_data//test/`. +Each anomaly score corresponds to a wav file in the directories `data/dcase2024t2/dev_data//test/`. `anomaly_score__section_00_test.csv` @@ -228,57 +185,57 @@ arithmetic mean,00,0.88,0.5078,0.5063157894736842,0.5536842105263158,0.492631578 harmonic mean,00,0.88,0.5078,0.5063157894736842,0.5536842105263158,0.4926315789473684,0.0,0.0,0.0,0.0,0. ``` -### 8. Run training script for the additional training dataset (after April 15, 2023) +### 8. Run training script for the additional training dataset (after May 15, 2024) -After the additional training dataset is launched, download and unzip it. Move it to `data/dcase2023t2/eval_data/raw//train/`. Run the training script `01_train.sh` with the option `-e`. +After the additional training dataset is launched, download and unzip it. Move it to `data/dcase2024t2/eval_data/raw//train/`. Run the training script `01_train_2024t2.sh` with the option `-e`. ```dotnetcli -$ 01_train.sh -e +$ 01_train_2024t2.sh -e ``` -Models are trained by using the additional training dataset `data/dcase2023t2/raw/eval_data//train/`. +Models are trained by using the additional training dataset `data/dcase2024t2/eval_data/raw//train/`. -### 9. Run the test script for the evaluation dataset (after May 1, 2023) +### 9. Run the test script for the evaluation dataset (after June 1, 2024) ### 9.1. Testing with the Simple Autoencoder mode -After the evaluation dataset for the test is launched, download and unzip it. Move it to `data/dcase2023t2/eval_data/raw//test/`. Run the test script `02a_test.sh` with the option `-e`. +After the evaluation dataset for the test is launched, download and unzip it. Move it to `data/dcase2024t2/eval_data/raw//test/`. Run the test script `02a_test_2024t2.sh` with the option `-e`. ```dotnetcli -$ 02a_test.sh -e +$ 02a_test_2024t2.sh -e ``` -Anomaly scores are calculated using the evaluation dataset, i.e., `data/dcase2023t2/eval_data/raw//test/`. The anomaly scores are stored as CSV files in the directory `results/`. You can submit the CSV files for the challenge. From the submitted CSV files, we will calculate AUC, pAUC, and your ranking. +Anomaly scores are calculated using the evaluation dataset, i.e., `data/dcase2024t2/eval_data/raw//test/`. The anomaly scores are stored as CSV files in the directory `results/`. You can submit the CSV files for the challenge. From the submitted CSV files, we will calculate AUC, pAUC, and your ranking. ### 9.2. Testing with the Selective Mahalanobis mode -After the evaluation dataset for the test is launched, download and unzip it. Move it to `data/dcase2023t2/eval_data/raw//test/`. Run the `02b_test.sh` test script with the option `-e`. +After the evaluation dataset for the test is launched, download and unzip it. Move it to `data/dcase2024t2/eval_data/raw//test/`. Run the `02b_test_2024t2.sh` test script with the option `-e`. ```dotnetcli -$ 02b_test.sh -e +$ 02b_test_2024t2.sh -e ``` -Anomaly scores are calculated using the evaluation dataset, i.e., `data/dcase2023t2/eval_data/raw//test/`. The anomaly scores are stored as CSV files in the directory `results/`. You can submit the CSV files for the challenge. From the submitted CSV files, we will calculate AUC, pAUC, and your ranking. +Anomaly scores are calculated using the evaluation dataset, i.e., `data/dcase2024t2/eval_data/raw//test/`. The anomaly scores are stored as CSV files in the directory `results/`. You can submit the CSV files for the challenge.
From the submitted CSV files, we will calculate AUC, pAUC, and your ranking. ### 10. Summarize results -After the executed `02a_test.sh`, `02b_test.sh`, or both. Run the summarize script `03_summarize_results.sh` with the option `DCASE2023T2 -d` or `DCASE2023T2 -e`. +After you have executed `02a_test_2024t2.sh`, `02b_test_2024t2.sh`, or both, run the summarize script `03_summarize_results.sh` with the option `DCASE2024T2 -d` or `DCASE2024T2 -e`. ```dotnetcli -# Summarize development dataset -$ 03_summarize_results.sh DCASE2023T2 -d +# Summarize development dataset 2024 +$ 03_summarize_results.sh DCASE2024T2 -d -# Summarize the evaluation dataset -$ 03_summarize_results.sh DCASE2023T2 -e +# Summarize the evaluation dataset 2024 +$ 03_summarize_results.sh DCASE2024T2 -e ``` -After the summary, the results are exported in CSV format to `results/dev_data/baseline/summarize/DCASE2023T2` or `results/eval_data/baseline/summarize/DCASE2023T2`. +After the summary, the results are exported in CSV format to `results/dev_data/baseline/summarize/DCASE2024T2` or `results/eval_data/baseline/summarize/DCASE2024T2`. If you want to change, summarize results directory or export directory, edit `03_summarize_results.sh`. ## Legacy support -This version takes the legacy datasets provided in DCASE2020 task2, DCASE2021 task2, and DCASE2022 task2 dataset for inputs. +This version takes the legacy datasets provided in DCASE2020 task2, DCASE2021 task2, DCASE2022 task2, and DCASE2023 task2 as inputs. The Legacy support scripts are similar to the main scripts. These are in `tools` directory. [learn more](README_legacy.md) @@ -308,6 +265,12 @@ We developed and tested the source code on Ubuntu 18.04.6 LTS. ## Change Log +### [3.0.0](https://github.com/nttcslab/dcase2023_task2_baseline_ae/releases/tag/v3.0.0) + +#### Added + +- Provides support for the datasets used in DCASE2024 Task 2. + ### [2.0.1](https://github.com/nttcslab/dcase2023_task2_baseline_ae/releases/tag/v2.0.1) #### Fixed diff --git a/README_legacy.md b/README_legacy.md index 6a596b9..dbb5d42 100644 --- a/README_legacy.md +++ b/README_legacy.md @@ -1,6 +1,6 @@ # Legacy support -This version supports reading the datasets from DCASE2020 task2, DCASE2021 task2, and DCASE2022 task2 dataset for inputs. +This version supports reading the datasets from DCASE2020 task2, DCASE2021 task2, DCASE2022 task2, and DCASE2023 task2 as inputs. ## Description @@ -17,6 +17,9 @@ Legacy-support scripts are similar to the main scripts. These are in `tools` dir - tools/data\_download\_2022.sh - This script downloads development data and evaluation data files and puts them into `data/dcase2022t2/dev_data/raw/` and `data/dcase2022t2/eval_data/raw/`. - Rename evaluation data after downloading the dataset to evaluate and calculate AUC score. Renamed data is stored in `data/dcase2022t2/eval_data/raw/test_rename` + - tools/data\_download\_2023.sh + - This script downloads development data and evaluation data files and puts them into `data/dcase2023t2/dev_data/raw/` and `data/dcase2023t2/eval_data/raw/`. + - Rename evaluation data after downloading the dataset to evaluate and calculate AUC score. Renamed data is stored in `data/dcase2023t2/eval_data/raw/test_rename` + - tools/01\_train\_legacy.sh - DCASE2020 task2 mode: @@ -34,6 +37,12 @@ Legacy-support scripts are similar to the main scripts.
These are in `tools` dir - This script trains a model for each machine type for each section ID by using the directory `data/dcase2022t2/dev_data/raw//train/` - "Evaluation" mode: - This script trains a model for each machine type for each section ID by using the directory `data/dcase2022t2/eval_data/raw//train/`. + - DCASE2023 task2 mode: + - "Development" mode: + - This script trains a model for each machine type for each section ID by using the directory `data/dcase2023t2/dev_data/raw//train/` + - "Evaluation" mode: + - This script trains a model for each machine type for each section ID by using the directory `data/dcase2023t2/eval_data/raw//train/`. + - tools/02a\_test\_legacy.sh (Use MSE as a score function for the Simple Autoencoder mode) - DCASE2020 task2 mode: @@ -63,6 +72,15 @@ Legacy-support scripts are similar to the main scripts. These are in `tools` dir - This script generates a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2022t2/eval_data/raw//test/`. (These directories will be made available with the "evaluation dataset".) - The generated CSV files are stored in the directory `results/`. - If `test_rename` directory is available, this script generates a CSV file including AUC, pAUC, precision, recall, and F1-score for each section. + - DCASE2023 task2 mode: + - "Development" mode: + - This script generates a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2023t2/dev_data/raw//test/`. + - The generated CSV files will be stored in the directory `results/`. + - It also generates a CSV file including AUC, pAUC, precision, recall, and F1-score for each section. + - "Evaluation" mode: + - This script generates a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2023t2/eval_data/raw//test/`. (These directories will be made available with the "evaluation dataset".) + - The generated CSV files are stored in the directory `results/`. + - If `test_rename` directory is available, this script generates a CSV file including AUC, pAUC, precision, recall, and F1-score for each section. - tools/02b\_test\_legacy.sh (Use Mahalanobis distance as a score function for the Selective Mahalanobis mode) - "Development" mode: @@ -82,36 +100,53 @@ Legacy-support scripts are similar to the main scripts. These are in `tools` dir - This script generates a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2022t2/eval_data/raw//test/`. (These directories will be made available with the "evaluation dataset".) - The generated CSV files are stored in the directory. - This script also generates a CSV file, containing AUC, pAUC, precision, recall, and F1-score for each section. + - DCASE2023 task2 mode: + - "Development" mode: + - This script generates a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2023t2/dev_data/raw//test/`. + - The CSV files will be stored in the directory `results/`. + - It also makes a csv file including AUC, pAUC, precision, recall, and F1-score for each section. + - "Evaluation" mode: + - This script generates a CSV file for each section, including the anomaly scores for each wav file in the directories `data/dcase2023t2/eval_data/raw//test/`. (These directories will be made available with the "evaluation dataset".) + - The generated CSV files are stored in the directory. 
+ - This script also generates a CSV file, containing AUC, pAUC, precision, recall, and F1-score for each section. - 03_summarize_results.sh - This script summarizes results into a csv file. - - Use the same as when summarizing DCASE2023T2 results. + - Use the same as when summarizing DCASE2023T2 and DCASE2024T2 results. ## Usage Legacy scripts in `tools` directory can be executed regardless of the current directory. ### 1. Download datasets - + DCASE2020T2 task2 + + DCASE2020T2 + "Development dataset" + Download "dev\_data\_\\.zip" from [https://zenodo.org/record/3678171](https://zenodo.org/record/3678171). + "Additional training dataset", i.e., the evaluation dataset for training + Download "eval\_data\_\_train.zip" from [https://zenodo.org/record/3727685](https://zenodo.org/record/3727685). + "Evaluation dataset", i.e., the evaluation dataset for test + Download "eval\_data\_\\_test.zip" from [https://zenodo.org/record/3841772](https://zenodo.org/record/3841772). - + DCASE2021T2 task2 + + DCASE2021T2 + "Development dataset" + Download "dev\_data\_\.zip" from [https://zenodo.org/record/4562016](https://zenodo.org/record/4562016). + "Additional training dataset", i.e., the evaluation dataset for training + Download "eval\_data\_\_train.zip" from [https://zenodo.org/record/4660992](https://zenodo.org/record/4660992). + "Evaluation dataset", i.e., the evaluation dataset for test + Download "eval\_data\_\\_test.zip" from [https://zenodo.org/record/4884786](https://zenodo.org/record/4884786). - + DCASE2022T2 task2 + + DCASE2022T2 + "Development dataset" + Download "dev\_data\_\.zip" from [https://zenodo.org/record/6355122](https://zenodo.org/record/6355122). + "Additional training dataset", i.e., the evaluation dataset for training + Download "eval\_data\_\\_train.zip" from [https://zenodo.org/record/6462969](https://zenodo.org/record/6462969). + "Evaluation dataset", i.e., the evaluation dataset for test + Download "eval\_data\_\\_test.zip" from [https://zenodo.org/record/6586456](https://zenodo.org/record/6586456). + + DCASE2023T2 + + "Development Dataset" + + Download "dev\_data_.zip" from [https://zenodo.org/record/7882613](https://zenodo.org/record/7882613). + + "Additional Training Dataset", i.e., the evaluation dataset for training + + Download "eval\_data__train.zip" from [https://zenodo.org/record/7830345](https://zenodo.org/record/7830345). + + "Evaluation Dataset", i.e., the evaluation dataset for test + + Download "eval\_data__test.zip" from [https://zenodo.org/record/7860847](https://zenodo.org/record/7860847). + ### 3. Unzip the downloaded files and make the directory structure as follows: @@ -123,12 +158,13 @@ The legacy dataset directory structure is the same as DCASE2023 task2. These par - /dcase2021t2 - /dcase2022t2 - /dcase2023t2 + - /dcase2024t2 [learn more about directory structure](#directory-structure-of-downloaded-dataset). ### 4. Change parameters -Change parameters using `baseline.yaml` in the same as [DCASE2023 mode](./README.md#4-change-parameters). +Change parameters using `baseline.yaml` in the same as [DCASE2024 mode](./README.md#4-change-parameters). ### 5. 
Run the training script @@ -145,6 +181,7 @@ $ bash tools/01_train.sh DCASE2020T2 -d - `DCASE2020T2` - `DCASE2021T2` - `DCASE2022T2` + - `DCASE2023T2` - Second parameters - `-d` - `-e` @@ -165,6 +202,7 @@ $ bash tools/02a_test_legacy.sh DCASE2020T2 -d - `DCASE2020T2` - `DCASE2021T2` - `DCASE2022T2` + - `DCASE2023T2` - Second parameters - `-d` - `-e` @@ -183,6 +221,7 @@ $ bash tools/02b_test_legacy.sh DCASE2020T2 -d - `DCASE2020T2` - `DCASE2021T2` - `DCASE2022T2` + - `DCASE2023T2` - Second parameters - `-d` - `-e` @@ -214,6 +253,7 @@ $ 03_summarize_results.sh DCASE2020T2 -e - `DCASE2021T2` - `DCASE2022T2` - `DCASE2023T2` + - `DCASE2024T2` - Second parameters - `-d` - `-e` diff --git a/data/dcase2024t2/dev_data/processed/.gitignore b/data/dcase2024t2/dev_data/processed/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/data/dcase2024t2/dev_data/processed/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/data/dcase2024t2/dev_data/raw/.gitignore b/data/dcase2024t2/dev_data/raw/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/data/dcase2024t2/dev_data/raw/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/data/dcase2024t2/eval_data/processed/.gitignore b/data/dcase2024t2/eval_data/processed/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/data/dcase2024t2/eval_data/processed/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/data/dcase2024t2/eval_data/raw/.gitignore b/data/dcase2024t2/eval_data/raw/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/data/dcase2024t2/eval_data/raw/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/data_download_2024dev.sh b/data_download_2024dev.sh new file mode 100644 index 0000000..2c11456 --- /dev/null +++ b/data_download_2024dev.sh @@ -0,0 +1,8 @@ +mkdir -p "data/dcase2024t2/dev_data/raw" + +# download dev data +cd "data/dcase2024t2/dev_data/raw" +for machine_type in bearing fan gearbox slider ToyCar ToyTrain valve; do +wget "https://zenodo.org/record/10902294/files/dev_${machine_type}.zip" +unzip "dev_${machine_type}.zip" +done diff --git a/datasets/datasets.py b/datasets/datasets.py index 354c069..dc044eb 100644 --- a/datasets/datasets.py +++ b/datasets/datasets.py @@ -104,6 +104,13 @@ def __init__(self, args): class Datasets: DatasetsDic = { + 'DCASE2024T2ToyCar':DCASE202XT2, + 'DCASE2024T2ToyTrain':DCASE202XT2, + 'DCASE2024T2bearing':DCASE202XT2, + 'DCASE2024T2fan':DCASE202XT2, + 'DCASE2024T2gearbox':DCASE202XT2, + 'DCASE2024T2slider':DCASE202XT2, + 'DCASE2024T2valve':DCASE202XT2, 'DCASE2023T2bandsaw':DCASE202XT2, 'DCASE2023T2bearing':DCASE202XT2, 'DCASE2023T2fan':DCASE202XT2, diff --git a/datasets/download_path_2024.yaml b/datasets/download_path_2024.yaml new file mode 100644 index 0000000..4793551 --- /dev/null +++ b/datasets/download_path_2024.yaml @@ -0,0 +1,22 @@ +DCASE2024T2: + bearing: + dev: + - https://zenodo.org/record/10902294/files/dev_bearing.zip + fan: + dev: + - https://zenodo.org/record/10902294/files/dev_fan.zip + gearbox: + dev: + - https://zenodo.org/record/10902294/files/dev_gearbox.zip + slider: + dev: + - https://zenodo.org/record/10902294/files/dev_slider.zip + ToyCar: + dev: + - https://zenodo.org/record/10902294/files/dev_ToyCar.zip + ToyTrain: + dev: + - https://zenodo.org/record/10902294/files/dev_ToyTrain.zip + valve: + dev: + - https://zenodo.org/record/10902294/files/dev_valve.zip diff --git 
a/datasets/loader_common.py b/datasets/loader_common.py index a6214a0..fa82894 100644 --- a/datasets/loader_common.py +++ b/datasets/loader_common.py @@ -62,6 +62,7 @@ # download dataset parameter ######################################################################## DOWNLOAD_PATH_YAML_DICT = { + "DCASE2024T2":"datasets/download_path_2024.yaml", "DCASE2023T2":"datasets/download_path_2023.yaml", "legacy":"datasets/download_path_legacy.yaml", } @@ -335,7 +336,9 @@ def download_raw_data( dataset, root ): - if dataset == "DCASE2023T2": + if dataset == "DCASE2024T2": + download_path_yaml = DOWNLOAD_PATH_YAML_DICT["DCASE2024T2"] + elif dataset == "DCASE2023T2": download_path_yaml = DOWNLOAD_PATH_YAML_DICT["DCASE2023T2"] else: download_path_yaml = DOWNLOAD_PATH_YAML_DICT["legacy"] @@ -464,15 +467,23 @@ def is_enabled_pickle(pickle_path): "legacy":"datasets/machine_type_legacy.yaml", "DCASE2023T2_dev":"datasets/machine_type_2023_dev.yaml", "DCASE2023T2_eval":"datasets/machine_type_2023_eval.yaml", + "DCASE2024T2_dev":"datasets/machine_type_2024_dev.yaml", } def get_machine_type_dict(dataset_name, mode=True): if dataset_name in ["DCASE2020T2", "DCASE2021T2", "DCASE2022T2"]: yaml_path = YAML_PATH["legacy"] + elif dataset_name == "DCASE2023T2" and mode: + yaml_path = YAML_PATH["DCASE2023T2_dev"] elif dataset_name == "DCASE2023T2" and not mode: yaml_path = YAML_PATH["DCASE2023T2_eval"] + elif dataset_name == "DCASE2024T2" and mode: + yaml_path = YAML_PATH["DCASE2024T2_dev"] + elif dataset_name == "DCASE2024T2" and not mode: + raise ValueError("DCASE2024T2 eval data has not been published yet.") + # yaml_path = YAML_PATH["DCASE2024T2_eval"] else: - yaml_path = YAML_PATH["DCASE2023T2_dev"] + raise KeyError() with open(yaml_path, "r") as f: machine_type_dict = yaml.safe_load(f) diff --git a/datasets/machine_type_2024_dev.yaml b/datasets/machine_type_2024_dev.yaml new file mode 100644 index 0000000..d5fbee1 --- /dev/null +++ b/datasets/machine_type_2024_dev.yaml @@ -0,0 +1,24 @@ +DCASE2024T2: + machine_type: + ToyCar: + dev: + - "00" + ToyTrain: + dev: + - "00" + fan: + dev: + - "00" + gearbox: + dev: + - "00" + bearing: + dev: + - "00" + slider: + dev: + - "00" + valve: + dev: + - "00" + section_keyword: section \ No newline at end of file diff --git a/tools/01_train_legacy.sh b/tools/01_train_legacy.sh index 93b9166..a6329b0 100644 --- a/tools/01_train_legacy.sh +++ b/tools/01_train_legacy.sh @@ -18,7 +18,8 @@ args_flag=0 args_flag_dataset=0 if [ "${dataset}" != "DCASE2020T2" ] \ && [ "${dataset}" != "DCASE2021T2" ] \ - && [ "${dataset}" != "DCASE2022T2" ] + && [ "${dataset}" != "DCASE2022T2" ] \ + && [ "${dataset}" != "DCASE2023T2" ] then args_flag=1 args_flag_dataset=1 @@ -37,15 +38,16 @@ fi if [ $args_flag -eq 1 ] then echo "$0: argument error" - echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2'] ['-d' | '--dev' | '-e' | '--eval']" + echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2'] ['-d' | '--dev' | '-e' | '--eval']" if [ $args_flag_dataset -eq 1 ] then echo -e "\tdataset: invalid choice '$dataset'" - echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2']." + echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2']." echo -e "\t\tDCASE2020T2\t: Use DCASE2020 Task2 datasets. " echo -e "\t\tDCASE2021T2\t: Use DCASE2021 Task2 datasets. " echo -e "\t\tDCASE2022T2\t: Use DCASE2022 Task2 datasets. " + echo -e "\t\tDCASE2023T2\t: Use DCASE2023 Task2 datasets. 
" echo fi @@ -64,7 +66,17 @@ fi # main process for job in "train_ae.sh"; do - if [ $dataset = "DCASE2022T2" ]; then + if [ $dataset = "DCASE2023T2" ]; then + if [ $dev_eval = "-d" ] || [ $dev_eval = "--dev" ]; then + for machine_type in DCASE2023T2bearing DCASE2023T2fan DCASE2023T2gearbox DCASE2023T2slider DCASE2023T2ToyCar DCASE2023T2ToyTrain DCASE2023T2valve; do + ${base_job} $job ${machine_type} ${dev_eval} 0 + done + else # $dev_eval = "-e" || $dev_eval = "--eval" + for machine_type in DCASE2023T2ToyDrone DCASE2023T2ToyNscale DCASE2023T2ToyTank DCASE2023T2Vacuum DCASE2023T2bandsaw DCASE2023T2grinder DCASE2023T2shaker; do + ${base_job} $job ${machine_type} ${dev_eval} 0 + done + fi + elif [ $dataset = "DCASE2022T2" ]; then if [ $dev_eval = "-d" ] || [ $dev_eval = "--dev" ]; then for machine_type in DCASE2022T2bearing DCASE2022T2fan DCASE2022T2gearbox DCASE2022T2slider DCASE2022T2ToyCar DCASE2022T2ToyTrain DCASE2022T2valve; do ${base_job} $job ${machine_type} ${dev_eval} 0 diff --git a/tools/02a_test_legacy.sh b/tools/02a_test_legacy.sh index 69d9ba3..fc97da9 100644 --- a/tools/02a_test_legacy.sh +++ b/tools/02a_test_legacy.sh @@ -19,7 +19,8 @@ args_flag=0 args_flag_dataset=0 if [ "${dataset}" != "DCASE2020T2" ] \ && [ "${dataset}" != "DCASE2021T2" ] \ - && [ "${dataset}" != "DCASE2022T2" ] + && [ "${dataset}" != "DCASE2022T2" ] \ + && [ "${dataset}" != "DCASE2023T2" ] then args_flag=1 args_flag_dataset=1 @@ -38,15 +39,16 @@ fi if [ $args_flag -eq 1 ] then echo "$0: argument error" - echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2'] ['-d' | '--dev' | '-e' | '--eval']" + echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2'] ['-d' | '--dev' | '-e' | '--eval']" if [ $args_flag_dataset -eq 1 ] then echo -e "\tdataset: invalid choice '$dataset'" - echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2']." + echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2']." echo -e "\t\tDCASE2020T2\t: Use DCASE2020 Task2 datasets. " echo -e "\t\tDCASE2021T2\t: Use DCASE2021 Task2 datasets. " echo -e "\t\tDCASE2022T2\t: Use DCASE2022 Task2 datasets. " + echo -e "\t\tDCASE2023T2\t: Use DCASE2023 Task2 datasets. 
" echo fi @@ -66,7 +68,17 @@ fi # main process for job in "test_ae.sh"; do - if [ $dataset = "DCASE2022T2" ]; then + if [ $dataset = "DCASE2023T2" ]; then + if [ $dev_eval = "-d" ] || [ $dev_eval = "--dev" ]; then + for machine_type in DCASE2023T2bearing DCASE2023T2fan DCASE2023T2gearbox DCASE2023T2slider DCASE2023T2ToyCar DCASE2023T2ToyTrain DCASE2023T2valve; do + ${base_job} $job ${machine_type} ${dev_eval} ${score} 0 + done + else # $dev_eval = "-e" || $dev_eval = "--eval" + for machine_type in DCASE2023T2ToyDrone DCASE2023T2ToyNscale DCASE2023T2ToyTank DCASE2023T2Vacuum DCASE2023T2bandsaw DCASE2023T2grinder DCASE2023T2shaker; do + ${base_job} $job ${machine_type} ${dev_eval} ${score} 0 + done + fi + elif [ $dataset = "DCASE2022T2" ]; then if [ $dev_eval = "-d" ] || [ $dev_eval = "--dev" ]; then for machine_type in DCASE2022T2bearing DCASE2022T2fan DCASE2022T2gearbox DCASE2022T2slider DCASE2022T2ToyCar DCASE2022T2ToyTrain DCASE2022T2valve; do ${base_job} $job ${machine_type} ${dev_eval} ${score} 0 diff --git a/tools/02b_test_legacy.sh b/tools/02b_test_legacy.sh index b76956d..b92d75a 100644 --- a/tools/02b_test_legacy.sh +++ b/tools/02b_test_legacy.sh @@ -19,7 +19,8 @@ args_flag=0 args_flag_dataset=0 if [ "${dataset}" != "DCASE2020T2" ] \ && [ "${dataset}" != "DCASE2021T2" ] \ - && [ "${dataset}" != "DCASE2022T2" ] + && [ "${dataset}" != "DCASE2022T2" ] \ + && [ "${dataset}" != "DCASE2023T2" ] then args_flag=1 args_flag_dataset=1 @@ -38,15 +39,16 @@ fi if [ $args_flag -eq 1 ] then echo "$0: argument error" - echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2'] ['-d' | '--dev' | '-e' | '--eval']" + echo -e "usage\t: $0 ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2'] ['-d' | '--dev' | '-e' | '--eval']" if [ $args_flag_dataset -eq 1 ] then echo -e "\tdataset: invalid choice '$dataset'" - echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2']." + echo -e "\tchoice from ['DCASE2020T2' | 'DCASE2021T2' | 'DCASE2022T2' | 'DCASE2023T2']." echo -e "\t\tDCASE2020T2\t: Use DCASE2020 Task2 datasets. " echo -e "\t\tDCASE2021T2\t: Use DCASE2021 Task2 datasets. " echo -e "\t\tDCASE2022T2\t: Use DCASE2022 Task2 datasets. " + echo -e "\t\tDCASE2023T2\t: Use DCASE2023 Task2 datasets. 
" echo fi @@ -66,7 +68,17 @@ fi # main process for job in "test_ae.sh"; do - if [ $dataset = "DCASE2022T2" ]; then + if [ $dataset = "DCASE2023T2" ]; then + if [ $dev_eval = "-d" ] || [ $dev_eval = "--dev" ]; then + for machine_type in DCASE2023T2bearing DCASE2023T2fan DCASE2023T2gearbox DCASE2023T2slider DCASE2023T2ToyCar DCASE2023T2ToyTrain DCASE2023T2valve; do + ${base_job} $job ${machine_type} ${dev_eval} ${score} 0 + done + else # $dev_eval = "-e" || $dev_eval = "--eval" + for machine_type in DCASE2023T2ToyDrone DCASE2023T2ToyNscale DCASE2023T2ToyTank DCASE2023T2Vacuum DCASE2023T2bandsaw DCASE2023T2grinder DCASE2023T2shaker; do + ${base_job} $job ${machine_type} ${dev_eval} ${score} 0 + done + fi + elif [ $dataset = "DCASE2022T2" ]; then if [ $dev_eval = "-d" ] || [ $dev_eval = "--dev" ]; then for machine_type in DCASE2022T2bearing DCASE2022T2fan DCASE2022T2gearbox DCASE2022T2slider DCASE2022T2ToyCar DCASE2022T2ToyTrain DCASE2022T2valve; do ${base_job} $job ${machine_type} ${dev_eval} ${score} 0 diff --git a/tools/concat_divided_roc.py b/tools/concat_divided_roc.py index e8c2e58..1f830fb 100644 --- a/tools/concat_divided_roc.py +++ b/tools/concat_divided_roc.py @@ -122,7 +122,7 @@ def export_csv(file_path, auc_list, column_header, machine_id_list): help='') parser.add_argument("--file_name", type=str, default='*roc.csv', metavar='N', help='') - parser.add_argument('--dataset',type=str, default="DCASE2020T2", choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2"]) + parser.add_argument('--dataset',type=str, default="DCASE2020T2", choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2", "DCASE2024T2"]) parser.add_argument('-d', '--dev', action='store_true', help='Use Development dataset') parser.add_argument('-e', '--eval', action='store_true', diff --git a/tools/data_download_2023.sh b/tools/data_download_2023.sh new file mode 100644 index 0000000..c39634c --- /dev/null +++ b/tools/data_download_2023.sh @@ -0,0 +1,25 @@ +parent_dir=data +ROOT_DIR=$(cd $(dirname $0); pwd)/../ +mkdir -p "${ROOT_DIR}/${parent_dir}/dcase2023t2/dev_data/raw" +mkdir -p "${ROOT_DIR}/${parent_dir}/dcase2023t2/eval_data/raw" + +# download dev data +cd "${ROOT_DIR}/data/dcase2023t2/dev_data/raw" +for machine_type in bearing fan gearbox slider ToyCar ToyTrain valve; do +wget "https://zenodo.org/record/7882613/files/dev_${machine_type}.zip" +unzip "dev_${machine_type}.zip" +done + +# download eval data +cd - +cd "${ROOT_DIR}/data/dcase2023t2/eval_data/raw" +for machine_type in bandsaw grinder shaker ToyDrone ToyNscale ToyTank Vacuum; do +wget "https://zenodo.org/record/7830345/files/eval_data_${machine_type}_train.zip" +unzip "eval_data_${machine_type}_train.zip" + +wget "https://zenodo.org/record/7860847/files/eval_data_${machine_type}_test.zip" +unzip "eval_data_${machine_type}_test.zip" +done + +# Adds reference labels to test data. 
+python ${ROOT_DIR}/tools/rename_eval_wav.py --dataset_parent_dir=${parent_dir} --dataset_type=DCASE2023T2 diff --git a/tools/export_results.py b/tools/export_results.py index d7228a3..2983b24 100644 --- a/tools/export_results.py +++ b/tools/export_results.py @@ -183,7 +183,7 @@ def main(parent_dir, dataset, machine_type_dict, row_index=["arithmetic mean", " parser = argparse.ArgumentParser( description='Main function to call training for different AutoEncoders') parser.add_argument("parent_dir", type=str) - parser.add_argument("--dataset", type=str, default="DCASE2020T2", choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2"]) + parser.add_argument("--dataset", type=str, default="DCASE2020T2", choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2", "DCASE2024T2"]) parser.add_argument('-d', '--dev', action='store_true', help='Use Development dataset') parser.add_argument('-e', '--eval', action='store_true', diff --git a/tools/extract_results.py b/tools/extract_results.py index b041b4c..358186a 100644 --- a/tools/extract_results.py +++ b/tools/extract_results.py @@ -18,7 +18,7 @@ parser.add_argument("parent_dir", type=str) parser.add_argument("--file_name", type=str, default="auc_pauc") parser.add_argument("--ext", type=str, default=".csv") - parser.add_argument("--dataset", type=str, default="DCASE2020T2", choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2"]) + parser.add_argument("--dataset", type=str, default="DCASE2020T2", choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2", "DCASE2024T2"]) parser.add_argument("--float_format", type=str, default="%.4f") parser.add_argument('-d', '--dev', action='store_true', help='Use Development dataset') diff --git a/tools/rename_eval_wav.py b/tools/rename_eval_wav.py index 1912e7d..53f56d1 100644 --- a/tools/rename_eval_wav.py +++ b/tools/rename_eval_wav.py @@ -62,7 +62,7 @@ def copy_wav(dataset_parent_dir, dataset_type): description='Main function to call training for different AutoEncoders') parser.add_argument("--dataset_parent_dir", type=str, default="data", help="saving datasets directory name.") - parser.add_argument("--dataset_type", type=str, required=True, choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2"], + parser.add_argument("--dataset_type", type=str, required=True, choices=["DCASE2020T2", "DCASE2021T2", "DCASE2022T2", "DCASE2023T2", "DCASE2024T2"], help="what Dataset name to renamed.") args = parser.parse_args()
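As a quick sanity check of the two YAML files added above (`datasets/machine_type_2024_dev.yaml` and `datasets/download_path_2024.yaml`), the short sketch below parses both and prints each DCASE2024T2 machine type with its dev section IDs and download URL. This is only an illustration of the file layout introduced in this patch, not code from the repository; the actual consumers are `get_machine_type_dict()` and `download_raw_data()` in `datasets/loader_common.py`. It assumes PyYAML is installed and that it is run from the repository root.

```python
# Sketch only: list the DCASE2024T2 machine types, dev sections, and download URLs
# from the two YAML files introduced in this patch (assumes PyYAML, repo-root cwd).
import yaml

with open("datasets/machine_type_2024_dev.yaml", "r") as f:
    machine_types = yaml.safe_load(f)["DCASE2024T2"]

with open("datasets/download_path_2024.yaml", "r") as f:
    download_paths = yaml.safe_load(f)["DCASE2024T2"]

section_keyword = machine_types["section_keyword"]  # "section"
for machine_type, sections in machine_types["machine_type"].items():
    dev_sections = sections["dev"]                   # e.g. ["00"]
    dev_urls = download_paths[machine_type]["dev"]   # Zenodo dev zip URL(s)
    print(f"{machine_type}: {section_keyword} IDs {dev_sections}, urls {dev_urls}")
```

With the files as given in this patch, the sketch prints seven lines (ToyCar, ToyTrain, fan, gearbox, bearing, slider, valve), each with section "00" and one Zenodo URL from record 10902294.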