Skip to content

Commit

Permalink
Release 1.3.0 with added DeepScores dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
apacha committed Jun 10, 2020
1 parent 6935f4b commit 6d7a5e3
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 15 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Changelog
=========

1.3.0
-----
Added download support for the DeepScores V1 dataset with extended vocabulary.
Opened up the downloader: you can now download custom datasets and use its
helper methods that were previously private.

1.2.2
-----
Fixed incorrect import statement in `__init__.py`
Expand Down
42 changes: 28 additions & 14 deletions omrdatasettools/Downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,33 @@ def download_and_extract_dataset(self, dataset: OmrDataset, destination_director
>>> downloader.download_and_extract_dataset(OmrDataset.HOMUS_V2, "data")
"""
if not os.path.exists(dataset.get_dataset_filename()):
print("Downloading {0} dataset...".format(dataset.name))
self.__download_file(dataset.get_dataset_download_url(), dataset.get_dataset_filename())

print("Extracting {0} dataset...".format(dataset.name))
self.__extract_dataset(os.path.abspath(destination_directory), dataset.get_dataset_filename())
self.download_and_extract_custom_dataset(dataset.name, dataset.get_dataset_download_url(),
dataset.get_dataset_filename(), destination_directory)

if dataset is OmrDataset.Fornes:
self.__fix_capital_file_endings(os.path.join(os.path.abspath(destination_directory), "Music_Symbols"))

if dataset in [OmrDataset.MuscimaPlusPlus_V1, OmrDataset.MuscimaPlusPlus_V2]:
self.__download_muscima_pp_images(dataset, destination_directory)

def download_and_extract_custom_dataset(self, dataset_name: str, dataset_url: str, dataset_filename: str,
destination_directory: str):
""" Starts the download of a custom dataset and extracts it into the specified directory.

The download is skipped if a file named ``dataset_filename`` already exists; in that case the
existing file is extracted as-is.

Parameters
----------
dataset_name : Name of the dataset; only used in the progress messages that are printed.
dataset_url : URL the dataset archive is downloaded from.
dataset_filename : Local path of the archive. Checked for existence, used as the download target
    and as the archive to extract (extraction is done with ``ZipFile``, so a zip archive is expected).
destination_directory : Directory the archive is extracted into; converted to an absolute path first.

Examples
--------
>>> from omrdatasettools import Downloader
>>> downloader = Downloader()
>>> downloader.download_and_extract_custom_dataset("MyNewOmrDataset", "https://example.org/dataset.zip", "dataset.zip", "data/MyNewOmrDataset")
"""
# Reuse an already-downloaded archive instead of fetching it again.
if not os.path.exists(dataset_filename):
print("Downloading {0} dataset...".format(dataset_name))
self.download_file(dataset_url, dataset_filename)

print("Extracting {0} dataset...".format(dataset_name))
self.extract_dataset(os.path.abspath(destination_directory), dataset_filename)

def download_images_from_mei_annotation(self, dataset: OmrDataset, dataset_directory: str, base_url: str):
""" Crawls the images of an Edirom dataset, if provided with the respective URL. To avoid repetitive crawling,
this URL has to be provided manually. If you are interested in these datasets, please contact the authors.
Expand Down Expand Up @@ -84,15 +98,15 @@ def __download_edirom_images(self, base, base_url, source):
def __download_muscima_pp_images(self, dataset: OmrDataset, destination_directory: str):
# Automatically download the images and measure annotations with the MUSCIMA++ dataset
muscima_pp_images_filename = dataset.dataset_file_names()["MuscimaPlusPlus_Images"]
self.__download_file(dataset.dataset_download_urls()["MuscimaPlusPlus_Images"], muscima_pp_images_filename)
self.download_file(dataset.dataset_download_urls()["MuscimaPlusPlus_Images"], muscima_pp_images_filename)
absolute_path_to_temp_folder = os.path.abspath('MuscimaPpImages')
self.__extract_dataset(absolute_path_to_temp_folder, muscima_pp_images_filename)
self.extract_dataset(absolute_path_to_temp_folder, muscima_pp_images_filename)
if dataset is OmrDataset.MuscimaPlusPlus_V1:
target_folder = os.path.join(os.path.abspath(destination_directory), "v1.0", "data", "images")
if dataset is OmrDataset.MuscimaPlusPlus_V2:
target_folder = os.path.join(os.path.abspath(destination_directory), "v2.0", "data", "images")
self.__copytree(os.path.join(absolute_path_to_temp_folder, "fulls"), target_folder)
self.__clean_up_temp_directory(absolute_path_to_temp_folder)
self.copytree(os.path.join(absolute_path_to_temp_folder, "fulls"), target_folder)
self.clean_up_temp_directory(absolute_path_to_temp_folder)

def __fix_capital_file_endings(self, absolute_path_to_temp_folder):
image_with_capital_file_ending = [y for x in os.walk(absolute_path_to_temp_folder) for y in
Expand All @@ -101,7 +115,7 @@ def __fix_capital_file_endings(self, absolute_path_to_temp_folder):
os.rename(image, image[:-3] + "bmp")

@staticmethod
def __copytree(src, dst):
def copytree(src, dst):
if not os.path.exists(dst):
os.makedirs(dst)
for item in os.listdir(src):
Expand All @@ -114,18 +128,18 @@ def __copytree(src, dst):
shutil.copy2(s, d)

@staticmethod
def __extract_dataset(absolute_path_to_folder: str, dataset_filename: str):
def extract_dataset(absolute_path_to_folder: str, dataset_filename: str):
archive = ZipFile(dataset_filename, "r")
archive.extractall(absolute_path_to_folder)
archive.close()

@staticmethod
def __clean_up_temp_directory(temp_directory):
def clean_up_temp_directory(temp_directory):
print("Deleting temporary directory {0}".format(temp_directory))
shutil.rmtree(temp_directory, ignore_errors=True)

@staticmethod
def __download_file(url, destination_filename=None) -> str:
def download_file(url, destination_filename=None) -> str:
u = urllib2.urlopen(url)
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
filename = os.path.basename(path)
Expand Down
6 changes: 6 additions & 0 deletions omrdatasettools/OmrDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class OmrDataset(Enum):
#: The Rebelo dataset (part 2) with music symbols from http://www.inescporto.pt/~arebelo/index.php, Copyright 2017 by Ana Rebelo under CC BY-SA 4.0 license
Rebelo2 = auto()

#: The DeepScores dataset (version 1) with extended vocabulary from https://tuggeluk.github.io/downloads/, License unspecified.
DeepScores_V1_Extended = auto()

def get_dataset_download_url(self) -> str:
""" Returns the url of the selected dataset.
Example usage: OmrDataset.Fornes.get_dataset_download_url() """
Expand Down Expand Up @@ -132,6 +135,8 @@ def dataset_download_urls(self) -> Dict[str, str]:

"Rebelo1": "https://github.com/apacha/OMR-Datasets/releases/download/datasets/Rebelo-Music-Symbol-Dataset1.zip",
"Rebelo2": "https://github.com/apacha/OMR-Datasets/releases/download/datasets/Rebelo-Music-Symbol-Dataset2.zip",

"DeepScores_V1_Extended": "https://repository.cloudlab.zhaw.ch/artifactory/deepscores/ds_extended.zip"
}

def dataset_file_names(self) -> Dict[str, str]:
Expand All @@ -156,4 +161,5 @@ def dataset_file_names(self) -> Dict[str, str]:
"Printed": "PrintedMusicSymbolsDataset.zip",
"Rebelo1": "Rebelo-Music-Symbol-Dataset1.zip",
"Rebelo2": "Rebelo-Music-Symbol-Dataset2.zip",
"DeepScores_V1_Extended": "ds_extended.zip"
}
2 changes: 1 addition & 1 deletion omrdatasettools/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.2.2'
__version__ = '1.3.0'
9 changes: 9 additions & 0 deletions omrdatasettools/tests/test_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ def test_download_of_rebelo2_dataset(self):
self.download_dataset_and_verify_correct_extraction(destination_directory, number_of_samples_in_the_dataset,
target_file_extension, dataset)

def test_download_of_deepscores_dataset(self):
# Downloads OmrDataset.DeepScores_V1_Extended into "DeepScoresV1Extended" and passes the expected
# sample count and file pattern to the shared download-and-verify helper.
# NOTE(review): presumably the helper counts extracted files matching the pattern - its body is
# not visible here, confirm against download_dataset_and_verify_correct_extraction.
destination_directory = "DeepScoresV1Extended"
dataset = OmrDataset.DeepScores_V1_Extended
number_of_samples_in_the_dataset = 3408
target_file_extension = "*.png"

self.download_dataset_and_verify_correct_extraction(destination_directory, number_of_samples_in_the_dataset,
target_file_extension, dataset)

def download_dataset_and_verify_correct_extraction(self: unittest.TestCase, destination_directory: str,
number_of_samples_in_the_dataset: int,
target_file_extension: str, dataset: OmrDataset):
Expand Down

0 comments on commit 6d7a5e3

Please sign in to comment.