Skip to content

Commit

Permalink
Do not ingest Jamendo records with downloads disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
stacimc committed Jan 2, 2024
1 parent ddec27d commit 7b8e9eb
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
18 changes: 17 additions & 1 deletion catalog/dags/providers/provider_api_scripts/jamendo.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ class JamendoDataIngester(ProviderDataIngester):
batch_limit = 200
headers = {"Accept": "application/json"}

def __init__(self, *args, **kwargs):
    """Set up the ingester and start the downloads-disabled tally at zero.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    super().__init__(*args, **kwargs)
    # Running count of records that were skipped (not ingested) because
    # downloads are disabled for them.
    self.download_disabled_count = 0

def ingest_records(self, **kwargs):
    """Run the parent ingestion, then log how many records were discarded.

    Keyword arguments are forwarded unchanged to the parent
    implementation. The count is read only after the parent call returns.
    """
    super().ingest_records(**kwargs)
    discarded = self.download_disabled_count
    logger.info(
        f"Discarded {discarded} records with `audiodownload_allowed` = False."
    )

def get_media_type(self, record):
    """Report the media type of a record.

    The ``record`` argument is not inspected: ``constants.AUDIO`` is
    returned unconditionally.
    """
    media_type = constants.AUDIO
    return media_type

Expand Down Expand Up @@ -155,7 +168,6 @@ def _get_metadata(data):
"downloads": stats.get("rate_download_total", 0),
"listens": stats.get("rate_listened_total", 0),
"playlists": stats.get("rate_playlisted_total", 0),
"audiodownload_allowed": data.get("audiodownload_allowed", True),
}
return {k: v for k, v in metadata.items() if v is not None}

Expand Down Expand Up @@ -191,6 +203,10 @@ def get_record_data(self, data):
if not (license_info := get_license_info(data.get("license_ccurl"))):
return None

if data.get("audiodownload_allowed") is False:
self.download_disabled_count += 1
return None

if duration := data.get("duration"):
duration = int(duration) * 1000
title = data.get("name")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def test_get_record_data():
"listens": 5616,
"playlists": 0,
"release_date": "2005-04-12",
"audiodownload_allowed": True,
},
"raw_tags": ["instrumental", "speed_medium"],
"audio_set_foreign_identifier": "119",
Expand Down Expand Up @@ -208,3 +207,28 @@ def test_add_trailing_slash(url, expected):
def test_remove_track_id_handles_data(thumbnail_url, expected):
actual = jamendo._remove_trackid(thumbnail_url)
assert actual == expected


@pytest.mark.parametrize(
    "audiodownload_allowed, should_ingest",
    [
        # Downloads explicitly allowed: the record is ingested.
        (True, True),
        # Value is None: still ingested, since only an explicit False
        # blocks ingestion.
        (None, True),
        # Downloads explicitly disabled: the record is discarded.
        (False, False),
    ],
)
def test_get_record_data_discards_records_with_downloads_disabled(
    audiodownload_allowed, should_ingest
):
    """Check that only `audiodownload_allowed` = False prevents ingestion."""
    track = _get_resource_json("audio_data_example.json")
    track["audiodownload_allowed"] = audiodownload_allowed

    was_ingested = jamendo.get_record_data(track) is not None

    assert was_ingested == should_ingest

0 comments on commit 7b8e9eb

Please sign in to comment.