Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Toggleable keyword evaluation behavior #1130

Merged
merged 2 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,13 @@ contains
:description:
Returns True if ``contains`` is in ``string``. False otherwise.

contains_all
~~~~~~~~~~~~
:spec: ``contains_all(string: String, contains_array: Array) -> Boolean``

:description:
Returns True if all elements in ``contains_array`` are in ``string``. False otherwise.

contains_any
~~~~~~~~~~~~
:spec: ``contains_any(string: String, contains_array: Array) -> Boolean``
Expand Down
17 changes: 13 additions & 4 deletions docs/source/prebuilt_presets/helpers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ Filter Keywords

``Filter Keywords`` can include or exclude media with any of the listed keywords. Both keywords and title/description are lower-cased before filtering.

By default, keywords are evaluated with ``ANY``, meaning the filter succeeds if any one of the listed keywords is present. Set the respective ``_eval`` variable to ``ALL`` to require every listed keyword to be present instead.

Supports the following override variables:

* ``title_include_keywords``
* ``title_exclude_keywords``
* ``description_include_keywords``
* ``description_exclude_keywords``
* ``title_include_keywords``, ``title_include_eval``
* ``title_exclude_keywords``, ``title_exclude_eval``
* ``description_include_keywords``, ``description_include_eval``
* ``description_exclude_keywords``, ``description_exclude_eval``

.. tip::

Expand All @@ -61,6 +63,13 @@ Supports the following override variables:
title_include_keywords:
- "To Catch a Smuggler"

= Sports:
"~Maple Leafs Highlights":
url: "https://www.youtube.com/@NHL"
title_include_eval: "ALL"
title_include_keywords:
- "maple leafs"
- "highlights"

Chunk Downloads
---------------
Expand Down
34 changes: 27 additions & 7 deletions src/ytdl_sub/prebuilt_presets/helpers/filtering.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ presets:
description_include_keywords: "{ [] }"
description_exclude_keywords: "{ [] }"

title_include_eval: "ANY"
title_exclude_eval: "ANY"
description_include_eval: "ANY"
description_exclude_eval: "ANY"

"%ensure_string": >-
{
%assert_then(
Expand All @@ -32,21 +37,36 @@ presets:
)
}

# Picks the keyword-matching function based on the requested eval type.
# $0 - var to evaluate (the string that is searched)
# $1 - keyword list
# $2 - eval type; passed through %ensure_string and compared against
#      'any' (-> %contains_any) and 'all' (-> %contains_all).
#      Any other value throws 'Keyword eval must be either ANY or ALL'.
"%contains_keywords_inner": >-
{
%elif(
%eq(%ensure_string($2), 'any'),
%contains_any( $0, $1 ),
%eq(%ensure_string($2), 'all'),
%contains_all( $0, $1 ),
%throw('Keyword eval must be either ANY or ALL')
)
}

# $0 - var to evaluate
# $1 - keyword list
# $2 - variable name for error messages
# $3 - default return if keyword list is empty
# $3 - keyword eval
# $4 - default return if keyword list is empty
"%contains_keywords": >-
{
%if(
%bool( $1 ),
%contains_any( %lower($0), %ensure_lower_array($1, $2) ),
$3
%contains_keywords_inner( %lower($0), %ensure_lower_array($1, $2), $3 ),
$4
)
}

filter_exclude:
- "{ %not( %contains_keywords(title, title_include_keywords, 'title_include_keywords', true) ) }"
- "{ %not( %contains_keywords(description, description_include_keywords, 'description_include_keywords', true) ) }"
- "{ %contains_keywords(title, title_exclude_keywords, 'title_exclude_keywords', false) }"
- "{ %contains_keywords(description, description_exclude_keywords, 'description_exclude_keywords',false) }"
- "{ %not( %contains_keywords(title, title_include_keywords, 'title_include_keywords', title_include_eval, true) ) }"
- "{ %not( %contains_keywords(description, description_include_keywords, 'description_include_keywords', description_include_eval, true) ) }"
- "{ %contains_keywords(title, title_exclude_keywords, 'title_exclude_keywords', title_exclude_eval, false) }"
- "{ %contains_keywords(description, description_exclude_keywords, 'description_exclude_keywords', description_exclude_eval, false) }"
14 changes: 14 additions & 0 deletions src/ytdl_sub/script/functions/string_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ def contains_any(string: String, contains_array: Array) -> Boolean:
)
)

@staticmethod
def contains_all(string: String, contains_array: Array) -> Boolean:
    """
    :description:
      Returns True if every element in ``contains_array`` is in ``string``. False otherwise.
    """
    haystack = string.value
    for element in contains_array.value:
        # Only scalar script types take part in the check; any other element
        # (e.g. a nested array or map) is skipped rather than counted as a miss.
        if not isinstance(element, (String, Integer, Boolean, Float)):
            continue
        # Stop at the first element whose string form is not a substring.
        if str(element) not in haystack:
            return Boolean(False)
    # Reached when every checked element matched, or nothing was checked.
    return Boolean(True)

@staticmethod
def slice(string: String, start: Integer, end: Optional[Integer] = None) -> String:
"""
Expand Down
100 changes: 97 additions & 3 deletions tests/integration/prebuilt_presets/test_filter_keywords.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import re

import pytest
from expected_transaction_log import assert_transaction_log_matches

from ytdl_sub.script.utils.exceptions import UserThrownRuntimeError
from ytdl_sub.subscriptions.subscription import Subscription
from ytdl_sub.utils.exceptions import ValidationException


@pytest.fixture
Expand Down Expand Up @@ -77,6 +74,39 @@ def test_title(
transaction_log_summary_file_name=f"integration/prebuilt_presets/title_filter_keywords_{filter_mode}.txt",
)

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_title_all(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    filter_mode: str,
):
    """
    Dry-run a subscription whose title include/exclude filter uses the
    lower-case ``all`` eval with three keywords, then compare the resulting
    transaction log against the ``title_filter_keywords_{filter_mode}`` summary.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"title_{filter_mode}_eval"] = "all"
    overrides[f"title_{filter_mode}_keywords"] = ["MOCK", "ENTRY", "20-3"]

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    with mock_download_collection_entries(
        is_youtube_channel=False, num_urls=1, is_dry_run=True
    ):
        transaction_log = subscription.download(dry_run=True)

    summary_file_name = f"integration/prebuilt_presets/title_filter_keywords_{filter_mode}.txt"
    assert_transaction_log_matches(
        output_directory=output_directory,
        transaction_log=transaction_log,
        transaction_log_summary_file_name=summary_file_name,
    )

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_description(
self,
Expand Down Expand Up @@ -108,6 +138,38 @@ def test_description(
transaction_log_summary_file_name=f"integration/prebuilt_presets/description_filter_keywords_{filter_mode}.txt",
)

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_description_all(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    filter_mode: str,
):
    """
    Dry-run a subscription whose description include/exclude filter uses the
    upper-case ``ALL`` eval with two keywords, then compare the resulting
    transaction log against the ``description_filter_keywords_{filter_mode}``
    summary.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"description_{filter_mode}_eval"] = "ALL"
    overrides[f"description_{filter_mode}_keywords"] = ["descr", "iption"]

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    with mock_download_collection_entries(
        is_youtube_channel=False, num_urls=1, is_dry_run=True
    ):
        transaction_log = subscription.download(dry_run=True)

    summary_file_name = (
        f"integration/prebuilt_presets/description_filter_keywords_{filter_mode}.txt"
    )
    assert_transaction_log_matches(
        output_directory=output_directory,
        transaction_log=transaction_log,
        transaction_log_summary_file_name=summary_file_name,
    )

@pytest.mark.parametrize(
"keyword_variable",
[
Expand Down Expand Up @@ -169,3 +231,35 @@ def test_error_not_string_keyword(
pytest.raises(UserThrownRuntimeError, match="filter keywords must be strings"),
):
_ = subscription.download(dry_run=True)

@pytest.mark.parametrize(
    "keyword_variable",
    ["title_include", "title_exclude", "description_include", "description_exclude"],
)
def test_error_not_correct_eval(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    keyword_variable,
):
    """
    An eval override that is neither ANY nor ALL must make the dry-run
    download raise ``UserThrownRuntimeError`` with the eval error message.
    """
    overrides = filter_subscription_dict["overrides"]
    # A keyword list is set alongside the bad eval value ("LOL").
    overrides[f"{keyword_variable}_keywords"] = ["hmm"]
    overrides[f"{keyword_variable}_eval"] = "LOL"

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    with mock_download_collection_entries(is_youtube_channel=False, num_urls=1, is_dry_run=True):
        with pytest.raises(
            UserThrownRuntimeError, match="Keyword eval must be either ANY or ALL"
        ):
            _ = subscription.download(dry_run=True)
8 changes: 8 additions & 0 deletions tests/unit/script/functions/test_string_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ def test_contains_any(self, value, expected_output):
output = single_variable_output(f"{{%contains_any('a brown dog', {value})}}")
assert output == expected_output

@pytest.mark.parametrize(
    "value, expected_output",
    [
        ("['a', 'b', 'c']", False),
        ("['nope', [], {}]", False),
        ("['a', 'dog']", True),
    ],
)
def test_contains_all(self, value, expected_output):
    """Evaluate ``%contains_all`` against the fixed string 'a brown dog'."""
    script = f"{{%contains_all('a brown dog', {value})}}"
    assert single_variable_output(script) == expected_output

@pytest.mark.parametrize(
"input_string, split, max_split, expected_output",
[
Expand Down
Loading