diff --git a/TEx/core/mapper/telethon_message_mapper.py b/TEx/core/mapper/telethon_message_mapper.py index 98bf1f2..ba79200 100644 --- a/TEx/core/mapper/telethon_message_mapper.py +++ b/TEx/core/mapper/telethon_message_mapper.py @@ -56,6 +56,7 @@ async def to_finder_notification_facade_entity(message: Message, downloaded_medi message_id=message.id, is_reply=message.is_reply, downloaded_media_info=downloaded_media_info, + found_on='UNDEFINED', ) return h_result diff --git a/TEx/finder/base_finder.py b/TEx/finder/base_finder.py index b5983c9..fa03e08 100644 --- a/TEx/finder/base_finder.py +++ b/TEx/finder/base_finder.py @@ -1,4 +1,6 @@ """Base Class for All Finders.""" +from __future__ import annotations + import abc diff --git a/TEx/finder/finder_engine.py b/TEx/finder/finder_engine.py index bad2fef..afee7b9 100644 --- a/TEx/finder/finder_engine.py +++ b/TEx/finder/finder_engine.py @@ -1,10 +1,14 @@ """Finder Engine.""" from __future__ import annotations -from configparser import ConfigParser +from configparser import ConfigParser, SectionProxy from typing import Dict, List, Optional +import aiofiles +import aiofiles.os + from TEx.finder.all_messages_finder import AllMessagesFinder +from TEx.finder.base_finder import BaseFinder from TEx.finder.regex_finder import RegexFinder from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity from TEx.notifier.notifier_engine import NotifierEngine @@ -18,12 +22,8 @@ def __init__(self) -> None: self.is_finder_enabled: bool = False self.rules: List[Dict] = [] self.notification_engine: NotifierEngine - - def __is_finder_enabled(self, config: ConfigParser) -> bool: - """Check if Finder Module is Enabled.""" - return ( - config.has_option('FINDER', 'enabled') and config['FINDER']['enabled'] == 'true' - ) + self.find_in_text_enabled: bool = False + self.find_in_text_files_max_size_bytes: int = 0 def __load_rules(self, config: ConfigParser) -> None: """Load Finder Rules.""" @@ -35,18 +35,34 @@ 
def __load_rules(self, config: ConfigParser) -> None: 'id': sec, 'instance': RegexFinder(config=config[sec]), 'notifier': config[sec]['notifier'], + 'type': config[sec]['type'], }) elif config[sec]['type'] == 'all': self.rules.append({ 'id': sec, 'instance': AllMessagesFinder(config=config[sec]), 'notifier': config[sec]['notifier'], + 'type': config[sec]['type'], }) def configure(self, config: ConfigParser, notification_engine: NotifierEngine) -> None: """Configure Finder.""" - self.is_finder_enabled = self.__is_finder_enabled(config=config) - self.__load_rules(config=config) + finder_config_proxy: Optional[SectionProxy] = config['FINDER'] if config.has_section('FINDER') else None + + if finder_config_proxy: + + # Get Basic Props + self.is_finder_enabled = finder_config_proxy.get('enabled', fallback='false') == 'true' + self.find_in_text_enabled = finder_config_proxy.get('find_in_text_files_enabled', fallback='false') == 'true' + self.find_in_text_files_max_size_bytes = int(finder_config_proxy.get('find_in_text_files_max_size_bytes', fallback='10000000')) + + # Load all Rules + self.__load_rules(config=config) + + else: + self.find_in_text_enabled = False + + # Set Notification Engine self.notification_engine = notification_engine async def run(self, entity: Optional[FinderNotificationMessageEntity], source: str) -> None: @@ -59,10 +75,29 @@ async def run(self, entity: Optional[FinderNotificationMessageEntity], source: s if not self.is_finder_enabled or not entity: return + cached_file_content: str = '' + for rule in self.rules: - is_found: bool = await rule['instance'].find(raw_text=entity.raw_text) - if is_found: + # Resolve Finder + finder: BaseFinder = rule['instance'] + + # Find in Raw Text Content + is_found_on_content: bool = await finder.find(raw_text=entity.raw_text) + is_found_on_text_downloaded_file: bool = False + + # Find into Downloaded File (If Applicable) + if not is_found_on_content and self.find_in_text_enabled and rule['type'] != 'all': + 
is_found_on_text_downloaded_file = await self.__find_in_text_files( + entity=entity, + finder=finder, + file_content=cached_file_content, + ) + + if is_found_on_content or is_found_on_text_downloaded_file: + + # Update found_on Flag + entity.found_on = 'MESSAGE' if is_found_on_content else f'FILE\n{entity.downloaded_media_info.disk_file_path}' # type: ignore # Runt the Notification Engine await self.notification_engine.run( @@ -71,3 +106,28 @@ async def run(self, entity: Optional[FinderNotificationMessageEntity], source: s rule_id=rule['id'], source=source, ) + + async def __find_in_text_files(self, entity: FinderNotificationMessageEntity, finder: BaseFinder, file_content: str) -> bool: + """Try to Run the Finder Engine into the Downloaded Text File.""" + if not entity.downloaded_media_info or not entity.downloaded_media_info.allow_search_in_text_file(): + return False + + # Check if File Exists + file_exists: bool = await aiofiles.os.path.exists(entity.downloaded_media_info.disk_file_path) + if not file_exists: + return False + + # Check Max Size + max_size_exceeded: bool = entity.downloaded_media_info.size_bytes > self.find_in_text_files_max_size_bytes + if max_size_exceeded: + return False + + # Open and Read the File + if file_content == '': + async with aiofiles.open(entity.downloaded_media_info.disk_file_path, 'rb') as f: + file_bytes = await f.read() + file_content = file_bytes.decode('UTF-8') + await f.close() + + # Run Finder + return await finder.find(raw_text=file_content) diff --git a/TEx/models/facade/finder_notification_facade_entity.py b/TEx/models/facade/finder_notification_facade_entity.py index 8fb6dd5..81dedfb 100644 --- a/TEx/models/facade/finder_notification_facade_entity.py +++ b/TEx/models/facade/finder_notification_facade_entity.py @@ -24,3 +24,4 @@ class FinderNotificationMessageEntity(BaseModel): message_id: Optional[int] is_reply: Optional[bool] downloaded_media_info: Optional[MediaHandlingEntity] + found_on: str diff --git 
a/TEx/models/facade/media_handler_facade_entity.py b/TEx/models/facade/media_handler_facade_entity.py index 9b4ac41..a363c3b 100644 --- a/TEx/models/facade/media_handler_facade_entity.py +++ b/TEx/models/facade/media_handler_facade_entity.py @@ -21,3 +21,17 @@ def is_video(self) -> bool: """Return if Downloaded Image are a Video.""" return self.content_type in ['application/ogg', 'video/mp4', 'video/quicktime', 'video/webm'] + def allow_search_in_text_file(self) -> bool: + """Return if Allow to Find in the Text File.""" + return self.content_type in [ + 'application/atom+xml', + 'application/bittorrent', + 'application/csv', + 'application/html', + 'application/json', + 'application/ld+json', + 'text/csv', + 'text/html', + 'text/plain', + 'text/xml', + ] diff --git a/TEx/notifier/discord_notifier.py b/TEx/notifier/discord_notifier.py index 610cc39..efa83c3 100644 --- a/TEx/notifier/discord_notifier.py +++ b/TEx/notifier/discord_notifier.py @@ -126,6 +126,7 @@ async def __get_finder_notification_embed(self, entity: FinderNotificationMessag embed.add_embed_field(name='Group Name', value=entity.group_name if entity.group_name else '', inline=True) embed.add_embed_field(name='Group ID', value=str(entity.group_id), inline=True) + embed.add_embed_field(name='Found On', value=entity.found_on, inline=False) embed.add_embed_field(name='Message Date', value=str(entity.date_time), inline=False) embed.add_embed_field(name='Tag', value=duplication_tag, inline=False) diff --git a/TEx/notifier/elastic_search_notifier.py b/TEx/notifier/elastic_search_notifier.py index f0a15bf..b40faaf 100644 --- a/TEx/notifier/elastic_search_notifier.py +++ b/TEx/notifier/elastic_search_notifier.py @@ -78,6 +78,7 @@ async def __get_dict_for_finder_notification(self, entity: FinderNotificationMes 'reply_to_msg_id': entity.reply_to_msg_id, 'message_id': entity.message_id, 'is_reply': entity.is_reply, + 'found_on': entity.found_on, } if entity.downloaded_media_info: diff --git 
a/docs/finder/configuration.md b/docs/finder/configuration.md new file mode 100644 index 0000000..eb55d0f --- /dev/null +++ b/docs/finder/configuration.md @@ -0,0 +1,39 @@ +# Message Finder System + +**Compatibility:** Message Listener Command + +Telegram Explorer lets you configure multiple message finders. By default, the finder engine searches the message text, but it can also search inside downloaded text files (plain, csv, xml, json, etc.). + +**Configuration Spec:** + +To use the finder engine, you must enable it in the configuration file and choose whether the engine is also allowed to search inside downloaded files. + +**Parameters:** + + * **enabled** > Required - Enable(true)/Disable(false) the finder engine. + * **find_in_text_files_enabled** > Optional - Enable(true)/Disable(false) running the finder engine on the content of downloaded text files. + * Default: false + * **find_in_text_files_max_size_bytes** > Optional - Maximum size, in bytes, of a downloaded file that the engine will load into memory and search. Larger files are skipped. + * Default: 10000000 + + +**Changes to the Configuration File** +```ini +[FINDER] +enabled=true +find_in_text_files_enabled=true +find_in_text_files_max_size_bytes=20000000 +``` + +**Content Types Supported by the Engine:** + + * application/atom+xml + * application/bittorrent + * application/csv + * application/html + * application/json + * application/ld+json + * text/csv + * text/html + * text/plain + * text/xml \ No newline at end of file diff --git a/docs/notification/notification_elasticsearch_index_template.md b/docs/notification/notification_elasticsearch_index_template.md index 702b8fe..7518b7c 100644 --- a/docs/notification/notification_elasticsearch_index_template.md +++ b/docs/notification/notification_elasticsearch_index_template.md @@ -4,104 +4,93 @@ If you want, create a new Index Template before create all Telegram Explorer ind This will help you to get the best of all data provided and allow's to extract many more value and informations from the data. 
-**Index Template JSON** +**Index Mapping JSON** ```json { - "settings": { - "index": { - "routing": { - "allocation": { - "include": { - "_tier_preference": "data_content" - } - } - } - } - }, - "mappings": { - "dynamic": "true", - "dynamic_date_formats": [ - "strict_date_optional_time", - "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z" - ], - "dynamic_templates": [], - "date_detection": true, - "numeric_detection": false, - "properties": { - "from_id": { - "type": "long" - }, - "group_id": { - "type": "long" - }, - "group_name": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } - }, - "has_media": { - "type": "boolean" - }, - "is_reply": { - "type": "boolean" - }, - "media_mime_type": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } - }, - "media_size": { - "type": "long" - }, - "message_id": { - "type": "text" - }, - "raw": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } + "numeric_detection": false, + "dynamic_date_formats": [ + "strict_date_optional_time", + "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z" + ], + "dynamic": "true", + "dynamic_templates": [], + "date_detection": true, + "properties": { + "from_id": { + "type": "long" + }, + "media_size": { + "type": "long" + }, + "group_name": { + "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "reply_to_msg_id": { - "type": "long" + "fielddata": true, + "type": "text" + }, + "reply_to_msg_id": { + "type": "long" + }, + "has_media": { + "type": "boolean" + }, + "raw": { + "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "rule": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } + "fielddata": true, + "type": "text" + }, + "rule": { 
+ "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "source": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } + "fielddata": true, + "type": "text" + }, + "to_id": { + "type": "long" + }, + "message_id": { + "type": "text" + }, + "source": { + "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "time": { - "type": "date" + "fielddata": true, + "type": "text" + }, + "is_reply": { + "type": "boolean" + }, + "found_on": { + "type": "text" + }, + "group_id": { + "type": "long" + }, + "media_mime_type": { + "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "to_id": { - "type": "long" - } + "fielddata": true, + "type": "text" + }, + "time": { + "type": "date" } - }, - "aliases": {} + } } ``` diff --git a/docs/notification/notification_elasticsearch_signals_template.md b/docs/notification/notification_elasticsearch_signals_template.md index a364b3d..63ed073 100644 --- a/docs/notification/notification_elasticsearch_signals_template.md +++ b/docs/notification/notification_elasticsearch_signals_template.md @@ -4,62 +4,60 @@ In order to use the Signal Notification with Elastic Search, you should create a This will help you to get the best of all signals provided. 
-**Index Template JSON** +**Index Mapping JSON** ```json { - "settings": { - "index": { - "routing": { - "allocation": { - "include": { - "_tier_preference": "data_content" - } - } - } - } - }, - "mappings": { - "dynamic": "true", - "dynamic_date_formats": [ - "strict_date_optional_time", - "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z" - ], - "dynamic_templates": [], - "date_detection": true, - "numeric_detection": false, - "properties": { - "content": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } + "numeric_detection": false, + "dynamic_date_formats": [ + "strict_date_optional_time", + "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z" + ], + "dynamic": "true", + "dynamic_templates": [], + "date_detection": true, + "properties": { + "source": { + "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "signal": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } + "fielddata": true, + "type": "text" + }, + "time": { + "type": "date" + }, + "signal": { + "eager_global_ordinals": false, + "index_phrases": false, + "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "source": { - "type": "text", - "fielddata": true, - "fielddata_frequency_filter": { - "min": 0.01, - "max": 1, - "min_segment_size": 50 - } + "fielddata": true, + "norms": true, + "index": true, + "store": false, + "type": "text", + "index_options": "positions" + }, + "content": { + "eager_global_ordinals": false, + "index_phrases": false, + "fielddata_frequency_filter": { + "min": 0.01, + "max": 1, + "min_segment_size": 50 }, - "time": { - "type": "date" - } + "fielddata": true, + "norms": true, + "index": true, + "store": false, + "type": "text", + "index_options": "positions" } - }, - "aliases": {} + } } ``` diff --git a/mkdocs.yml b/mkdocs.yml index bdd242e..6df1c1f 100644 --- a/mkdocs.yml 
+++ b/mkdocs.yml @@ -44,6 +44,7 @@ nav: - 'Signals Template': 'notification/notification_elasticsearch_signals_template.md' - 'Signals': 'notification/signals.md' - 'Message Finder System': + - 'Configuration': 'finder/configuration.md' - 'Catch All': 'finder/finder_catchall.md' - 'RegEx Finder': 'finder/finder_regex.md' - 'Reports': diff --git a/tests/finder/test_finder_engine.py b/tests/finder/test_finder_engine.py index 13f79fc..44ce415 100644 --- a/tests/finder/test_finder_engine.py +++ b/tests/finder/test_finder_engine.py @@ -9,6 +9,7 @@ from TEx.core.mapper.telethon_message_mapper import TelethonMessageEntityMapper from TEx.finder.finder_engine import FinderEngine from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity +from TEx.models.facade.media_handler_facade_entity import MediaHandlingEntity from tests.modules.common import TestsCommon from tests.modules.mockups_groups_mockup_data import channel_1_mocked @@ -36,6 +37,7 @@ def test_run_with_regex_finder(self): message_id=969696, is_reply=False, downloaded_media_info=None, + found_on='UTFOUND' ) args: Dict = { @@ -63,11 +65,15 @@ def test_run_with_regex_finder(self): ) ) + # Get Expected Entity for the Notification Engine + expected_entity: FinderNotificationMessageEntity = message_entity.model_copy(deep=True) + expected_entity.found_on = 'MESSAGE' + # Check if Webhook was Executed target.notification_engine.run.assert_has_awaits([ - call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=message_entity, rule_id='FINDER.RULE.UT_Finder_Demo', source='+15558987453'), - call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=message_entity, rule_id='FINDER.RULE.UT_Finder_Demo_MultiLine', source='+15558987453'), - call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=message_entity, rule_id='FINDER.RULE.UT_Finder_Demo_MultiLine_WithLineBreak', source='+15558987453'), + call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=expected_entity, rule_id='FINDER.RULE.UT_Finder_Demo', 
source='+15558987453'), + call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=expected_entity, rule_id='FINDER.RULE.UT_Finder_Demo_MultiLine', source='+15558987453'), + call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=expected_entity, rule_id='FINDER.RULE.UT_Finder_Demo_MultiLine_WithLineBreak', source='+15558987453'), ]) def test_run_not_found(self): @@ -87,6 +93,7 @@ def test_run_not_found(self): message_id=969696, is_reply=False, downloaded_media_info=None, + found_on='UTFOUND' ) args: Dict = { @@ -116,3 +123,66 @@ def test_run_not_found(self): # Check if Webhook was Executed target.notification_engine.run.assert_not_called() + + def test_run_with_regex_finder_on_file(self): + """Test Run With Regex Finder on Downloaded File.""" + + # Setup Mock + notifier_engine_mock = mock.AsyncMock() + + message_entity: FinderNotificationMessageEntity = FinderNotificationMessageEntity( + date_time=datetime.datetime.utcnow(), + raw_text="Mocked Raw Text", + group_name="Group 002", + group_id=123456, + from_id="1234", + to_id=9876, + reply_to_msg_id=5544, + message_id=969696, + is_reply=False, + downloaded_media_info=MediaHandlingEntity( + media_id=123, + file_name='LargeDownloadedFile.txt', + content_type='text/plain', + size_bytes=12279, + disk_file_path='resources/LargeDownloadedFile.txt', + is_ocr_supported=False, + ), + found_on='UTFOUND' + ) + + args: Dict = { + 'config': 'unittest_configfile.config' + } + data: Dict = {} + TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) + + target: FinderEngine = FinderEngine() + + # Execute Discord Notifier Configure Method + target.configure( + config=self.config, + notification_engine=notifier_engine_mock + ) + target.notification_engine = notifier_engine_mock + + loop = asyncio.get_event_loop() + loop.run_until_complete( + + # Invoke Test Target + target.run( + entity=message_entity, + source='+15558987453' + ) + ) + + # Get Expected Entity for the Notification Engine + 
expected_entity: FinderNotificationMessageEntity = message_entity.model_copy(deep=True) + expected_entity.found_on = 'FILE\nresources/LargeDownloadedFile.txt' + + # Check if Webhook was Executed + target.notification_engine.run.assert_has_awaits([ + call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=expected_entity, rule_id='FINDER.RULE.UT_Finder_Demo', source='+15558987453'), + call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=expected_entity, rule_id='FINDER.RULE.UT_Finder_Demo_MultiLine', source='+15558987453'), + call(notifiers=['NOTIFIER.DISCORD.NOT_002'], entity=expected_entity, rule_id='FINDER.RULE.UT_Finder_Demo_MultiLine_WithLineBreak', source='+15558987453'), + ]) \ No newline at end of file diff --git a/tests/notifier/test_discord_notifier.py b/tests/notifier/test_discord_notifier.py index 57b852c..00543fd 100644 --- a/tests/notifier/test_discord_notifier.py +++ b/tests/notifier/test_discord_notifier.py @@ -37,6 +37,7 @@ def test_run_no_duplication(self): message_id=5975883, is_reply=False, downloaded_media_info=None, + found_on='UT FOUND 2' ) target: DiscordNotifier = DiscordNotifier() @@ -71,13 +72,14 @@ def test_run_no_duplication(self): self.assertEqual(call_arg.title, '**Channel 1972142108** (1972142108)') self.assertEqual(call_arg.description, 'Mocked Raw Text') - self.assertEqual(len(call_arg.fields), 7) + self.assertEqual(len(call_arg.fields), 8) self.assertEqual(call_arg.fields[0], {'inline': True, 'name': 'Source', 'value': '+15558987453'}) self.assertEqual(call_arg.fields[1], {'inline': True, 'name': 'Rule', 'value': 'RULE_UT_01'}) self.assertEqual(call_arg.fields[2], {'inline': False, 'name': 'Message ID', 'value': '5975883'}) self.assertEqual(call_arg.fields[3], {'inline': True, 'name': 'Group Name', 'value': 'Channel 1972142108'}) self.assertEqual(call_arg.fields[4], {'inline': True, 'name': 'Group ID', 'value': '1972142108'}) - self.assertEqual(call_arg.fields[6], {'inline': False, 'name': 'Tag', 'value': 
'de33f5dda9c686c64d23b8aec2eebfc7'}) + self.assertEqual(call_arg.fields[5], {'inline': False, 'name': 'Found On', 'value': 'UT FOUND 2'}) + self.assertEqual(call_arg.fields[7], {'inline': False, 'name': 'Tag', 'value': 'de33f5dda9c686c64d23b8aec2eebfc7'}) # Check if Webhook was Executed discord_webhook_mock.execute.assert_awaited_once() @@ -100,6 +102,7 @@ def test_run_duplication_control(self): message_id=5975883, is_reply=False, downloaded_media_info=None, + found_on='UT FOUND' ) target: DiscordNotifier = DiscordNotifier() @@ -169,6 +172,7 @@ def test_run_with_downloaded_media_image(self): disk_file_path='resources/122761750_387013276008970_8208112669996447119_n.jpg', is_ocr_supported=True ), + found_on='UT FOUND 3' ) target: DiscordNotifier = DiscordNotifier() @@ -205,13 +209,14 @@ def test_run_with_downloaded_media_image(self): self.assertEqual(embed_call_arg.title, '**Channel 1972142108** (1972142108)') self.assertEqual(embed_call_arg.description, 'Mocked Raw Text') - self.assertEqual(len(embed_call_arg.fields), 7) + self.assertEqual(len(embed_call_arg.fields), 8) self.assertEqual(embed_call_arg.fields[0], {'inline': True, 'name': 'Source', 'value': '+15558987453'}) self.assertEqual(embed_call_arg.fields[1], {'inline': True, 'name': 'Rule', 'value': 'RULE_UT_01'}) self.assertEqual(embed_call_arg.fields[2], {'inline': False, 'name': 'Message ID', 'value': '5975883'}) self.assertEqual(embed_call_arg.fields[3], {'inline': True, 'name': 'Group Name', 'value': 'Channel 1972142108'}) self.assertEqual(embed_call_arg.fields[4], {'inline': True, 'name': 'Group ID', 'value': '1972142108'}) - self.assertEqual(embed_call_arg.fields[6], {'inline': False, 'name': 'Tag', 'value': 'de33f5dda9c686c64d23b8aec2eebfc7'}) + self.assertEqual(embed_call_arg.fields[5], {'inline': False, 'name': 'Found On', 'value': 'UT FOUND 3'}) + self.assertEqual(embed_call_arg.fields[7], {'inline': False, 'name': 'Tag', 'value': 'de33f5dda9c686c64d23b8aec2eebfc7'}) 
self.assertEqual(embed_call_arg.image['url'], 'attachment://122761750_387013276008970_8208112669996447119_n.jpg') @@ -247,6 +252,7 @@ def test_run_with_downloaded_media_video(self): disk_file_path='resources/unknow.mp4', is_ocr_supported=False ), + found_on='UT FOUND 4' ) target: DiscordNotifier = DiscordNotifier() @@ -283,13 +289,14 @@ def test_run_with_downloaded_media_video(self): self.assertEqual(embed_call_arg.title, '**Channel 1972142108** (1972142108)') self.assertEqual(embed_call_arg.description, 'Mocked Raw Text') - self.assertEqual(len(embed_call_arg.fields), 7) + self.assertEqual(len(embed_call_arg.fields), 8) self.assertEqual(embed_call_arg.fields[0], {'inline': True, 'name': 'Source', 'value': '+15558987453'}) self.assertEqual(embed_call_arg.fields[1], {'inline': True, 'name': 'Rule', 'value': 'RULE_UT_01'}) self.assertEqual(embed_call_arg.fields[2], {'inline': False, 'name': 'Message ID', 'value': '5975883'}) self.assertEqual(embed_call_arg.fields[3], {'inline': True, 'name': 'Group Name', 'value': 'Channel 1972142108'}) self.assertEqual(embed_call_arg.fields[4], {'inline': True, 'name': 'Group ID', 'value': '1972142108'}) - self.assertEqual(embed_call_arg.fields[6], {'inline': False, 'name': 'Tag', 'value': 'de33f5dda9c686c64d23b8aec2eebfc7'}) + self.assertEqual(embed_call_arg.fields[5], {'inline': False, 'name': 'Found On', 'value': 'UT FOUND 4'}) + self.assertEqual(embed_call_arg.fields[7], {'inline': False, 'name': 'Tag', 'value': 'de33f5dda9c686c64d23b8aec2eebfc7'}) self.assertEqual(embed_call_arg.video['url'], 'attachment://unknow.mp4') diff --git a/tests/notifier/test_elastic_search_notifier.py b/tests/notifier/test_elastic_search_notifier.py index 27aae7d..a43483b 100644 --- a/tests/notifier/test_elastic_search_notifier.py +++ b/tests/notifier/test_elastic_search_notifier.py @@ -114,6 +114,7 @@ def test_run_without_downloaded_file(self): message_id=5975883, is_reply=False, downloaded_media_info=None, + found_on='UT FOUND 6' ) with 
mock.patch('TEx.notifier.elastic_search_notifier.AsyncElasticsearch', return_value=elastic_search_api_mock): @@ -155,7 +156,8 @@ def test_run_without_downloaded_file(self): 'is_reply': False, 'has_media': False, 'media_mime_type': None, - 'media_size': None + 'media_size': None, + 'found_on': 'UT FOUND 6' } self.assertEqual(submited_document, expected_document) @@ -193,6 +195,7 @@ def test_run_with_downloaded_file(self): disk_file_path='/folder/file.png', is_ocr_supported=True ), + found_on='UT FOUND 5' ) with mock.patch('TEx.notifier.elastic_search_notifier.AsyncElasticsearch', return_value=elastic_search_api_mock): @@ -234,7 +237,8 @@ def test_run_with_downloaded_file(self): 'is_reply': False, 'has_media': True, 'media_mime_type': 'application/pdf', - 'media_size': 5858 + 'media_size': 5858, + 'found_on': 'UT FOUND 5' } self.assertEqual(submited_document, expected_document) diff --git a/tests/notifier/test_notifier_engine.py b/tests/notifier/test_notifier_engine.py index 81549fc..10ca9f7 100644 --- a/tests/notifier/test_notifier_engine.py +++ b/tests/notifier/test_notifier_engine.py @@ -54,6 +54,7 @@ def test_run(self): message_id=55, is_reply=False, downloaded_media_info=None, + found_on='UT FOUND 7' ) with mock.patch('TEx.notifier.notifier_engine.DiscordNotifier', return_value=discord_notifier_mockup): diff --git a/tests/resources/LargeDownloadedFile.txt b/tests/resources/LargeDownloadedFile.txt new file mode 100644 index 0000000..b8a9475 --- /dev/null +++ b/tests/resources/LargeDownloadedFile.txt @@ -0,0 +1,40 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla feugiat sollicitudin quam sed scelerisque. Nunc in libero dui. Maecenas blandit enim sed turpis cursus elementum. Quisque pharetra non felis nec facilisis. Donec id accumsan purus. Aliquam consectetur pellentesque nunc. Integer quis diam vehicula, porttitor augue id, ornare massa. Integer mi lorem, molestie et fringilla id, malesuada non nisl. 
Nam a felis eget purus egestas scelerisque et nec magna. Maecenas aliquam finibus aliquam. Aliquam tempor mi sed tristique pellentesque. Etiam ac mauris aliquam, varius tortor ac, placerat ligula. + +Vivamus laoreet condimentum mauris egestas sagittis. Praesent scelerisque nulla a tempus posuere. Morbi in metus metus. Aliquam a placerat elit. Nullam consequat mattis faucibus. Cras efficitur ac magna quis viverra. Aliquam tincidunt sem fringilla dui egestas rutrum. Sed placerat laoreet arcu porttitor tincidunt. + +Aliquam dictum orci vitae elit auctor ullamcorper. Nulla sit amet lacus at mi sodales porta. Morbi pharetra mi sit amet arcu convallis venenatis. Phasellus pellentesque purus eget massa tempor, sit amet scelerisque ipsum tristique. Aliquam ultrices tempor elit, ut posuere magna bibendum in. Ut lacinia quis enim vitae vestibulum. Proin ut tristique libero. Aliquam malesuada nibh quis rhoncus iaculis. Nam nec tempor magna. Integer at odio nisl. Fusce lobortis venenatis ante, non lobortis odio porta a. + +Duis in accumsan ligula. Proin ut pulvinar arcu. Curabitur massa odio, ultrices sit amet turpis in, euismod fringilla nunc. Duis tellus tortor, finibus id suscipit eu, finibus pretium quam. Integer rhoncus eu sapien quis tempor. Sed laoreet libero vel turpis viverra, eu vulputate augue lobortis. Praesent lacinia interdum neque et tincidunt. + +Ut eget malesuada ipsum. Phasellus eget odio ornare mauris dignissim auctor. Sed eu porttitor sapien. Cras tellus ipsum, porta eget nisl in, consectetur tristique ligula. In vitae venenatis dolor, sit amet pretium leo. Integer in posuere risus, ut tempus sem. Cras ut ipsum vehicula, sollicitudin lectus et, pellentesque massa. Curabitur elementum sit amet arcu ac ultricies. Nulla ut placerat lacus. Aliquam ullamcorper odio ut metus imperdiet, id gravida dolor bibendum. Aliquam finibus sem id massa ultrices, non tristique ex posuere. 
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Sed posuere pulvinar ultricies. + +Etiam condimentum velit quis iaculis finibus. Phasellus suscipit ut libero et eleifend. Duis vel neque in diam eleifend facilisis. Sed sit amet ligula ac metus finibus gravida. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Praesent sodales ipsum tortor, vel iaculis nunc euismod et. Fusce aliquet mauris ligula, faucibus egestas neque congue a. Cras lacinia velit eu magna gravida, et consequat elit elementum. Aliquam vitae turpis in est tincidunt fermentum eget a leo. Vestibulum rutrum ex lobortis ex accumsan, vitae ultricies ante bibendum. In et ipsum quis velit ornare laoreet. Fusce sit amet dui at augue aliquam euismod. Maecenas sit amet malesuada elit. Nullam eget magna eu quam blandit ultrices. Vivamus blandit ex in nulla sagittis, sed convallis tortor maximus. + +Suspendisse accumsan lacinia magna, a ultricies mauris. Aliquam sed porta velit. Integer iaculis urna et lacus posuere hendrerit. Aliquam erat volutpat. Praesent ac egestas ipsum, ut egestas turpis. Suspendisse elit arcu, scelerisque a felis id, aliquam posuere orci. Sed aliquet magna eu velit tempus, at placerat eros lobortis. Praesent ex nunc, elementum tristique sollicitudin ut, volutpat ac odio. Sed pellentesque felis a felis vulputate, in faucibus metus imperdiet. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut malesuada viverra dui quis mollis. Ut a lobortis elit. Duis pulvinar sem odio, ut varius arcu posuere porttitor. Nullam lacinia est quis massa maximus, auctor sagittis ex accumsan. Etiam eu lorem et sem sollicitudin auctor at id dolor. + +Ut efficitur leo a metus sagittis, id pretium lacus volutpat. Aenean dapibus magna sit amet risus bibendum, at porttitor mi malesuada. Mauris tellus purus, convallis suscipit sapien sed, rhoncus vehicula ante. Nullam dictum nunc sit amet elementum maximus. 
Morbi lacinia lectus mauris, sit amet vehicula libero dignissim sit amet. Donec metus ipsum, suscipit ac nibh at, consequat tempus nibh. Nunc aliquet, nulla at ultrices cursus, urna purus suscipit arcu, a cursus nisi eros et felis. + +Suspendisse potenti. Mauris eu leo id tellus accumsan pharetra. Suspendisse ac massa eget velit hendrerit commodo id sed erat. Cras et metus non nisl dictum mattis ac vitae nulla. Quisque fringilla tincidunt iaculis. Sed sit amet elit eget dolor venenatis pulvinar. Integer gravida leo eget lectus tempus, ut hendrerit ante aliquet. Nunc nec neque convallis, dignissim orci at, fringilla sem. Quisque ullamcorper, erat eu auctor dapibus, metus est feugiat odio, accumsan eleifend turpis mi at nibh. Aenean tempus in augue et finibus. Aliquam rhoncus pellentesque vulputate. Ut pulvinar elit ipsum, a fringilla ex ultrices nec. Suspendisse posuere est vitae pulvinar varius. Nunc efficitur eros non risus faucibus, non porttitor dui commodo. + +Nam aliquet, erat id ultricies volutpat, mi ipsum finibus lorem, at sollicitudin nisi mi et magna. Aliquam id tristique ipsum. Mauris eu lorem eget nulla egestas maximus. Sed et enim diam. Morbi nibh nibh, congue sit amet pretium maximus, maximus et lorem. Ut ac sem mauris. Phasellus congue, tortor et consequat commodo, nunc neque sagittis magna, at tincidunt eros tellus dapibus arcu. Sed fermentum tempor metus at tincidunt. Ut eu mi consequat, sodales orci sit amet, placerat felis. In vel aliquam augue. Integer non ullamcorper mi. + +Fusce cursus augue eget magna vehicula bibendum. Integer non velit eu eros mollis imperdiet eu non lectus. Aenean non lacinia magna, sed faucibus lacus. Vivamus tellus mi, volutpat non turpis nec, tempor faucibus nulla. Nam ac facilisis nisl. Nam ullamcorper tellus id tellus maximus, quis lobortis ipsum gravida. Maecenas facilisis quam id posuere scelerisque. Phasellus viverra ut orci vitae ultricies. 
Curabitur sollicitudin orci at sapien aliquam, sit amet aliquam ante gravida. Maecenas ac gravida nunc, eget placerat urna. Pellentesque mollis arcu mauris, accumsan sodales odio tristique quis. Cras eu quam et nisl aliquam varius in vel enim. Nulla gravida augue eget dapibus laoreet. Proin et lacus urna. In efficitur congue ante. + +Sed suscipit sem at leo aliquam feugiat eu fermentum velit. Vestibulum facilisis pharetra ipsum, eget auctor purus facilisis sagittis. Nulla a ex ut purus efficitur varius sed et arcu. Phasellus ac justo maximus, congue velit at, fermentum ex. Donec efficitur bibendum velit, nec pharetra mi maximus vitae. Duis laoreet massa sed convallis tempus. Curabitur sit amet ipsum non nunc sagittis convallis. Sed pellentesque malesuada odio. Suspendisse quis imperdiet risus, at ornare magna. Curabitur in accumsan ligula. + +Etiam eu nisi faucibus, volutpat eros sed, lobortis eros. Vivamus libero term2 tortor, fringilla sit amet elementum at, tempor eget massa. Sed porttitor aliquam hendrerit. Donec facilisis metus orci, ac sodales tortor porta et. Etiam faucibus est rhoncus est efficitur elementum. Phasellus tempus tristique hendrerit. Sed vitae viverra nunc. Nunc vehicula felis vel magna euismod, sit amet ornare lacus tempus. Nullam pulvinar, nibh non venenatis condimentum, urna augue dictum neque, sed semper diam metus vel purus. Morbi et efficitur lectus, sed tristique neque. Suspendisse et neque et arcu auctor faucibus ac a sem. Sed fermentum a felis nec viverra. Sed convallis, neque eu sollicitudin interdum, metus odio tempor ex, at vestibulum elit arcu at massa. + +Sed vel pretium orci. Phasellus eu augue ac dui euismod rhoncus. Suspendisse nec est pharetra, gravida sapien id, gravida mi. Donec tortor dui, euismod at dapibus non, lacinia eget nisl. Mauris viverra tortor et dui aliquet pulvinar. Mauris molestie pulvinar purus, vel ornare urna consequat non. Phasellus varius odio nunc. 
Sed sit amet massa blandit, condimentum ante ut, pretium eros. Curabitur eros enim, egestas in libero sit amet, rhoncus sodales nulla. Maecenas luctus tellus quis odio tristique, et volutpat ante blandit. + +Fusce augue tellus, hendrerit sit amet auctor vel, ultricies ac massa. Proin blandit viverra euismod. Integer posuere sem urna, vel faucibus ipsum pretium sit amet. Vivamus vulputate pulvinar neque eu pellentesque. Nulla facilisi. Aliquam accumsan sapien sit amet consequat mattis. Aliquam fringilla nulla at est consectetur lobortis. + +Etiam eget interdum dolor. Vivamus tincidunt quam gravida condimentum sagittis. Nullam tortor risus, blandit id varius laoreet, eleifend non ligula. Aliquam erat volutpat. Morbi tempus venenatis elit eu rutrum. In in sem quis lectus facilisis ullamcorper. Aliquam porttitor est non ante fermentum interdum. Quisque et iaculis augue, sit amet elementum ligula. Pellentesque ac tincidunt enim. Nulla facilisis commodo leo at consectetur. Praesent molestie neque eget sapien eleifend, quis lacinia metus lobortis. Pellentesque fringilla at lacus ac feugiat. Phasellus tristique orci quis turpis venenatis imperdiet. Suspendisse ipsum ex, ullamcorper in mollis ac, porta et lectus. Aliquam efficitur, nulla at dapibus porta, tellus ante rhoncus libero, eget malesuada nisi lacus vitae enim. + +Duis sed eros et ipsum aliquet luctus. Sed faucibus fermentum velit sed pretium. Phasellus facilisis dolor ullamcorper, hendrerit mi non, volutpat sapien. Aenean consequat ornare magna non dapibus. Pellentesque euismod, tortor vel hendrerit congue, dui nisi molestie risus, non ultrices purus diam tincidunt magna. Aliquam a arcu sit amet nisi semper consequat. Ut dictum ex a consequat molestie. Nulla nec ligula lectus. Mauris tortor odio, commodo eget mattis non, pretium blandit erat. Phasellus ornare sollicitudin justo. In aliquet justo vel magna dapibus molestie. Aenean a metus eu arcu porta bibendum sit amet a neque. 
Duis augue leo, euismod id ante nec, feugiat varius nisi. Vestibulum tincidunt, metus et ultricies aliquet, risus lorem lobortis diam, non imperdiet orci turpis ut orci. Curabitur commodo porta nisi sed euismod. + +Maecenas suscipit, odio in condimentum rhoncus, ligula ante mattis tellus, sit amet vulputate leo dui sit amet sem. Vivamus sed lacus ac risus suscipit placerat eget et mauris. Suspendisse ultricies tortor ac leo malesuada varius. Aenean gravida turpis eget sem scelerisque feugiat et euismod nibh. Ut auctor fringilla gravida. Integer placerat consequat elementum. Nullam feugiat, odio ac viverra blandit, urna quam vestibulum leo, id dignissim leo ligula dictum leo. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Curabitur tincidunt iaculis luctus. Aenean volutpat scelerisque leo tempor mollis. Duis iaculis nulla ut tempor condimentum. Proin non elit nunc. Praesent tincidunt venenatis urna, at aliquet metus consectetur ut. + +Nullam facilisis ante eleifend augue dignissim, sed blandit enim vulputate. Morbi vitae risus tristique, sagittis leo sed, tempor ipsum. Etiam tortor nibh, blandit eu turpis id, aliquet sodales arcu. Vestibulum neque ante, eleifend sit amet metus finibus, mollis euismod enim. Sed eleifend magna ante, quis rhoncus sapien rutrum eget. Praesent quam sapien, molestie quis risus in, ultricies facilisis nisl. Maecenas sit amet mi massa. Nulla facilisi. Proin eget est egestas, suscipit nulla vel, porta velit. + +Donec non lacinia massa. Etiam eget purus vehicula, dapibus purus eu, auctor velit. Sed lobortis neque ut nunc tincidunt, vel pellentesque erat feugiat. Duis a lacinia felis. Proin iaculis mattis nulla, vitae auctor erat mattis in. Aliquam et tincidunt enim, non lobortis mauris. Integer tristique velit sit amet mi ullamcorper efficitur. Donec laoreet justo vitae purus iaculis malesuada. Integer semper sapien ut leo vestibulum interdum. 
Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Aliquam at finibus velit. Ut sagittis laoreet odio ac dictum. Suspendisse potenti. Fusce blandit diam vitae feugiat scelerisque. + diff --git a/tests/unittest_configfile.config b/tests/unittest_configfile.config index fcdcb81..d6602eb 100644 --- a/tests/unittest_configfile.config +++ b/tests/unittest_configfile.config @@ -43,6 +43,8 @@ groups=5586,12099,1 [FINDER] enabled=true +find_in_text_files_enabled=true +find_in_text_files_max_size_bytes=20000000 [FINDER.RULE.UT_Finder_Demo] type=regex