From 8ab6320223ba94b5dda20c9b7a118803751c3dff Mon Sep 17 00:00:00 2001 From: harutono Date: Tue, 2 Jan 2024 00:11:42 +0800 Subject: [PATCH] add img embedding add natural language image search tweak manager running logic add: log improve img emb search load and ux add: lock during img embedding tweak onboard setting fix: handle error with no search results Update embedding_img_for_all_videofiles.py add i18n add: webui img embed search add manual img emb script add idle routine refactor video file on disk checking eliminate unnecessary endswith add: video file embedding process remove random walk, tweak code, WIP, sorry. add db_get_row_from_vid_filename tweak webui add image search webui tweak search ux; add image embed lib add install script update onboard setting add extension readme Update languages.json --- config/src/config_default.json | 7 +- config/src/languages.json | 45 ++- ...index_img_embedding_for_all_videofiles.bat | 11 + .../index_img_embedding_for_all_videofiles.py | 93 +++++ .../meta.json | 7 + .../install_img_embedding_module.bat | 107 ++++++ .../install_img_embedding_module/meta.json | 7 + .../test_install.py | 55 +++ extension/place_or_dev_extension_here | 43 +++ onboard_setting.py | 15 +- pyproject.toml | 2 + record_screen.py | 20 +- windrecorder/config.py | 19 +- windrecorder/db_manager.py | 53 ++- windrecorder/file_utils.py | 64 ++-- windrecorder/img_embed_manager.py | 330 ++++++++++++++++ windrecorder/ocr_manager.py | 23 +- windrecorder/oneday.py | 7 +- windrecorder/ui/oneday.py | 6 +- windrecorder/ui/search.py | 362 +++++++++++------- windrecorder/ui/setting.py | 45 ++- windrecorder/utils.py | 36 +- 22 files changed, 1148 insertions(+), 209 deletions(-) create mode 100644 extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.bat create mode 100644 extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.py create mode 100644 extension/index_img_embedding_for_all_videofiles/meta.json create mode 100644 extension/install_img_embedding_module/install_img_embedding_module.bat create mode 100644 extension/install_img_embedding_module/meta.json create mode 100644 extension/install_img_embedding_module/test_install.py create mode 100644 extension/place_or_dev_extension_here create mode 100644 windrecorder/img_embed_manager.py diff --git a/config/src/config_default.json b/config/src/config_default.json index c1902c77..1273c099 100644 --- a/config/src/config_default.json +++ b/config/src/config_default.json @@ -11,6 +11,7 @@ "flag_mark_note_filename": "flag_mark_note.csv", "thumbnail_generation_size_width": 70, "thumbnail_generation_jpg_quality": 30, + "vdb_img_path": "./db_imgemb", "search_max_num": "50", "lang": "sc", "ocr_lang": "zh-Hans-CN", @@ -50,10 +51,14 @@ "maintain_lock_subdir": "LOCK_MAINTAIN", "record_lock_name": "LOCK_FILE_RECORD.MD", "tray_lock_name": "LOCK_FILE_TRAY.MD", + "img_emb_lock_name": "LOCK_FILE_IMG_EMB.MD", "last_idle_maintain_file_path": "cache\\LAST_IDLE_MAINTAIN.MD", "iframe_dir": "cache\\i_frames", "log_dir": "cache\\logs", "win_title_dir": "cache\\win_title", "show_oneday_left_side_stat": true, - "webui_access_password_md5": "" + "webui_access_password_md5": "", + "enable_img_embed_search": true, + "img_embed_search_recall_result_per_db": 20, + "img_embed_module_install": false } diff --git a/config/src/languages.json b/config/src/languages.json index 36fa0e08..165c5bb9 100644 --- a/config/src/languages.json +++ b/config/src/languages.json @@ -61,6 +61,13 @@ "gs_text_intro": "This is the global search page where you can search all the recorded content to date. Press Enter to search after entering the keywords.", "gs_slider_to_rewind_result": "Drag to rewind search results", "gs_text_randomwalk": "random walk", + "gs_text_video_file_not_on_disk": "Video File **{df_videofile_name}** not on disk.", + "gs_option_ocr_text_search": "Text OCR search", + "gs_option_img_emb_search": "Image semantic search", + "gs_input_img_emb_search": "Use natural language to describe images", + "gs_text_img_emb_help": "Use natural language to describe the content of the screen. The more precise the description, the closer the result will be. Up to 21 languages ​​such as Chinese, English, Japanese, and Korean are supported here (detailed uform document). The video needs to be embedded after indexing Only by searching, please see the setting item description on the settings page for details.", + "gs_text_searching": "Searching, please stand by...", + "gs_text_loading_text_embed_model": "Loading Text Embedding model...", "stat_md_month_title": "### 🌖 Monthly Statistics", "stat_md_year_title": "### 🎏 {stat_year_title} Record", @@ -147,7 +154,12 @@ "set_pwd_text": "webui access password (leave blank to disable)", "set_pwd_help": "After enabling this setting, you will be asked to provide a password when accessing webui. This setting will not encrypt your data, but only protects the entrance to webui to avoid access by unfamiliar users in the same LAN.", "set_pwd_forget_help": "Forgot your password? Delete the webui_access_password_md5 item in config_user.json to reset password.", - + "set_checkbox_enable_img_emb": "Enable image semantic retrieval", + "set_text_enable_img_emb_help": "Image semantic retrieval is a method of image retrieval based on the semantic content of images through computer vision technology. It can retrieve queries from large-scale image databases based on the semantic description of the content of the image. Related images. Windrecorder uses uform-vl-multilingual-v2 to embed the index. When this option is turned on, the program will build an image embed index for the video in its free time, and then it can be searched globally. You can also search it through the program directory The extension/index_img_embedding_for_all_videofilesscript is indexed manually.", + "set_text_img_emb_not_suppport_cuda": "Your device does not appear to support CUDA and may have lower performance when using the CPU to semantically index images.", + "set_input_img_emb_max_recall_count": "The maximum number of results recalled from each database in natural semantic search", + "set_text_help_img_emb_max_recall_count": "During natural semantic search, a specified number of results will be recalled from the database for each time period. Too high or too low a number may lead to a decrease in query accuracy.", + "qs_config_indicator": " ← Current options", "qs_la_text_same_as_previous": "The interface language remains the same as before: English", "qs_un_set_your_username": "Set your username as a database identifier.", @@ -170,6 +182,7 @@ "qs_mo_describe": "Note: Due to the lack of official support for multiple monitors in pyautogui, Windrecorder will only record the screen set as the 'primary display' in Windows.\n", "qs_mo_detect": "The detected resolution of the primary display is: {monitor_width}x{monitor_height}", "qs_mo_cta": "This setting will be automatically detected each time you start recording, so you don't need to choose or set it separately.", + "qs_et_describe": "Windrecorder also provides some extension functions, which you can later install/use in the extension directory.", "qs_end_describe": "Congratulations! You have completed all initial settings. Don’t worry, you can adjust the settings anytime within the app! \n\nNow, you can open [start_app.bat] in the directory to start using it. \n", "qs_end_slogan": "> Capture and preserve the fleeting moments of the wind, as seen through your eyes.", "qs_end_feedback": "> Encountered a problem or have suggestions? Feel free to submit issues and PRs at https://github.com/yuka-friends/Windrecorder.", @@ -261,6 +274,13 @@ "gs_text_intro": "这里是全局搜索页,可以搜索到迄今记录的所有内容。输入关键词后回车即可搜索。", "gs_slider_to_rewind_result": "拖动回溯搜索结果", "gs_text_randomwalk": "随便走走", + "gs_text_video_file_not_on_disk": "磁盘上没有找到 **{df_videofile_name}**", + "gs_option_ocr_text_search": "文本 OCR 搜索", + "gs_option_img_emb_search": "图像语义搜索", + "gs_input_img_emb_search": "使用自然语言描述图像", + "gs_text_img_emb_help": "用自然语言描述画面内容,描述越精确、结果将越接近。此处支持中、英、日、韩等多达 21 种语言输入(详细 uform 文档)。视频需要被嵌入索引后才能搜索得到,详见设置页设置项说明。", + "gs_text_searching": "搜索中,请稍后……", + "gs_text_loading_text_embed_model": "加载文本嵌入模型中,请稍后……", "stat_md_month_title": "### 🌖 当月数据统计", "stat_md_year_title": "### 🎏 {stat_year_title} 记录", @@ -347,6 +367,11 @@ "set_pwd_text": "webui 访问密码(留空则不启用)", "set_pwd_help": "启用此项设置后,会在访问 webui 时要求提供密码。此项设置不会加密你的数据,仅保护 webui 的使用入口,以避免同局域网内陌生用户访问。", "set_pwd_forget_help": "忘记密码?请将 config_user.json 中的 webui_access_password_md5 项删除重置。", + "set_checkbox_enable_img_emb": "启用图像语义检索", + "set_text_enable_img_emb_help": "图像语义检索是一种通过计算机视觉技术、基于图像的语义内容进行图像检索的方法。它可以做到根据对图像的内容语义描述,从大规模的图像数据库中检索出查询出相关的图像。Windrecorder 使用 uform-vl-multilingual-v2 来嵌入索引。开启该选项后,程序将在空闲时间对视频建立图像嵌入索引,之后便能对此进行全局搜索。你也可以通过程序目录下的 extension/index_img_embedding_for_all_videofiles脚本手动索引。", + "set_text_img_emb_not_suppport_cuda": "你的设备似乎不支持 CUDA,在使用 CPU 对图像语义索引时可能性能较低。", + "set_input_img_emb_max_recall_count": "自然语义搜索中,从每个数据库召回的最大结果数", + "set_text_help_img_emb_max_recall_count": "自然语义搜索时,会分别从每个时间段的数据库中召回指定数量结果,过高或过低的数量可能导致查询准确率降低。", "qs_config_indicator": " ← 当前选项", "qs_la_text_same_as_previous": "界面语言保持与先前一致的:简体中文", @@ -370,6 +395,7 @@ "qs_mo_describe": "注意:由于 pyautogui 暂未官方支持多显示器,捕风记录仪将只记录 Windows 下设置的【主显示器】\n", "qs_mo_detect": "当前检测到的主显示器分辨率为:{monitor_width}x{monitor_height}", "qs_mo_cta": "此项设定将在每次录屏时自动识别,无需额外选择与设定。", + "qs_et_describe": "捕风记录仪 还提供了一些扩展功能,你可以稍后在 extension 目录下安装/使用。", "qs_end_describe": "恭喜!你已完成所有初始设定。别担心,你可以随时在应用内调整设置!\n\n现在,你可以打开目录下的 【start_app.bat】 来开始使用啦。\n", "qs_end_slogan": "> 一起捕捉贮藏风一般掠过的、你的目之所见。", "qs_end_feedback": "> 遇到问题、想反馈建议?欢迎在 https://github.com/yuka-friends/Windrecorder 提交 issue 与 PR。", @@ -461,7 +487,14 @@ "gs_text_intro": "これはグローバル検索ページです。これまでに記録されたすべてのコンテンツを検索できます。キーワードを入力して Enter キーを押すと検索が開始されます。", "gs_slider_to_rewind_result": "検索結果を巻き戻す", "gs_text_randomwalk": "散歩する", - + "gs_text_video_file_not_on_disk": "**{df_videofile_name}** がディスク上に見つかりません", + "gs_option_ocr_text_search": "テキスト OCR 検索", + "gs_option_img_emb_search": "画像セマンティック検索", + "gs_input_img_emb_search": "自然言語を使用して画像を説明します", + "gs_text_img_emb_help": "画面の内容を自然言語で説明します。説明が正確であればあるほど、結果はより正確になります。ここでは、中国語、英語、日本語、韓国語など、最大 21 の言語がサポートされています(uform ドキュメントの詳細)。ビデオはインデックス作成後に埋め込む必要があります。検索のみで、詳細については設定ページの設定項目の説明を参照してください。", + "gs_text_searching": "検索中です、お待ちください...", + "gs_text_loading_text_embed_model": "テキスト埋め込みモデルを読み込み中...", + "stat_md_month_title": "### 🌖 今月のデータ統計", "stat_md_year_title": "### 🎏 {stat_year_title} の記録", "stat_md_memory_title": "### 🧩 メモリの要約", @@ -547,7 +580,12 @@ "set_pwd_text": "webui アクセス パスワード (無効にする場合は空白のままにします)", "set_pwd_help": "この設定を有効にすると、webui にアクセスするときにパスワードの入力を求められます。この設定はデータを暗号化しませんが、同じ LAN 内の見慣れないユーザーによるアクセスを避けるために webui への入り口を保護するだけです。", "set_pwd_forget_help": "パスワードをお忘れですか? config_user.json の webui_access_password_md5 項目を削除してリセットしてください。", - + "set_checkbox_enable_img_emb": "画像のセマンティック検索を有効にする", + "set_text_enable_img_emb_help": "画像意味検索は、コンピューター ビジョン テクノロジーによる画像の意味内容に基づく画像検索方法です。画像内容の意味記述に基づいて、大規模画像データベースからクエリを取得できます。関連画像Windrecorder は、uform-vl-multilingual-v2 を使用してインデックスを埋め込みます。このオプションをオンにすると、プログラムは空き時間にビデオの画像埋め込みインデックスを構築し、グローバルに検索できるようになります。プログラム ディレクトリを介して、extension/index_img_embedding_for_all_videofilesスクリプトのインデックスが手動で作成されます。", + "set_text_img_emb_not_suppport_cuda": "お使いのデバイスは CUDA をサポートしていないようです。CPU を使用してイメージのセマンティック インデックスを作成するとパフォーマンスが低下する可能性があります。", + "set_input_img_emb_max_recall_count": "自然セマンティック検索で各データベースから呼び出される結果の最大数", + "set_text_help_img_emb_max_recall_count": "ナチュラル セマンティック検索では、指定された数の結果が期間ごとにデータベースから呼び出されます。数値が多すぎたり低すぎたりすると、クエリの精度が低下する可能性があります。", + "qs_config_indicator": " ← 現在のオプション", "qs_la_text_same_as_previous": "インターフェイス言語は以前と同じで日本語です。", "qs_un_set_your_username": "データベースの識別子として使用するユーザー名を設定してください。", @@ -570,6 +608,7 @@ "qs_mo_describe": "注意:pyautoguiは公式に複数のディスプレイをサポートしていないため、WindrecorderはWindowsで設定された【メインディスプレイ】のみを記録します。\n", "qs_mo_detect": "現在検出されたメインディスプレイの解像度は{monitor_width}x{monitor_height}", "qs_mo_cta": "この設定は画面録画時に自動的に識別され、追加の選択や設定は必要ありません。", + "qs_et_describe": "Windrecorder にはいくつかの拡張機能も用意されており、後で拡張機能ディレクトリにインストールして使用できます。", "qs_end_describe": "おめでとう! すべての初期設定が完了しました。 心配しないでください。設定はアプリ内でいつでも調整できます。 \n\nこれで、ディレクトリ内の [start_app.bat] を開いて使用を開始できます。 \n", "qs_end_slogan": "> 一緒に、あなたの目が見た、風のように過ぎ去るものをキャプチャしましょう。", "qs_end_feedback": "> 問題が発生した場合やフィードバックの提案がある場合は、https://github.com/yuka-friends/Windrecorder でissueやPRを提出してください。", diff --git a/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.bat b/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.bat new file mode 100644 index 00000000..1e5ee629 --- /dev/null +++ b/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.bat @@ -0,0 +1,11 @@ +@echo off +echo Loading extension, please stand by. +echo. + +cd /d %~dp0 +for /F "tokens=* USEBACKQ" %%A in (`python -m poetry env info --path`) do call %%A\Scripts\activate.bat +chcp 65001 +cls + +python "%~dp0\index_img_embedding_for_all_videofiles.py" +pause \ No newline at end of file diff --git a/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.py b/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.py new file mode 100644 index 00000000..6998983a --- /dev/null +++ b/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.py @@ -0,0 +1,93 @@ +# Set workspace to Windrecorder dir +import sys +import os +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.append(parent_parent_dir) +os.chdir("..") +os.chdir("..") + +import subprocess +import datetime +from os import getpid + +from windrecorder import file_utils, utils +from windrecorder.config import config +from windrecorder.exceptions import LockExistsException +from windrecorder.lock import FileLock + +if config.img_embed_module_install: + try: + from windrecorder import img_embed_manager + except ModuleNotFoundError: + config.set_and_save_config("img_embed_module_install", False) + print('Img Embedding Module seems not installed, please install first.') + sys.exit() +else: + print('Img Embedding Module seems not installed, please install first.') + sys.exit() + +subprocess.run("title Embedding Img for existing video files", shell=True) + +videos_filepath = file_utils.get_file_path_list(config.record_videos_dir) +videos_filepath_filter = [item for item in videos_filepath if '-IMGEMB' not in item] +videos_filepath_filter_num = len(videos_filepath_filter) + +per_video_embedding_time = datetime.timedelta(minutes=2) * config.record_seconds / 900 # 在使用 cuda 的情况下,每 900s 视频需要 2 分钟完成索引。其中拆 iframe 占了大部分时间 +eta_process_all_video = videos_filepath_filter_num * per_video_embedding_time + + +def main(): + while True: + subprocess.run("cls", shell=True) + if img_embed_manager.is_cuda_available: + print('√ Your device support CUDA acceleration.') + else: + print('X Your device seems not support CUDA acceleration, embedding performance might be slow.') + + text_intro = f""" + +本脚本可以将你未进行图像嵌入索引的历史视频进行索引。索引完成后,你可以使用自然语言描述来查找对应图像画面。 +This script can index your no image embedding historical videos. After indexed, you can use natural language descriptions to find corresponding images in video files. + +-------------------------------------------------------------------- + +约有 {videos_filepath_filter_num} 个视频未图像嵌入索引,索引所有视频预估用时:{utils.convert_seconds_to_hhmmss(eta_process_all_video.seconds)} + +- 若要索引全部视频文件,请输入 Y 后回车确认。 +- 若只想先索引部分视频,请输入数字后回车确认(应小于 {videos_filepath_filter_num} )。每个视频的索引用时预估{utils.convert_seconds_to_hhmmss(per_video_embedding_time.seconds)},同时将会从最新的视频开始、向旧视频进行索引。 + +提示: 索引过程中,可以随时关闭终端窗口来中止索引。别担心,已索引的进度都会被保存,下次会继续进度。 + +There are approximately {videos_filepath_filter_num} videos without image embedding index. Estimated time to index all videos: {utils.convert_seconds_to_hhmmss(eta_process_all_video.seconds)} + +- To index all video files, please enter Y and press Enter to confirm. +- If you only want to index some videos first, please enter the number and press Enter to confirm (should be less than {videos_filepath_filter_num}). The indexing time of each video is estimated {utils.convert_seconds_to_hhmmss(per_video_embedding_time.seconds)}, and indexing will start from the latest video to the old video. + +Tip: During the indexing process, you can close the terminal window at any time to abort the indexing. Don't worry, all indexed progress will be saved and progress will continue next time. + + """ + print(text_intro) + user_input = input("> ") + if user_input.lower() == "y": + img_embed_manager.all_videofile_do_img_embedding_routine(video_queue_count=videos_filepath_filter_num) + break + try: + val = int(user_input) + if 0 < val < videos_filepath_filter_num: + img_embed_manager.all_videofile_do_img_embedding_routine(video_queue_count=val) + break + except ValueError: + pass + + subprocess.run("cls", shell=True) + print('指定的选项下视频已索引完成,你可以在 webui 使用自然语言描述来查找对应图像画面。') + + +try: + img_emb_lock = FileLock(config.img_emb_lock_path, str(getpid()), timeout_s=None) + with img_emb_lock: + main() +except LockExistsException: + subprocess.run("cls", shell=True) + print('Warring: Seems another img embedding indexing process is running.\n If not, please try to delete cache/lock/LOCK_FILE_IMG_EMB.MD and try again.\n') \ No newline at end of file diff --git a/extension/index_img_embedding_for_all_videofiles/meta.json b/extension/index_img_embedding_for_all_videofiles/meta.json new file mode 100644 index 00000000..9143cb4c --- /dev/null +++ b/extension/index_img_embedding_for_all_videofiles/meta.json @@ -0,0 +1,7 @@ +{ + "extension_name": "Index image embedding for all videofiles", + "developer_name": "antonoko", + "developer_url": "https://github.com/Antonoko", + "version": "0.0.1", + "description_markdown": "本脚本可以将你未进行图像嵌入索引的历史视频进行索引。索引完成后,你可以在 webui 中使用自然语言描述,来搜索对应图像画面。\n\nThis script can index your historical videos that have not been indexed by image embedding. After the indexing is completed, you can use natural language descriptions in webui to search for corresponding images.\n\n[什么是图像嵌入索引?What's an image embedding?](https://blog.roboflow.com/what-is-an-image-embedding/)" +} \ No newline at end of file diff --git a/extension/install_img_embedding_module/install_img_embedding_module.bat b/extension/install_img_embedding_module/install_img_embedding_module.bat new file mode 100644 index 00000000..423cbcf0 --- /dev/null +++ b/extension/install_img_embedding_module/install_img_embedding_module.bat @@ -0,0 +1,107 @@ +@echo off +echo Loading extension, please stand by. +echo. + +cd /d %~dp0 +for /F "tokens=* USEBACKQ" %%A in (`python -m poetry env info --path`) do call %%A\Scripts\activate.bat +chcp 65001 + +:start_install +cls +echo. +echo This script will install the image semantic indexing function for Windrecorder. +echo After installation, you can index and search for corresponding images using natural language descriptions. +echo 本向导将为捕风记录仪安装图像语义索引功能。安装完毕后,可以索引并用自然语言描述来搜索对应画面。 +echo. +echo ================================================================================ +echo. +echo Installation options (install the download in the virtual environment of Windrecorder, occupying about 4G space): +echo 安装选项(将下载安装在 Windrecorder 的虚拟环境中,约占用 4G 空间): +echo. +echo 1. Install a version that supports CUDA acceleration for Nvidia graphics cards; +echo 安装支持 Nvidia 显卡 CUDA 加速的版本; +echo. +echo 2. Install CPU version; 安装 CPU 版本; +echo. +echo. +set /p choice= Please enter the options and press Enter: + +if "%choice%"=="1" ( + echo Installing an environment that supports CUDA acceleration; 正在安装支持 CUDA 加速的环境 + goto install_cuda +) + +if "%choice%"=="2" ( + echo Installing an environment that supports CPU computing; 正在安装支持 CPU 运算的环境 + goto install_cpu +) + +goto start_install + + +@REM ------------------------------------------------- +:install_cpu +poetry run pip install -i https://pypi.tuna.tsinghua.edu.cn/simple uform +goto :finish + + +@REM ------------------------------------------------- +:install_cuda + +:: 查找 Python 版本 +for /f "delims=" %%I in ('python --version 2^>^&1') do set "result=%%I" + +:: 根据 Python 版本跳转执行命令 +echo %result% | findstr /C:"Python 3.10" 1>nul +if errorlevel 1 ( +echo %result% | findstr /C:"Python 3.11" 1>nul +if errorlevel 1 ( + echo Other version installed + goto Other +) else ( + echo Python 3.11 installed + goto Python311 +)) else ( + echo Python 3.10 installed + goto Python310 +) + +:Python310 +echo Running Python 3.10 specific commands... +@REM using aliyun mirrors +poetry run pip install -i https://pypi.tuna.tsinghua.edu.cn/simple uform +poetry run pip install https://mirrors.aliyun.com/pytorch-wheels/cu121/torch-2.2.0+cu121-cp310-cp310-win_amd64.whl +poetry run pip install https://mirrors.aliyun.com/pytorch-wheels/cu121/torchaudio-2.2.0+cu121-cp310-cp310-win_amd64.whl +poetry run pip install https://mirrors.aliyun.com/pytorch-wheels/cu121/torchvision-0.17.0+cu121-cp310-cp310-win_amd64.whl +goto :finish + +:Python311 +echo Running Python 3.11 specific commands... +@REM using official source +@REM poetry run pip install https://download.pytorch.org/whl/cu121/torch-2.1.0%2Bcu121-cp311-cp311-win_amd64.whl +@REM poetry run pip install https://download.pytorch.org/whl/cu121/torchaudio-2.1.0%2Bcu121-cp311-cp311-win_amd64.whl +@REM poetry run pip install https://download.pytorch.org/whl/cu121/torchvision-0.16.0%2Bcu121-cp311-cp311-win_amd64.whl + +@REM using aliyun mirrors +poetry run pip install -i https://pypi.tuna.tsinghua.edu.cn/simple uform +poetry run pip install https://mirrors.aliyun.com/pytorch-wheels/cu121/torch-2.2.0+cu121-cp311-cp311-win_amd64.whl +poetry run pip install https://mirrors.aliyun.com/pytorch-wheels/cu121/torchaudio-2.2.0+cu121-cp311-cp311-win_amd64.whl +poetry run pip install https://mirrors.aliyun.com/pytorch-wheels/cu121/torchvision-0.17.0+cu121-cp311-cp311-win_amd64.whl +goto :finish + +:Other +echo Error: python3.10 or 3.11 not detected +goto :finish + + +@REM ------------------------------------------------- +:finish +echo. +echo checking the installation results... 检查安装结果…… +echo. +python test_install.py +echo. +echo The installation script has been completed. 已执行完安装脚本。 +echo. +pause +exit \ No newline at end of file diff --git a/extension/install_img_embedding_module/meta.json b/extension/install_img_embedding_module/meta.json new file mode 100644 index 00000000..71a5b7fe --- /dev/null +++ b/extension/install_img_embedding_module/meta.json @@ -0,0 +1,7 @@ +{ + "extension_name": "Install Image Embedding Module for Windrecorder", + "developer_name": "antonoko", + "developer_url": "https://github.com/Antonoko", + "version": "0.0.1", + "description_markdown": "本脚本可以为捕风记录仪安装图像语义索引功能。安装完毕后,可以索引并用自然语言描述来搜索对应画面。\n受限于使用场景与模型精度,可能实际并没有那么有用和准确,但还是挺有意思的。\n\nThis script can install the image semantic indexing function for Windrecorder. After installation, you can index and search for corresponding images using natural language descriptions.\nLimited by usage scenarios and model accuracy, it may not actually be that useful and accurate, but it is still quite interesting.\n\n[什么是图像嵌入索引?What's an image embedding?](https://blog.roboflow.com/what-is-an-image-embedding/)" +} \ No newline at end of file diff --git a/extension/install_img_embedding_module/test_install.py b/extension/install_img_embedding_module/test_install.py new file mode 100644 index 00000000..56895164 --- /dev/null +++ b/extension/install_img_embedding_module/test_install.py @@ -0,0 +1,55 @@ +# Set workspace to Windrecorder dir +import sys +import os +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.append(parent_parent_dir) +os.chdir("..") +os.chdir("..") + +from windrecorder.config import config +def set_config_module_install(state: bool): + config.set_and_save_config("img_embed_module_install", state) + +# 检查是否能启用 cuda +try: + import torch + if torch.cuda.is_available(): + print(' 恭喜!你的设备支持 CUDA 加速。') + print(' Congratulations! Your device supports CUDA acceleration.') + else: + print(' 你的设备似乎不支持 CUDA 加速、或安装了 CPU 计算的 pytorch 环境。在索引时可能会存在性能问题。') + print(' Your device does not seem to support CUDA acceleration, or the pytorch environment for CPU computing is installed. There may be performance issues during indexing.') + +except ModuleNotFoundError: + print(' Pytorch 未能成功安装,若重试后仍然安装失败,请复制以上报错信息前往 GitHub issue 进行反馈。') + print(' Pytorch failed to install successfully. If the installation still fails after trying again, please copy the above error message and send it to GitHub issue for feedback.') + set_config_module_install(False) + +print() + +# 检查 uform 是否被安装 +try: + import uform + print(' uform 已成功安装!') + print(' uform has been successfully installed!') + + try: + from windrecorder import img_embed_manager + print() + print(' 检查是否已下载嵌入模型,若有将跳过。') + print(' Checking if the embedded model has been downloaded, if so it will be skipped.') + + img_embed_manager.get_model('cpu') + set_config_module_install(True) + config.set_and_save_config("enable_img_embed_search", True) + except Exception as e: + print(e) + print(' uform 模型似乎下载失败,请检查网络、添加代理或进行重试。') + print(' uform model seems to have failed to download, please check the network, add a proxy, or try again.') + set_config_module_install(False) + +except ModuleNotFoundError: + print(' uform 未成功安装,若重试后仍然安装失败,请复制以上报错信息前往 GitHub issue 进行反馈。') + print(' uform was not successfully installed. If the installation still fails after retrying, please copy the above error message and send it to GitHub issue for feedback.') + set_config_module_install(False) diff --git a/extension/place_or_dev_extension_here b/extension/place_or_dev_extension_here new file mode 100644 index 00000000..8dfb31e3 --- /dev/null +++ b/extension/place_or_dev_extension_here @@ -0,0 +1,43 @@ +An extension may contains at least the following files: + + +# meta.json +```json +{ + "extension_name": "", + "developer_name": "", + "developer_url": "", + "version": "0.0.1", + "description_markdown": "" +} +``` + + +# your_extension.py +```python +# Set workspace to Windrecorder dir +import sys +import os +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.append(parent_parent_dir) +os.chdir("..") +os.chdir("..") + +# extension code below +``` + + +# bash_for_user.bat +``` +@echo off +echo Loading extension, please stand by. +echo. + +cd /d %~dp0 +for /F "tokens=* USEBACKQ" %%A in (`python -m poetry env info --path`) do call %%A\Scripts\activate.bat +chcp 65001 +cls + +:: extension code below +``` \ No newline at end of file diff --git a/onboard_setting.py b/onboard_setting.py index a43b63ed..bf21ad39 100644 --- a/onboard_setting.py +++ b/onboard_setting.py @@ -12,7 +12,7 @@ upgrade_migration_routine.main() # 全部向导的步骤数 -ALLSTEPS = 5 +ALLSTEPS = 6 # 清理缓存 if os.path.exists("cache"): @@ -250,9 +250,20 @@ def config_indicator(config_element, expect_result): divider() subprocess.run("pause", shell=True) +# 扩展介绍 +while True: + print_header(step=5) + print(_t("qs_et_describe")) + print() + for key, value in file_utils.get_extension().items(): + print(f" - {key}") + break + +divider() +subprocess.run("pause", shell=True) # 完成初始化设定 -print_header(step=5) +print_header(step=6) print(_t("qs_end_describe")) print(_t("qs_end_slogan")) print(_t("qs_end_feedback")) diff --git a/pyproject.toml b/pyproject.toml index 740e5b72..54948448 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ pystray = "^0.19.5" customtkinter = "^5.2.1" psutil = "^5.9.5" scikit-image = "^0.22.0" +faiss-cpu = "^1.7.4" +tqdm = "^4.65.0" [tool.poetry.group.dev.dependencies] pre-commit = "^3.5.0" diff --git a/record_screen.py b/record_screen.py index 001596a6..dda6390f 100644 --- a/record_screen.py +++ b/record_screen.py @@ -23,10 +23,17 @@ from windrecorder.exceptions import LockExistsException from windrecorder.lock import FileLock +if config.img_embed_module_install: + try: + from windrecorder import img_embed_manager + except ModuleNotFoundError: + config.set_and_save_config("img_embed_module_install", False) + pass #TODO log here + # 全局状态变量 monitor_idle_minutes = 0 last_screenshot_array = None -idle_maintain_time_gap = datetime.timedelta(hours=8) # 与上次闲时维护至少相隔 +idle_maintain_time_gap = datetime.timedelta(minutes=40) # 与上次闲时维护至少相隔 idle_maintaining_in_process = False # 维护中的锁 last_idle_maintain_time = datetime.datetime.now() @@ -56,12 +63,21 @@ def idle_maintain_process_main(): idle_maintaining_in_process = True try: threading.Thread(target=ocr_manager.ocr_manager_main, daemon=True).start() + # 图像语义嵌入 + if config.enable_img_embed_search and config.img_embed_module_install: + try: + img_emb_lock = FileLock(config.img_emb_lock_path, str(getpid()), timeout_s=30*60) + with img_emb_lock: + img_embed_manager.all_videofile_do_img_embedding_routine() + except LockExistsException: + print('another img embedding indexing is running.') + # 清理过时视频 ocr_manager.remove_outdated_videofiles() # 压缩过期视频 ocr_manager.compress_outdated_videofiles() # 生成随机词表 - wordcloud.generate_all_word_lexicon_by_month() + # wordcloud.generate_all_word_lexicon_by_month() except Exception as e: print(f"Error on idle maintain: {e}") finally: diff --git a/windrecorder/config.py b/windrecorder/config.py index 989cf5cb..30e45a16 100644 --- a/windrecorder/config.py +++ b/windrecorder/config.py @@ -15,7 +15,7 @@ class Config: def __init__( self, db_path, - db_filename, + vdb_img_path, record_videos_dir, record_seconds, record_framerate, @@ -50,6 +50,7 @@ def __init__( maintain_lock_subdir, record_lock_name, tray_lock_name, + img_emb_lock_name, last_idle_maintain_file_path, iframe_dir, log_dir, @@ -61,11 +62,13 @@ def __init__( thumbnail_generation_jpg_quality, show_oneday_left_side_stat, webui_access_password_md5, + enable_img_embed_search, + img_embed_search_recall_result_per_db, + img_embed_module_install, **other_field, ) -> None: self.db_path = db_path - self.db_filename = db_filename - self.db_filepath = os.path.join(self.db_path, self.db_filename) + self.vdb_img_path = vdb_img_path self.record_videos_dir = record_videos_dir self.record_seconds = record_seconds self.record_framerate = record_framerate @@ -97,6 +100,7 @@ def __init__( self.maintain_lock_path = os.path.join(lock_file_dir, maintain_lock_subdir) self.record_lock_path = os.path.join(lock_file_dir, record_lock_name) self.tray_lock_path = os.path.join(lock_file_dir, tray_lock_name) + self.img_emb_lock_path = os.path.join(lock_file_dir, img_emb_lock_name) self.last_idle_maintain_file_path = last_idle_maintain_file_path self.iframe_dir = iframe_dir self.compress_encoder = compress_encoder @@ -114,10 +118,15 @@ def __init__( self.thumbnail_generation_jpg_quality = thumbnail_generation_jpg_quality self.show_oneday_left_side_stat = show_oneday_left_side_stat self.webui_access_password_md5 = webui_access_password_md5 + self.enable_img_embed_search = enable_img_embed_search + self.img_embed_search_recall_result_per_db = img_embed_search_recall_result_per_db + self.img_embed_module_install = img_embed_module_install def set_and_save_config(self, attr: str, value): if not hasattr(self, attr): - raise AttributeError("{} not exist in config!".format(attr)) + print("{} not exist in config!".format(attr)) + return + # raise AttributeError("{} not exist in config!".format(attr)) setattr(self, attr, value) self.save_config() @@ -135,8 +144,6 @@ def save_config(self): json.dump(config_json, f, indent=2, ensure_ascii=False) def filter_unwanted_field(self, config_json): - del config_json["db_filepath"] - del config_json["compress_preset"] return config_json diff --git a/windrecorder/db_manager.py b/windrecorder/db_manager.py index 80efa914..04914f16 100644 --- a/windrecorder/db_manager.py +++ b/windrecorder/db_manager.py @@ -4,6 +4,7 @@ import shutil import sqlite3 from itertools import product +from subprocess import CalledProcessError import numpy as np import pandas as pd @@ -457,6 +458,56 @@ def is_videofile_ondisk(filename, video_ondisk_str): return df + # 根据视频文件名字返回对应行列 dataframe (rowid included) + def db_get_row_from_vid_filename(self, vid_filename): + vid_filepath = file_utils.convert_vid_filename_as_vid_filepath(vid_filename) + vid_datetime_start = utils.date_to_datetime(vid_filename[:19]) + if os.path.exists(vid_filepath): # 视频文件存在情况下,尝试拿其真实时长,若无用 config 录制值兜底 + try: + vid_datetime_end = vid_datetime_start + datetime.timedelta( + seconds=int(float(utils.get_vidfilepath_info(vid_filepath)["duration"])) + ) + except CalledProcessError: + vid_datetime_end = vid_datetime_start + datetime.timedelta(seconds=config.record_seconds) + else: + vid_datetime_end = vid_datetime_start + datetime.timedelta(seconds=config.record_seconds) + # 根据datetime定位数据库(考虑需跨数据库情况) + db_name_list = self.db_get_dbfilename_by_datetime(vid_datetime_start, vid_datetime_end) + + df_origin = pd.DataFrame() + for item in db_name_list: + db_filepath = os.path.join(self.db_path, item) + db_filepath = self.get_temp_dbfilepath(db_filepath) + conn = sqlite3.connect(db_filepath) + + # 使用pandas的read_sql_query函数执行查询并将结果转换为DataFrame + query = f"SELECT rowid, * FROM video_text WHERE videofile_name LIKE '%{vid_filename[:19]}%'" + df = pd.read_sql_query(query, conn) + + conn.close() + df_origin = pd.concat([df_origin, df]) + + return df_origin + + def db_get_rowid_and_similar_tuple_list_rows(self, rowid_probs_list, db_filename): + """ + 根据 rowid - 相似度 元组构成的 list 提取数据库文件对应行与标注对应相似度,合在以 dataframe 形式返回 + """ + db_filepath = os.path.join(self.db_path, db_filename) + db_filepath = self.get_temp_dbfilepath(db_filepath) + conn = sqlite3.connect(db_filepath) + rowid_list = [tuple[0] for tuple in rowid_probs_list] + probs_list = [tuple[1] for tuple in rowid_probs_list] + rowid_str = ','.join(map(str, rowid_list)) # 将 rowid 列表转换为逗号分隔的字符串 + + # 构建SQL查询语句 + query = f"SELECT * FROM video_text WHERE rowid IN ({rowid_str})" + result_df = pd.read_sql_query(query, conn) + conn.close() + + result_df["probs"] = probs_list + return result_df + # 列出所有数据 def db_print_all_data(self): print("dbManager: List all data in all databases") @@ -661,7 +712,7 @@ def get_temp_dbfilepath(self, db_filepath): maintaining = os.path.isfile(config.maintain_lock_path) - if not db_filename.endswith("_TEMP_READ.db"): + if not "_TEMP_READ" in db_filename: db_filename_temp = os.path.splitext(db_filename)[0] + "_TEMP_READ.db" # 创建临时文件名 filepath_temp_read = os.path.join(self.db_path, db_filename_temp) # 创建读取的临时路径 if os.path.exists(filepath_temp_read): # 检测是否已存在临时数据库 diff --git a/windrecorder/file_utils.py b/windrecorder/file_utils.py index 9d9e1550..7e7d7400 100644 --- a/windrecorder/file_utils.py +++ b/windrecorder/file_utils.py @@ -1,3 +1,4 @@ +import json import os import shutil import time @@ -35,24 +36,16 @@ def ensure_dir(folder_name): print(f"files: folder existed:{folder_name}") -# 将数据库的视频名加上-OCRED标志,使之能正常读取到 -def add_OCRED_suffix(video_name): - video_name = video_name.replace("-INDEX", "") - vidname = os.path.splitext(video_name)[0] + "-OCRED" + os.path.splitext(video_name)[1] - return vidname - - -# 将数据库的视频名加上-COMPRESS-OCRED标志,使之能正常读取到 -def add_COMPRESS_OCRED_suffix(video_name): - vidname = os.path.splitext(video_name)[0] + "-COMPRESS-OCRED" + os.path.splitext(video_name)[1] - return vidname - - # 输入一个视频文件名,返回其%Y-%m的年月信息作为子文件夹 def convert_vid_filename_as_YYYY_MM(vid_filename): return vid_filename[:7] +# 输入一个视频文件名,返回其完整的相对路径 +def convert_vid_filename_as_vid_filepath(vid_filename): + return os.path.join(config.record_videos_dir, convert_vid_filename_as_YYYY_MM(vid_filename), vid_filename) + + # 查询videos文件夹下的文件数量、未被ocr的文件数量 def get_videos_and_ocred_videos_count(folder_path): count = 0 @@ -61,8 +54,8 @@ def get_videos_and_ocred_videos_count(folder_path): for root, dirs, files in os.walk(folder_path): for file in files: count += 1 - if not file.split(".")[0].endswith("-OCRED"): - if not file.split(".")[0].endswith("-ERROR"): + if "-OCRED" not in file.split(".")[0]: + if "-ERROR" not in file.split(".")[0]: nocred_count += 1 return count, nocred_count @@ -82,16 +75,16 @@ def find_filename_in_dir(dir, search_str): # 检查视频文件是否存在 def check_video_exist_in_videos_dir(video_name): - videofile_path_month_dir = convert_vid_filename_as_YYYY_MM(video_name) - video_path = os.path.join(config.record_videos_dir, videofile_path_month_dir, video_name) - ocred_video_name = os.path.splitext(video_name)[0] + "-OCRED" + os.path.splitext(video_name)[1] - ocred_path = os.path.join(config.record_videos_dir, videofile_path_month_dir, ocred_video_name) - - if os.path.exists(video_path): - return video_name - elif os.path.exists(ocred_path): - return ocred_video_name - else: + try: + exist_videofiles_list = os.listdir(os.path.join(config.record_videos_dir, convert_vid_filename_as_YYYY_MM(video_name))) + video_filename_list = utils.find_strings_list_with_substring( + exist_videofiles_list, video_name.split(".")[0] + ) # 获取文件夹列表中对应文件名 + if video_filename_list: + return video_filename_list[0] + else: + return None + except FileNotFoundError: return None @@ -159,11 +152,16 @@ def get_file_path_list_first_level(dir): return file_names +# 取得文件夹下的第一级文件夹列表 +def get_file_dir_list_first_level(dir): + return [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] + + # 根据已有的文件列表,返回指定时间段的视频文件夹内、已索引了的视频路径列表(包括未压缩的与已压缩的) def get_videofile_path_list_by_time_range(filepath_list, start_datetime=None, end_datetime=None): filepath_list_daterange = [] for filepath in filepath_list: - if filepath.endswith("-OCRED.mp4"): + if "-OCRED.mp4" in filepath: if start_datetime is None or end_datetime is None: # 如果不指定时间段,返回所有结果 filepath_list_daterange.append(filepath) else: @@ -225,3 +223,17 @@ def read_dataframe_from_path(file_path="cache/temp.csv"): dataframe = pd.read_csv(file_path) # 使用read_csv()方法读取CSV文件(可根据文件格式选择对应的读取方法) return dataframe + + +# 读取 extension 文件夹下所有插件名与对应 meta info +def get_extension(extension_filepath="extension"): + dir_list = get_file_dir_list_first_level(extension_filepath) + extension_dict = {} + for dir in dir_list: + try: + with open(f"{extension_filepath}\\{dir}\\meta.json", encoding="utf-8") as file: + data = json.load(file) + extension_dict[data["extension_name"]] = data + except Exception as e: + print(e) + return extension_dict diff --git a/windrecorder/img_embed_manager.py b/windrecorder/img_embed_manager.py new file mode 100644 index 00000000..e4607705 --- /dev/null +++ b/windrecorder/img_embed_manager.py @@ -0,0 +1,330 @@ +# 用来将图像转换为 embedding +# 处理 faiss 数据库事务(建立、添加索引、搜索) +# 处理向量搜索与 sqlite 数据库召回 + +# vdb: VectorDatabase + +# import 前确保 config.img_embed_module_install is True +# if config.img_embed_module_install: +# try: +# from windrecorder import img_embed_manager +# except ModuleNotFoundError: +# config.set_and_save_config("img_embed_module_install", False) + +import os +import shutil +import datetime +import math + +import faiss +import numpy as np +import pandas as pd +import torch +import uform +from PIL import Image +from tqdm import tqdm + +from windrecorder import file_utils, utils +from windrecorder.config import config +from windrecorder.db_manager import db_manager +from windrecorder.ocr_manager import extract_iframe + +DEBUG_MODULE_NAME = "img_embed_manager: " + +is_cuda_available = torch.cuda.is_available() +device = torch.device("cuda" if is_cuda_available else "cpu") + +def get_model(mode="cpu"): + """ + 加载模型 + """ + model = uform.get_model("unum-cloud/uform-vl-multilingual-v2") + if mode == "cpu": + print(f"{DEBUG_MODULE_NAME} emb run on cpu.") + if mode == "cuda": + print(f"{DEBUG_MODULE_NAME} emb run on cuda.") + if is_cuda_available: + model.to(device=device) + else: + print(f"{DEBUG_MODULE_NAME} cude not available, emb run on cpu.") + + return model + + +def embed_img(model: uform.models.VLM, img_filepath): + """ + 将图像转为 embedding vector + """ + image = Image.open(img_filepath) + image_data = model.preprocess_image(image) + if is_cuda_available: + image_features, image_embedding = model.encode_image(image_data.to(device=device), return_features=True) + else: + image_features, image_embedding = model.encode_image(image_data, return_features=True) + return image_embedding + + +def embed_text(model: uform.models.VLM, text_query): + """ + 将文本转为 embedding vector + 注意:model 必须运行在 cpu 模式下 + """ + # 对文本进行编码 + text_data = model.preprocess_text(text_query) + text_features, text_embedding = model.encode_text(text_data, return_features=True) + + # 预处理张量 + text_np = text_embedding.detach().cpu().numpy() + text_np = np.float32(text_np) + faiss.normalize_L2(text_np) + return text_np + + +def get_vdb_filename_via_video_filename(video_filename): + """ + 根据视频名获得 vdb 本地数据库文件名 + 构成:username_YYYY-MM_imgemb.index + """ + return f"{config.user_name}_{file_utils.convert_vid_filename_as_YYYY_MM(video_filename)}_imgemb.index" + + +class VectorDatabase: + """ + 向量数据库事务 + 以 IndexIDMap 存储,对应关系为 向量 - sqlite 的 ROWID + """ + + def __init__(self, vdb_filename, db_dir=config.vdb_img_path, dimension=256): # uform 使用 256d 向量 + """ + 初始化新建/载入数据库 + + :param vdb_filename, 向量数据库名字 + :param db_dir, 向量数据库路径 + """ + self.dimension = dimension + self.vdb_filepath = os.path.join(db_dir, vdb_filename) + self.all_ids_list = [] + file_utils.ensure_dir(db_dir) + if os.path.exists(self.vdb_filepath): + self.index = faiss.read_index(self.vdb_filepath) + self.all_ids_list = faiss.vector_to_array(self.index.id_map).tolist() # 获得向量数据库中已有 ROWID 列表,以供写入时比对 + else: + self.index = faiss.IndexIDMap(faiss.IndexFlatL2(self.dimension)) + + def add_vector(self, vector, rowid: int): + """ + 添加向量到 index + + :param vector: 图像 embedding 后的向量 + :param rowid: sqlite 对应的 ROWID + """ + vector = vector.detach().cpu().numpy() # 转换为numpy数组 + vector = np.float32(vector) # 转换为float32类型的numpy数组 + faiss.normalize_L2(vector) # 规范化向量,避免在搜索时出现错误的结果 + + if rowid in self.all_ids_list: # 如果 rowid 已经存在于向量数据库,删除后再更新 + self.index.remove_ids(np.array([rowid])) + self.index.add_with_ids(vector, np.array([rowid])) # 踩坑:使用faiss来管理就好,先用list/dict缓存再集中写入的思路会OOM + + def search_vector(self, vector, k=20): + """在数据库中查询最近的k个向量,返回对应 (rowid, 相似度) 列表""" + probs, indices = self.index.search(vector, k) + return [(i, probs[0][j]) for j, i in enumerate(indices[0])] + + def save_to_file(self): + """将向量数据库写入本地文件""" + faiss.write_index(self.index, self.vdb_filepath) + self.all_ids_list = faiss.vector_to_array(self.index.id_map).tolist() # 更新 ROWID 列表 + + +def find_closest_iframe_img_dict_item(target: str, img_dict: dict, threshold=3): + """ + 寻找 dict {sqlite_ROWID:图像文件名} 中最邻近输入图像名的一项 + 如输入 "123.jpg",返回字典中最接近的 "125.jpg" + """ + closest_item = None + min_difference = float('inf') + + for key, value in img_dict.items(): + difference = abs(int(value.split(".")[0]) - int(target.split(".")[0])) + if difference <= threshold and difference < min_difference: + closest_item = value + min_difference = difference + + return closest_item + + +def embed_img_in_iframe_by_rowid_dict(model: uform.models.VLM, img_dict: dict, img_dir_filepath, vdb: VectorDatabase): + """ + 流程:根据 dict {sqlite_ROWID:图像文件名} 对应关系, + 将(i_frame 临时)文件夹中的对应图像转为对应的 embedding 并写入 vdb.index + """ + for rowid, img_filename in img_dict.items(): + print(f"{DEBUG_MODULE_NAME} Embedding {rowid=}, {img_filename=}") + img_filepath = os.path.join(img_dir_filepath, img_filename) + if not os.path.exists(img_filepath): + # 提取的图像列表有时出于换了提取iframe方式、cv可能的随机性等缘故,可能无法保证与db过去记录的完全一致,在 embedding 时有则 embed,无则寻找最近的阈值、再无则跳过。但考虑到相似图像仍会出现在附近时间范围,结果应尚可。 + closest_img_filename = find_closest_iframe_img_dict_item(target=img_filename, img_dict=img_dict) + if closest_img_filename is None: + print(f"{img_filepath} closest item not found, skipped.") + continue + else: + img_filepath = os.path.join(img_dir_filepath, closest_img_filename) + if not os.path.exists(img_filepath): + print(f"{img_filepath} not existed, skipped.") + continue + print(f"{img_filepath} replaced.") + vdb.add_vector(vector=embed_img(model, img_filepath), rowid=rowid) + + vdb.save_to_file() + + +def embed_vid_file(model: uform.models.VLM, vdb: VectorDatabase, vid_file_name, video_saved_dir=config.record_videos_dir, iframe_path=config.iframe_dir): + """ + 流程:输入一个视频文件路径,根据 sqlite 数据库,获得 dict {sqlite_ROWID:图像文件名} + 建议用 try 调用,避免因索引数据可能不全报错而阻塞。 + + param: vid_file_name, 视频文件名,看起来像"2023-10-01_12-04-28-OCRED.mp4" + """ + vid_filepath = os.path.join(video_saved_dir, file_utils.convert_vid_filename_as_YYYY_MM(vid_file_name), vid_file_name) + + # 获取视频名在 sqlite db 中的对应 iframe cnt index + img_db_recorded_dict = {} + df_video_related = db_manager.db_get_row_from_vid_filename(vid_file_name) + for index, row in df_video_related.iterrows(): + img_db_recorded_dict[row['rowid']] = row['picturefile_name'] + if len(img_db_recorded_dict) == 0: + return False + + # 判断是否存在图片缓存文件,若无则提取 + iframe_sub_path = os.path.join(iframe_path, os.path.splitext(vid_file_name)[0][:19]) # FIXME 硬编码取了文件名的日期范围 + iframe_img_list = [] + if os.path.exists(iframe_sub_path): + iframe_img_list = os.listdir(iframe_sub_path) + + if not all(element in iframe_img_list for element in list(img_db_recorded_dict.values())): # 已有缓存图像文件是否包含了sqlite db中记录的图像文件,否则重新提取 + # 清理缓存 + try: + shutil.rmtree(iframe_sub_path) + except FileNotFoundError: + pass + file_utils.ensure_dir(iframe_sub_path) + extract_iframe(video_file=vid_filepath, iframe_path=iframe_sub_path) + # FIXME 提取后需要对图像进行遮减处理? + + # 因为是原子操作,不用添加回滚机制,完成了所有的索引才写入 faiss index db file + embed_img_in_iframe_by_rowid_dict(model=model, img_dict=img_db_recorded_dict, img_dir_filepath=iframe_sub_path, vdb=vdb) + # 清理图像缓存 + try: + shutil.rmtree(iframe_sub_path) + except FileNotFoundError: + pass + + os.rename(vid_filepath, vid_filepath.replace("-OCRED", "-IMGEMB-OCRED")) + return True + + +def all_videofile_do_img_embedding_routine(video_queue_count = 14): + """ + 流程:处理未嵌入的视频,提取嵌入视频 iframe embedding 到向量数据库。默认计算时间控制在 30 分钟左右内(即索引 12~15 个视频) + """ + video_process_count = 0 + + model = get_model(mode="cuda") + + video_dirs = os.listdir(config.record_videos_dir)[::-1] # 倒序列表,以先索引较新的视频 + for video_dir in tqdm(video_dirs): + videos_names = os.listdir(os.path.join(config.record_videos_dir, video_dir))[::-1] + for video_name in tqdm(videos_names): + print(f"{DEBUG_MODULE_NAME} img_embed({video_process_count}/{video_queue_count}): embedding {video_dir}, {video_name}") + # 确认视频已被 OCR 索引,且没含有 -IMGEMB 标签 + # 如果视频被压缩了,目前跳过;TODO 未来如果使用时间戳手段提取、或者可以接受iframe提取的时域误差,则不需要这条规则了 + if not "-OCRED" in video_name: + continue + if "-IMGEMB" in video_name or "-COMPRESS" in video_name: + continue + vdb = VectorDatabase(vdb_filename=get_vdb_filename_via_video_filename(video_name)) + embed_vid_file(model=model, vdb=vdb, vid_file_name=video_name) + video_process_count += 1 + if video_process_count > video_queue_count: + break + if video_process_count > video_queue_count: + break + +def get_vdbs_filename_via_time_range(start_datetime:datetime.datetime, end_datetime:datetime.datetime): + """ + 根据输入输出时间范围获取 vdb filename list + """ + start_datetime = utils.set_full_datetime_to_YYYY_MM(start_datetime) + end_datetime = utils.set_full_datetime_to_YYYY_MM(end_datetime) + + file_utils.ensure_dir(config.vdb_img_path) + vdb_filename_list = file_utils.get_file_path_list_first_level(config.vdb_img_path) + vdb_filename_list = [item for item in vdb_filename_list if (item.startswith(config.user_name) and item.endswith("_imgemb.index"))] # 去除非当前用户、且非 vdb 的项 + if len(vdb_filename_list) == 0: + return None + + vdb_filename_list_datetime = [utils.extract_date_from_db_filename(file) for file in vdb_filename_list] + vdb_filename_list_datetime_dict = dict(sorted(zip(vdb_filename_list, vdb_filename_list_datetime), key=lambda x: x[1])) + result = [] + for key, value in vdb_filename_list_datetime_dict.items(): + if start_datetime <= value <= end_datetime: + result.append(key) + return result + + +def query_text_in_img_vdbs(model: uform.models.VLM, text_query, start_datetime, end_datetime): + """ + 流程:在 vdb list 中搜索文本嵌入,提取对应 sqlite rowid 项,合并排序返回 df + model 需要运行在 cpu mode 下 + """ + vdb_filenames = get_vdbs_filename_via_time_range(start_datetime=start_datetime, end_datetime=end_datetime) + print(f"{DEBUG_MODULE_NAME} quering {text_query}, {start_datetime=}, {end_datetime=}, {vdb_filenames=}") + if vdb_filenames is None: + return pd.DataFrame(), 0, 0 + text_vector = embed_text(model=model, text_query=text_query) + + df_list = [] + for vdb_filename in vdb_filenames: + print(f'{DEBUG_MODULE_NAME} recalling {vdb_filename}') + vdb = VectorDatabase(vdb_filename=vdb_filename) + res_tuple_list = vdb.search_vector(text_vector, k=config.img_embed_search_recall_result_per_db) + res_tuple_list = [t for t in res_tuple_list if t[0] != -1] # 相似度结果不足时,会以 -1 的 index 填充,在进 sqlite 搜索前需过滤 + + len_prefix = len(config.user_name)+1 + db_filename = f"{vdb_filename[0:len_prefix+7]}_wind.db" + df = db_manager.db_get_rowid_and_similar_tuple_list_rows(rowid_probs_list=res_tuple_list, db_filename=db_filename) + df_list.append(df) + + merged_df = pd.concat(df_list) + sorted_df = merged_df.sort_values(by='probs', ascending=True) + sorted_df = sorted_df.reset_index(drop=True) + row_count = len(sorted_df) + page_count_all = int(math.ceil(int(row_count) / int(config.max_page_result))) + return sorted_df, row_count, page_count_all + + +# 测试用例 +if __name__ == "__main__": + # 1. 准备一组测试图片放在 i_frames 目录下 + img_dataset_filepath = "i_frames" + file_names = os.listdir(img_dataset_filepath) + test_dataset_dict = {} + for item in file_names: + test_dataset_dict[len(test_dataset_dict)] = item + + # 2. 将图片嵌入为向量 + model = get_model(mode="cuda") + vdb = VectorDatabase(vdb_filename="test.index", db_dir="") + vector = embed_img_in_iframe_by_rowid_dict( + model=model, img_dict=test_dataset_dict, img_dir_filepath=img_dataset_filepath, vdb=vdb + ) + + # 3. 使用语义查询 ROWID / 图片 + model = get_model("cpu") + text_query_vector = embed_text(model=model, text_query="棕色头发的人") + res = vdb.search_vector(vector=text_query_vector) + res_parse = [] + for item in res: + res_parse.append((test_dataset_dict[item[0]], item[1])) + print(f"{res_parse=}") diff --git a/windrecorder/ocr_manager.py b/windrecorder/ocr_manager.py index e1a4b4fd..b7bf1979 100644 --- a/windrecorder/ocr_manager.py +++ b/windrecorder/ocr_manager.py @@ -24,17 +24,7 @@ # ocr_short_side = int(config.ocr_short_size) -# 检查文件是否被占用 -# def is_file_in_use(file_path): -# try: -# fd = os.open(file_path, os.O_RDWR|os.O_EXCL) -# os.close(fd) -# return False -# except OSError: -# return True - - -# 使用 win32file 的判断实现 +# 使用 win32file 的判断实现,检查文件是否被占用 def is_file_in_use(file_path): try: vHandle = win32file.CreateFile( @@ -324,7 +314,7 @@ def ocr_core_logic(file_path, vid_file_name, iframe_path): ] dataframe_all = pd.DataFrame(columns=dataframe_column_names) - # todo: os.listdir 应该进行正确的数字排序、以确保是按视频顺序索引的 + # TODO: os.listdir 应该进行正确的数字排序、以确保是按视频顺序索引的 for img_file_name in os.listdir(iframe_path): print("_____________________") print("processing IMG - OCR:" + img_file_name) @@ -435,7 +425,8 @@ def ocr_process_single_video(video_path, vid_file_name, iframe_path, optimize_fo print(f"ocr_manager: --------- {file_path} Finished! ---------") finally: # 清理文件 - shutil.rmtree(iframe_sub_path) + # shutil.rmtree(iframe_sub_path) 先不清理文件,留给 img embed 流程继续使用,由它清理 + pass def convert_temp_optimize_vidfile_for_ocr(vid_filepath): @@ -473,7 +464,7 @@ def ocr_process_videos(video_path, iframe_path): print("processing VID:" + full_file_path) # 检查视频文件是否已被索引 - if not file.endswith(".mp4") or file.endswith("-OCRED.mp4") or file.endswith("-ERROR.mp4"): + if not file.endswith(".mp4") or "-OCRED" in file or "-ERROR" in file: continue # 判断文件是否正在被占用 @@ -529,7 +520,7 @@ def compress_outdated_videofiles(): if len(video_filepath_list_outdate) > 0: for item in video_filepath_list_outdate: - if not item.endswith("-COMPRESS-OCRED.mp4") and item.endswith("-OCRED.mp4"): + if not "-COMPRESS" in item and "-OCRED" in item: print(f"ocr_manager: compressing {item}") record.compress_video_resolution(item, config.video_compress_rate) send2trash(item) @@ -538,7 +529,7 @@ def compress_outdated_videofiles(): # 备份数据库 def backup_dbfile(db_filepath, keep_items_num=15, make_new_backup_timegap=datetime.timedelta(hours=8)): - if db_filepath.endswith("_TEMP_READ.db"): + if "_TEMP_READ" in db_filepath: return False db_backup_filepath = "cache\\db_backup" diff --git a/windrecorder/oneday.py b/windrecorder/oneday.py index a48602cd..913c1434 100644 --- a/windrecorder/oneday.py +++ b/windrecorder/oneday.py @@ -91,7 +91,6 @@ def get_day_statistic_chart_overview(self, df, start_dt, end_dt): df_C.loc[len(df_C)] = [step, len(filtered)] df_C["hour"] = df_C["hour"].round(1) - # df_C['hour'] = df_C['hour'].apply(int) return df_C @@ -139,15 +138,15 @@ def find_closest_video_by_database(self, df, time): def get_result_df_video_time(self, df, index): video_name = df.loc[index, "videofile_name"] video_search_result_timestamp = df.loc[index, "videofile_time"] - check_on_disk_path = file_utils.check_video_exist_in_videos_dir(video_name) - if check_on_disk_path is None: + video_filename = file_utils.check_video_exist_in_videos_dir(video_name) + if video_filename is None: # 磁盘上没有文件 return False, video_name, None else: # 磁盘上有视频文件 video_name_timestamp = utils.calc_vid_name_to_timestamp(video_name) local_video_timestamp = video_search_result_timestamp - video_name_timestamp - return True, check_on_disk_path, local_video_timestamp + return True, video_filename, local_video_timestamp # 生成当天时间线预览图 def generate_preview_timeline_img( diff --git a/windrecorder/ui/oneday.py b/windrecorder/ui/oneday.py index 8fb6deee..c06b1beb 100644 --- a/windrecorder/ui/oneday.py +++ b/windrecorder/ui/oneday.py @@ -228,14 +228,14 @@ def update_day_timeline_thumbnail(): get_generate_result = update_day_timeline_thumbnail() # 移除非今日的-today.png for filename in os.listdir(config.timeline_result_dir): - if filename.endswith("-today-.png") and filename != real_today_day_cloud_and_TL_img_name: + if "-today-" in filename and filename != real_today_day_cloud_and_TL_img_name: file_path = os.path.join(config.timeline_result_dir, filename) try: os.remove(file_path) print(f"webui: Deleted file: {file_path}") except Exception as e: print(f"webui: {e}") - elif current_day_TL_img_path.endswith("-today-.png"): + elif "-today-" in current_day_TL_img_path: # 如果已存在今日的,重新生成覆盖更新 if not file_utils.is_file_modified_recently(current_day_TL_img_path): # 如果修改日期超过30分钟则更新 @@ -404,7 +404,7 @@ def update_day_word_cloud(): update_day_word_cloud() # 移除非今日的-today.png for filename in os.listdir(config.wordcloud_result_dir): - if filename.endswith("-today-.png") and filename != real_today_day_cloud_and_TL_img_name: + if "-today-" in filename and filename != real_today_day_cloud_and_TL_img_name: file_path = os.path.join(config.wordcloud_result_dir, filename) os.remove(file_path) print(f"webui: Deleted file: {file_path}") diff --git a/windrecorder/ui/search.py b/windrecorder/ui/search.py index 118de388..6e426b13 100644 --- a/windrecorder/ui/search.py +++ b/windrecorder/ui/search.py @@ -13,17 +13,34 @@ from windrecorder.ui import components from windrecorder.utils import get_text as _t +if config.img_embed_module_install: + try: + from windrecorder.img_embed_manager import query_text_in_img_vdbs, get_model + except ModuleNotFoundError: + config.set_and_save_config("img_embed_module_install", False) + +# 使用 streamlit state 来进行通信 + def render(): search_col, video_col = st.columns([1, 2]) with search_col: - # 初始化一些全局状态 + # 初始化全局状态 + # 通用状态 if "db_global_search_result" not in st.session_state: st.session_state["db_global_search_result"] = pd.DataFrame() + if "page_index" not in st.session_state: + st.session_state.page_index = 1 if "max_page_count" not in st.session_state: st.session_state.max_page_count = 1 if "all_result_counts" not in st.session_state: st.session_state.all_result_counts = 1 + if "cache_videofile_ondisk_list" not in st.session_state: # 减少io查询,预拿视频文件列表供比对是否存在 + st.session_state.cache_videofile_ondisk_list = file_utils.get_file_path_list(config.record_videos_dir) + if "timeCost_globalSearch" not in st.session_state: # 统计搜索使用时长 + st.session_state.timeCost_globalSearch = 0 + + # OCR 文本搜索 if "search_content" not in st.session_state: st.session_state.search_content = "" if "search_content_exclude" not in st.session_state: @@ -34,75 +51,14 @@ def render(): st.session_state.search_date_range_in = datetime.datetime.today() - datetime.timedelta(seconds=86400) if "search_date_range_out" not in st.session_state: st.session_state.search_date_range_out = datetime.datetime.today() - if "cache_videofile_ondisk_list" not in st.session_state: # 减少io查询,预拿视频文件列表供比对是否存在 - st.session_state.cache_videofile_ondisk_list = file_utils.get_file_path_list(config.record_videos_dir) - if "timeCost_globalSearch" not in st.session_state: # 统计搜索使用时长 - st.session_state.timeCost_globalSearch = 0 - - # 获得全局搜索结果 - def do_global_keyword_search(): - # 如果搜索所需入参状态改变了,进行搜索 - if ( - st.session_state.search_content_lazy == st.session_state.search_content - and st.session_state.search_content_exclude_lazy == st.session_state.search_content_exclude - and st.session_state.search_date_range_in_lazy == st.session_state.search_date_range_in - and st.session_state.search_date_range_out_lazy == st.session_state.search_date_range_out - ): - return - - st.session_state.timeCost_globalSearch = time.time() # 预埋搜索用时 - - # 更新懒状态 - st.session_state.search_content_lazy = st.session_state.search_content - st.session_state.search_content_exclude_lazy = st.session_state.search_content_exclude - st.session_state.search_date_range_in_lazy = st.session_state.search_date_range_in - st.session_state.search_date_range_out_lazy = st.session_state.search_date_range_out - - # 重置每次进行新搜索需要重置的状态 - st.session_state.page_index = 1 - - # 进行搜索,取回结果 - ( - st.session_state.db_global_search_result, - st.session_state.all_result_counts, - st.session_state.max_page_count, - ) = db_manager.db_search_data( - st.session_state.search_content, - st.session_state.search_date_range_in, - st.session_state.search_date_range_out, - keyword_input_exclude=st.session_state.search_content_exclude, - ) - - st.session_state.timeCost_globalSearch = round(time.time() - st.session_state.timeCost_globalSearch, 5) # 回收搜索用时 - - title_col, random_word_btn_col = st.columns([10, 1]) - with title_col: - st.markdown(_t("gs_md_search_title")) - with random_word_btn_col: - if not wordcloud.check_if_word_lexicon_empty(): - if st.button("🎲", use_container_width=True, help=_t("gs_text_randomwalk")): - try: - st.session_state.search_content = utils.get_random_word_from_lexicon() - st.session_state.use_random_search = True - except Exception as e: - print("[Exception] gs_text_randomwalk:") - print(e) - st.session_state.search_content = "" - st.session_state.use_random_search = False - else: - st.session_state.use_random_search = False - st.empty() - - components.web_onboarding() # 初始化时间搜索范围组件(懒加载) if "search_latest_record_time_int" not in st.session_state: st.session_state["search_latest_record_time_int"] = db_manager.db_latest_record_time() if "search_earlist_record_time_int" not in st.session_state: st.session_state["search_earlist_record_time_int"] = db_manager.db_first_earliest_record_time() - - # 优化streamlit强加载机制导致的索引时间:改变了再重新搜索,而不是每次提交了更改都进行搜索 # 初始化懒状态 + # 优化streamlit强加载机制导致的索引时间:改变了再重新搜索,而不是每次提交了更改都进行搜索 if "search_content_lazy" not in st.session_state: st.session_state.search_content_lazy = "" if "search_content_exclude_lazy" not in st.session_state: @@ -120,49 +76,50 @@ def do_global_keyword_search(): - datetime.timedelta(seconds=86400) ) - keyword_col, exclude_col, date_range_col, page_col = st.columns([2, 1, 2, 1.5]) - with keyword_col: # 输入搜索关键词 - input_value = st.text_input(_t("text_search_keyword"), help=_t("gs_input_search_help")) - st.session_state.search_content = ( - st.session_state.search_content if st.session_state.use_random_search else input_value - ) - with exclude_col: # 排除关键词 - st.session_state.search_content_exclude = st.text_input( - _t("gs_input_exclude"), "", help=_t("gs_input_exclude_help") - ) - with date_range_col: # 选择时间范围 - try: - ( - st.session_state.search_date_range_in, - st.session_state.search_date_range_out, - ) = st.date_input( - _t("text_search_daterange"), - ( - datetime.datetime(1970, 1, 2) - + datetime.timedelta(seconds=st.session_state.search_earlist_record_time_int) - - datetime.timedelta(seconds=86400), - datetime.datetime(1970, 1, 2) - + datetime.timedelta(seconds=st.session_state.search_latest_record_time_int) - - datetime.timedelta(seconds=86400), - ), - format="YYYY-MM-DD", - ) - except Exception: - # 处理没选择完整选择时间段 - st.warning(_t("gs_text_pls_choose_full_date_range")) - - with page_col: - # 结果翻页器 - st.session_state.page_index = st.number_input( - _t("gs_input_result_page"), - min_value=1, - step=1, - max_value=st.session_state.max_page_count + 1, + def clean_lazy_state_after_change_search_method(): + """ + 在切换搜索方式后,清理之前搜索留下的 tab 下其他 UI 部分使用到的数据 + """ + st.session_state.search_content = "" + + # 绘制抬头部分的 UI + search_method_list = [_t("gs_option_ocr_text_search"), _t("gs_option_img_emb_search")] + title_col, search_method = st.columns([5, 1.5]) + with title_col: + st.markdown(_t("gs_md_search_title")) + with search_method: + st.session_state.search_method_selected = st.selectbox( + "Search Method", + search_method_list, + label_visibility="collapsed", + on_change=clean_lazy_state_after_change_search_method, ) + # with random_word_btn_col: + # # 暂时移除“随便走走”功能 + # if st.toggle("🎲", help=_t("gs_text_randomwalk"), disabled=wordcloud.check_if_word_lexicon_empty()): + # try: + # st.session_state.search_content = utils.get_random_word_from_lexicon() + # st.session_state.use_random_search = True + # except Exception as e: + # print("[Exception] gs_text_randomwalk:") + # print(e) + # st.session_state.search_content = "" + # st.session_state.use_random_search = False + # else: + # st.session_state.use_random_search = False - do_global_keyword_search() + components.web_onboarding() + + match search_method_list.index(st.session_state.search_method_selected): + case 0: + ui_ocr_text_search() + case 1: + if config.enable_img_embed_search and config.img_embed_module_install: + ui_vector_img_search() + else: + st.warning("未启用或未安装图像语义检索模块,请前往设置页启用。若设置中无相关选项,请先安装图像语义模块。安装脚本位于 Windrecorder 目录下:extension\\install_img_embedding_module\\install_img_embedding_module.bat") - # 进行搜索 + # 搜索结果表格的 UI if not len(st.session_state.search_content) == 0: df = db_manager.db_search_data_page_turner(st.session_state.db_global_search_result, st.session_state.page_index) @@ -198,23 +155,171 @@ def do_global_keyword_search(): st.info(_t("gs_text_intro")) # 搜索内容为空时显示指引 with video_col: - # 选择视频 + # 右侧选择展示视频的 UI if not len(st.session_state.search_content) == 0: show_and_locate_video_timestamp_by_df(df, result_choose_num) else: st.empty() +# 搜索页的 UI 通用输入组件 +def ui_component_date_range_selector(): + """ + 组件-日期选择器 + """ + try: + ( + st.session_state.search_date_range_in, + st.session_state.search_date_range_out, + ) = st.date_input( + _t("text_search_daterange"), + ( + datetime.datetime(1970, 1, 2) + + datetime.timedelta(seconds=st.session_state.search_earlist_record_time_int) + - datetime.timedelta(seconds=86400), + datetime.datetime(1970, 1, 2) + + datetime.timedelta(seconds=st.session_state.search_latest_record_time_int) + - datetime.timedelta(seconds=86400), + ), + format="YYYY-MM-DD", + ) + except Exception: + # 处理没选择完整选择时间段 + st.warning(_t("gs_text_pls_choose_full_date_range")) + + +def ui_component_pagination(): + """ + 组件-搜索结果翻页器 + """ + st.session_state.page_index = st.number_input( + _t("gs_input_result_page"), + min_value=1, + step=1, + max_value=st.session_state.max_page_count + 1, + ) + + +# UI 布局 +def ui_ocr_text_search(): + """ + 使用文本进行全局 OCR 搜索 + """ + + # 获得全局搜索结果 + def do_global_keyword_search(): + # 如果搜索所需入参状态改变了,进行搜索 + if ( + st.session_state.search_content_lazy == st.session_state.search_content + and st.session_state.search_content_exclude_lazy == st.session_state.search_content_exclude + and st.session_state.search_date_range_in_lazy == st.session_state.search_date_range_in + and st.session_state.search_date_range_out_lazy == st.session_state.search_date_range_out + or len(st.session_state.search_content) == 0 + ): + return + + # 更新懒状态 + st.session_state.search_content_lazy = st.session_state.search_content + st.session_state.search_content_exclude_lazy = st.session_state.search_content_exclude + st.session_state.search_date_range_in_lazy = st.session_state.search_date_range_in + st.session_state.search_date_range_out_lazy = st.session_state.search_date_range_out + + # 重置每次进行新搜索需要重置的状态 + st.session_state.page_index = 1 + + with st.spinner(_t("gs_text_searching")): + st.session_state.timeCost_globalSearch = time.time() # 预埋搜索用时 + # 进行搜索,取回结果 + ( + st.session_state.db_global_search_result, + st.session_state.all_result_counts, + st.session_state.max_page_count, + ) = db_manager.db_search_data( + st.session_state.search_content, + st.session_state.search_date_range_in, + st.session_state.search_date_range_out, + keyword_input_exclude=st.session_state.search_content_exclude, + ) + st.session_state.timeCost_globalSearch = round(time.time() - st.session_state.timeCost_globalSearch, 5) # 回收搜索用时 + + # 文本搜索 UI + col_keyword, col_exclude, col_date_range, col_page = st.columns([2, 1, 2, 1.5]) + with col_keyword: # 输入搜索关键词 + input_value = st.text_input(_t("text_search_keyword"), help=_t("gs_input_search_help")) + st.session_state.search_content = ( + st.session_state.search_content if st.session_state.use_random_search else input_value + ) + with col_exclude: # 排除关键词 + st.session_state.search_content_exclude = st.text_input(_t("gs_input_exclude"), "", help=_t("gs_input_exclude_help")) + with col_date_range: # 选择时间范围 + ui_component_date_range_selector() + with col_page: # 搜索结果翻页 + ui_component_pagination() + + do_global_keyword_search() + + +def ui_vector_img_search(): + """ + 图像语义搜索:使用自然语言匹配检索图像 + """ + # 预加载文本嵌入模型,这样每次搜索就不需要重复加载、提升时间 + if "text_embed_model" not in st.session_state: + with st.spinner(_t("gs_text_loading_text_embed_model")): + st.session_state["text_embed_model"] = get_model(mode='cpu') + + # 获得全局图像语义搜索结果 + def do_global_vector_img_search(): + # 如果搜索所需入参状态改变了,进行搜索 + if ( + st.session_state.search_content_lazy == st.session_state.search_content + and st.session_state.search_date_range_in_lazy == st.session_state.search_date_range_in + and st.session_state.search_date_range_out_lazy == st.session_state.search_date_range_out + or len(st.session_state.search_content) == 0 + ): + return + + # 更新懒状态 + st.session_state.search_content_lazy = st.session_state.search_content + st.session_state.search_date_range_in_lazy = st.session_state.search_date_range_in + st.session_state.search_date_range_out_lazy = st.session_state.search_date_range_out + + # 重置每次进行新搜索需要重置的状态 + st.session_state.page_index = 1 + + with st.spinner(_t("gs_text_searching")): + st.session_state.timeCost_globalSearch = time.time() # 预埋搜索用时 + # 进行搜索,取回结果 + ( + st.session_state.db_global_search_result, + st.session_state.all_result_counts, + st.session_state.max_page_count, + ) = query_text_in_img_vdbs( + model=st.session_state.text_embed_model, + text_query=st.session_state.search_content, + start_datetime=st.session_state.search_date_range_in, + end_datetime=st.session_state.search_date_range_out, + ) + st.session_state.timeCost_globalSearch = round(time.time() - st.session_state.timeCost_globalSearch, 5) # 回收搜索用时 + + # 图像语义搜索 UI + col_text_query_content, col_date_range, col_page = st.columns([3, 2, 1.5]) + with col_text_query_content: # 用自然语言描述图像 + st.session_state.search_content = st.text_input(_t("gs_input_img_emb_search"), help=_t("gs_text_img_emb_help")) + with col_date_range: # 选择时间范围 + ui_component_date_range_selector() + with col_page: # 搜索结果翻页 + ui_component_pagination() + + do_global_vector_img_search() + + # 选择播放视频的行数 的滑杆组件 def result_selector(df, result_cnt): if result_cnt == 1: # 如果结果只有一个,直接显示结果而不显示滑杆 return 0 elif result_cnt > 1: - # shape是一个元组,索引0对应行数,索引1对应列数。 - # df.shape[0] - # print("webui: total_raw:" + str(total_raw)) - slider_min_num_display = df.index.min() slider_max_num_display = df.index.max() select_num = slider_min_num_display @@ -250,37 +355,18 @@ def show_and_locate_video_timestamp_by_df(df, num): if len(df) == 0: return - # todo 获取有多少行结果 对num进行合法性判断 - videofile_path_month_dir = file_utils.convert_vid_filename_as_YYYY_MM(df.iloc[num]["videofile_name"]) # 获取对应的日期目录 - videofile_path = os.path.join( - config.record_videos_dir, - videofile_path_month_dir, - file_utils.add_OCRED_suffix(df.iloc[num]["videofile_name"]), - ) - videofile_path_COMPRESS = os.path.join( - config.record_videos_dir, - videofile_path_month_dir, - file_utils.add_COMPRESS_OCRED_suffix(df.iloc[num]["videofile_name"]), - ) - print("webui: videofile_path: " + videofile_path) - vid_timestamp = utils.calc_vid_inside_time(df, num) - print("webui: vid_timestamp: " + str(vid_timestamp)) - - st.session_state.vid_vid_timestamp = 0 - st.session_state.vid_vid_timestamp = vid_timestamp - # st.session_state.vid_vid_timestamp - # 判断视频文件是否存在 - if os.path.isfile(videofile_path): # 是否存在未压缩的 - video_file = open(videofile_path, "rb") - video_bytes = video_file.read() - with st.empty(): - st.video(video_bytes, start_time=st.session_state.vid_vid_timestamp) - st.markdown(f"`{videofile_path}`") - elif os.path.isfile(videofile_path_COMPRESS): # 是否存在已压缩的 - video_file = open(videofile_path_COMPRESS, "rb") + # TODO 获取有多少行结果 对num进行合法性判断 + df_videofile_name = df.iloc[num]["videofile_name"] + video_filename = file_utils.check_video_exist_in_videos_dir(df_videofile_name) + if video_filename: + vid_timestamp = utils.calc_vid_inside_time(df, num) + st.session_state.vid_vid_timestamp = vid_timestamp + + video_filepath = file_utils.convert_vid_filename_as_vid_filepath(video_filename) + video_file = open(video_filepath, "rb") video_bytes = video_file.read() with st.empty(): st.video(video_bytes, start_time=st.session_state.vid_vid_timestamp) - st.markdown(f"`{videofile_path_COMPRESS}`") + st.markdown(f"`{video_filepath}`") else: - st.warning(f"Video File **{videofile_path}** not on disk.", icon="🦫") + st.warning(_t("gs_text_video_file_not_on_disk").format(df_videofile_name=df_videofile_name), icon="🦫") \ No newline at end of file diff --git a/windrecorder/ui/setting.py b/windrecorder/ui/setting.py index ef9e985d..bd421ed8 100644 --- a/windrecorder/ui/setting.py +++ b/windrecorder/ui/setting.py @@ -13,6 +13,12 @@ from windrecorder.config import config from windrecorder.utils import get_text as _t +if config.img_embed_module_install: + try: + from windrecorder import img_embed_manager + except ModuleNotFoundError: + config.set_and_save_config("img_embed_module_install", False) + lang_map = utils.d_lang["lang_map"] @@ -28,6 +34,13 @@ def set_config_lang(lang_name): def render(): + # 初始化全局状态 + if "is_cuda_available" not in st.session_state: + if config.img_embed_module_install: + st.session_state.is_cuda_available = img_embed_manager.is_cuda_available + else: + st.session_state.is_cuda_available = False + st.markdown(_t("set_md_title")) col1b, col2b, col3b = st.columns([1, 0.5, 1.5]) @@ -85,6 +98,18 @@ def update_database_clicked(): help=_t("set_input_exclude_word_help"), ) + if config.img_embed_module_install: + option_enable_img_embed_search = st.checkbox( + _t("set_checkbox_enable_img_emb"), + help=_t("set_text_enable_img_emb_help"), + value=config.enable_img_embed_search, + ) + else: + option_enable_img_embed_search = False + + if not st.session_state.is_cuda_available and option_enable_img_embed_search: + st.warning(_t("set_text_img_emb_not_suppport_cuda")) + # 更新数据库按钮 if update_db_btn: try: @@ -111,6 +136,8 @@ def update_database_clicked(): st.button(_t("set_btn_got_it"), key="setting_reset") st.divider() + + # OCR 时忽略屏幕四边的区域范围 col1pb, col2pb = st.columns([1, 1]) with col1pb: st.markdown(_t("set_md_ocr_ignore_area"), help=_t("set_md_ocr_ignore_area_help")) @@ -196,10 +223,11 @@ def update_database_clicked(): with col1_ui2: config_max_search_result_num = st.number_input( _t("set_input_max_num_search_page"), - min_value=1, + min_value=5, max_value=500, value=config.max_page_result, ) + # 「一天之时」时间轴的横向缩略图数量 with col2_ui2: config_oneday_timeline_num = st.number_input( _t("set_input_oneday_timeline_thumbnail_num"), @@ -213,6 +241,20 @@ def update_database_clicked(): f'🔒 {_t("set_pwd_text")}', value=config.webui_access_password_md5, help=_t("set_pwd_help"), type="password" ) + # imgemb 选项 + if config.img_embed_module_install: + col1_imgemb, col2_imgemb = st.columns([1, 1]) + with col1_imgemb: + config_img_embed_search_recall_result_per_db = st.number_input( + _t("set_input_img_emb_max_recall_count"), + min_value=5, + max_value=100, + value=config.img_embed_search_recall_result_per_db, + help=_t("set_text_help_img_emb_max_recall_count"), + ) + with col2_imgemb: + st.empty() + # 选择语言 lang_selection = list(lang_map.values()) lang_index = lang_selection.index(lang_map[config.lang]) @@ -235,6 +277,7 @@ def update_database_clicked(): # config.set_and_save_config("ocr_engine", config_ocr_engine) config.set_and_save_config("ocr_lang", config_ocr_lang) config.set_and_save_config("exclude_words", utils.string_to_list(exclude_words)) + config.set_and_save_config("enable_img_embed_search", option_enable_img_embed_search) config.set_and_save_config("show_oneday_wordcloud", option_show_oneday_wordcloud) config.set_and_save_config("show_oneday_left_side_stat", option_show_oneday_wintitle) config.set_and_save_config("use_similar_ch_char_to_search", config_use_similar_ch_char_to_search) diff --git a/windrecorder/utils.py b/windrecorder/utils.py index cb170404..eef152e9 100644 --- a/windrecorder/utils.py +++ b/windrecorder/utils.py @@ -48,6 +48,25 @@ def get_screen_resolution(): return pyautogui.size() +# 获取视频文件信息 +def get_vidfilepath_info(vid_filepath) -> dict: + """ + 获取视频文件信息 + + 常用: + - duration(持续时长 秒) + - width height + + 当获取失败时,可能抛出错误:CalledProcessError: Command 'ffprobe' returned non-zero exit status 1. + """ + result = subprocess.check_output( + f'ffprobe -v quiet -show_streams -select_streams v:0 -of json "{vid_filepath}"', shell=True + ).decode() + + fields = json.loads(result)["streams"][0] + return fields + + # 将输入的文件( %Y-%m-%d_%H-%M-%S str)时间转为时间戳秒数 def date_to_seconds(date_str): # 这里我们先定义了时间格式,然后设置一个epoch基准时间为1970年1月1日。使用strptime()将输入的字符串解析为datetime对象,然后计算这个时间和epoch时间的时间差,转换为秒数返回。 @@ -200,7 +219,7 @@ def set_full_datetime_to_day_time(dt): # 将完整的datetime只保留年月的datetime def set_full_datetime_to_YYYY_MM(dt): - return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0) + return datetime.datetime(year=dt.year, month=dt.month, day=1, hour=0, minute=0, second=0, microsecond=0) # 将完整的datetime只保留年月日的datetime @@ -267,10 +286,8 @@ def kill_recording(): # 通过数据库内项目计算视频对应时间戳 def calc_vid_inside_time(df, num): fulltime = df.iloc[num]["videofile_time"] - vidfilename = os.path.splitext(df.iloc[num]["videofile_name"])[0] + vidfilename = os.path.splitext(df.iloc[num]["videofile_name"])[0][:19] # 用记录时的总时间减去视频文件时间(开始记录的时间)即可得到相对的时间 - vidfilename = vidfilename.replace("-INDEX", "") - vidfilename = vidfilename.replace("-ERROR", "") vid_timestamp = fulltime - date_to_seconds(vidfilename) print(f"utils: video file fulltime:{fulltime}\n" f" vidfilename:{vidfilename}\n" f" vid_timestamp:{vid_timestamp}\n") return vid_timestamp @@ -561,8 +578,6 @@ def extract_date_from_db_filename(db_file_name, user_name=config.user_name): db_file_name = db_file_name[len(prefix) :] db_file_name = db_file_name[:7] - # if db_file_name.endswith(suffix): - # db_file_name = db_file_name[:-(len(suffix))] db_file_name_datetime = datetime.datetime.strptime(db_file_name, "%Y-%m") db_file_name_datetime = set_full_datetime_to_YYYY_MM(db_file_name_datetime) @@ -668,3 +683,12 @@ def get_process_id(process_name): if proc.info["name"] == process_name: return proc.info["pid"] return None + + +# 查找列表项中包含字符串的项 +def find_strings_list_with_substring(string_list, substring): + result = [] + for string in string_list: + if substring in string: + result.append(string) + return result