add img embedding

add natural language image search tweak manager running logic add: log improve img emb search load and ux add: lock during img embedding tweak onboard setting fix: handle error with no search results Update embedding_img_for_all_videofiles.py add i18n add: webui img embed search add manual img emb script add idle routine refactor video file on disk checking eliminate unnecessary endswith add: video file embedding process remove random walk, tweak code, WIP, sorry. add db_get_row_from_vid_filename tweak webui add image search webui tweak search ux; add image embed lib add install script update onboard setting add extension readme Update languages.json
yuka-friends · Feb 9, 2024 · 8ab6320 · 8ab6320
1 parent cfcb2c9
commit 8ab6320
Show file tree

Hide file tree

Showing 22 changed files with 1,148 additions and 209 deletions.
diff --git a/config/src/config_default.json b/config/src/config_default.json
@@ -11,6 +11,7 @@
   "flag_mark_note_filename": "flag_mark_note.csv",
   "thumbnail_generation_size_width": 70,
   "thumbnail_generation_jpg_quality": 30,
+  "vdb_img_path": "./db_imgemb",
   "search_max_num": "50",
   "lang": "sc",
   "ocr_lang": "zh-Hans-CN",
@@ -50,10 +51,14 @@
   "maintain_lock_subdir": "LOCK_MAINTAIN",
   "record_lock_name": "LOCK_FILE_RECORD.MD",
   "tray_lock_name": "LOCK_FILE_TRAY.MD",
+  "img_emb_lock_name": "LOCK_FILE_IMG_EMB.MD",
   "last_idle_maintain_file_path": "cache\\LAST_IDLE_MAINTAIN.MD",
   "iframe_dir": "cache\\i_frames",
   "log_dir": "cache\\logs",
   "win_title_dir": "cache\\win_title",
   "show_oneday_left_side_stat": true,
-  "webui_access_password_md5": ""
+  "webui_access_password_md5": "",
+  "enable_img_embed_search": true,
+  "img_embed_search_recall_result_per_db": 20,
+  "img_embed_module_install": false
 }
diff --git a/config/src/languages.json b/config/src/languages.json
@@ -61,6 +61,13 @@
         "gs_text_intro": "This is the global search page where you can search all the recorded content to date. Press Enter to search after entering the keywords.",
         "gs_slider_to_rewind_result": "Drag to rewind search results",
         "gs_text_randomwalk": "random walk",
+        "gs_text_video_file_not_on_disk": "Video File **{df_videofile_name}** not on disk.",
+        "gs_option_ocr_text_search": "Text OCR search",
+        "gs_option_img_emb_search": "Image semantic search",
+        "gs_input_img_emb_search": "Use natural language to describe images",
+        "gs_text_img_emb_help": "Use natural language to describe the content of the screen. The more precise the description, the closer the result will be. Up to 21 languages such as Chinese, English, Japanese, and Korean are supported here (see the uform documentation for details). A video can only be found by search after it has been embedded and indexed; please see the setting item description on the settings page for details.",
+        "gs_text_searching": "Searching, please stand by...",
+        "gs_text_loading_text_embed_model": "Loading Text Embedding model...",
 
         "stat_md_month_title": "### 🌖 Monthly Statistics",
         "stat_md_year_title": "### 🎏 {stat_year_title} Record",
@@ -147,7 +154,12 @@
         "set_pwd_text": "webui access password (leave blank to disable)",
         "set_pwd_help": "After enabling this setting, you will be asked to provide a password when accessing webui. This setting will not encrypt your data, but only protects the entrance to webui to avoid access by unfamiliar users in the same LAN.",
         "set_pwd_forget_help": "Forgot your password? Delete the webui_access_password_md5 item in config_user.json to reset password.",
-
+        "set_checkbox_enable_img_emb": "Enable image semantic retrieval",
+        "set_text_enable_img_emb_help": "Image semantic retrieval is a method of image retrieval based on the semantic content of images through computer vision technology. It can retrieve related images from large-scale image databases based on a semantic description of the image content. Windrecorder uses uform-vl-multilingual-v2 to embed the index. When this option is turned on, the program will build an image embedding index for videos in its idle time, after which they can be searched globally. You can also index manually with the extension/index_img_embedding_for_all_videofiles script in the program directory.",
+        "set_text_img_emb_not_suppport_cuda": "Your device does not appear to support CUDA and may have lower performance when using the CPU to semantically index images.",
+        "set_input_img_emb_max_recall_count": "The maximum number of results recalled from each database in natural semantic search",
+        "set_text_help_img_emb_max_recall_count": "During natural semantic search, a specified number of results will be recalled from the database for each time period. Too high or too low a number may lead to a decrease in query accuracy.",
+
         "qs_config_indicator": "   ← Current options",
         "qs_la_text_same_as_previous": "The interface language remains the same as before: English",
         "qs_un_set_your_username": "Set your username as a database identifier.",
@@ -170,6 +182,7 @@
         "qs_mo_describe": "Note: Due to the lack of official support for multiple monitors in pyautogui, Windrecorder will only record the screen set as the 'primary display' in Windows.\n",
         "qs_mo_detect": "The detected resolution of the primary display is: {monitor_width}x{monitor_height}",
         "qs_mo_cta": "This setting will be automatically detected each time you start recording, so you don't need to choose or set it separately.",
+        "qs_et_describe": "Windrecorder also provides some extension functions, which you can later install/use in the extension directory.",
         "qs_end_describe": "Congratulations! You have completed all initial settings. Don’t worry, you can adjust the settings anytime within the app! \n\nNow, you can open [start_app.bat] in the directory to start using it. \n",
         "qs_end_slogan": "> Capture and preserve the fleeting moments of the wind, as seen through your eyes.",
         "qs_end_feedback": "> Encountered a problem or have suggestions? Feel free to submit issues and PRs at https://github.com/yuka-friends/Windrecorder.",
@@ -261,6 +274,13 @@
         "gs_text_intro": "这里是全局搜索页，可以搜索到迄今记录的所有内容。输入关键词后回车即可搜索。",
         "gs_slider_to_rewind_result": "拖动回溯搜索结果",
         "gs_text_randomwalk": "随便走走",
+        "gs_text_video_file_not_on_disk": "磁盘上没有找到 **{df_videofile_name}**",
+        "gs_option_ocr_text_search": "文本 OCR 搜索",
+        "gs_option_img_emb_search": "图像语义搜索",
+        "gs_input_img_emb_search": "使用自然语言描述图像",
+        "gs_text_img_emb_help": "用自然语言描述画面内容，描述越精确、结果将越接近。此处支持中、英、日、韩等多达 21 种语言输入（详细 uform 文档）。视频需要被嵌入索引后才能搜索得到，详见设置页设置项说明。",
+        "gs_text_searching": "搜索中，请稍后……",
+        "gs_text_loading_text_embed_model": "加载文本嵌入模型中，请稍后……",
 
         "stat_md_month_title": "### 🌖 当月数据统计",
         "stat_md_year_title": "### 🎏 {stat_year_title} 记录",
@@ -347,6 +367,11 @@
         "set_pwd_text": "webui 访问密码（留空则不启用）",
         "set_pwd_help": "启用此项设置后，会在访问 webui 时要求提供密码。此项设置不会加密你的数据，仅保护 webui 的使用入口，以避免同局域网内陌生用户访问。",
         "set_pwd_forget_help": "忘记密码？请将 config_user.json 中的 webui_access_password_md5 项删除重置。",
+        "set_checkbox_enable_img_emb": "启用图像语义检索",
+        "set_text_enable_img_emb_help": "图像语义检索是一种通过计算机视觉技术、基于图像的语义内容进行图像检索的方法。它可以做到根据对图像的内容语义描述，从大规模的图像数据库中检索出查询出相关的图像。Windrecorder 使用 uform-vl-multilingual-v2 来嵌入索引。开启该选项后，程序将在空闲时间对视频建立图像嵌入索引，之后便能对此进行全局搜索。你也可以通过程序目录下的 extension/index_img_embedding_for_all_videofiles脚本手动索引。",
+        "set_text_img_emb_not_suppport_cuda": "你的设备似乎不支持 CUDA，在使用 CPU 对图像语义索引时可能性能较低。",
+        "set_input_img_emb_max_recall_count": "自然语义搜索中，从每个数据库召回的最大结果数",
+        "set_text_help_img_emb_max_recall_count": "自然语义搜索时，会分别从每个时间段的数据库中召回指定数量结果，过高或过低的数量可能导致查询准确率降低。",
 
         "qs_config_indicator": "   ← 当前选项",
         "qs_la_text_same_as_previous": "界面语言保持与先前一致的：简体中文",
@@ -370,6 +395,7 @@
         "qs_mo_describe": "注意：由于 pyautogui 暂未官方支持多显示器，捕风记录仪将只记录 Windows 下设置的【主显示器】\n",
         "qs_mo_detect": "当前检测到的主显示器分辨率为：{monitor_width}x{monitor_height}",
         "qs_mo_cta": "此项设定将在每次录屏时自动识别，无需额外选择与设定。",
+        "qs_et_describe": "捕风记录仪 还提供了一些扩展功能，你可以稍后在 extension 目录下安装/使用。",
         "qs_end_describe": "恭喜！你已完成所有初始设定。别担心，你可以随时在应用内调整设置！\n\n现在，你可以打开目录下的 【start_app.bat】 来开始使用啦。\n",
         "qs_end_slogan": "> 一起捕捉贮藏风一般掠过的、你的目之所见。",
         "qs_end_feedback": "> 遇到问题、想反馈建议？欢迎在 https://github.com/yuka-friends/Windrecorder 提交 issue 与 PR。",
@@ -461,7 +487,14 @@
         "gs_text_intro": "これはグローバル検索ページです。これまでに記録されたすべてのコンテンツを検索できます。キーワードを入力して Enter キーを押すと検索が開始されます。",
         "gs_slider_to_rewind_result": "検索結果を巻き戻す",
         "gs_text_randomwalk": "散歩する",
-
+        "gs_text_video_file_not_on_disk": "**{df_videofile_name}** がディスク上に見つかりません",
+        "gs_option_ocr_text_search": "テキスト OCR 検索",
+        "gs_option_img_emb_search": "画像セマンティック検索",
+        "gs_input_img_emb_search": "自然言語を使用して画像を説明します",
+        "gs_text_img_emb_help": "画面の内容を自然言語で説明します。説明が正確であればあるほど、結果はより正確になります。ここでは、中国語、英語、日本語、韓国語など、最大 21 の言語がサポートされています(詳細は uform ドキュメントを参照)。ビデオは埋め込みインデックスが作成された後にのみ検索できます。詳細については設定ページの設定項目の説明を参照してください。",
+        "gs_text_searching": "検索中です、お待ちください...",
+        "gs_text_loading_text_embed_model": "テキスト埋め込みモデルを読み込み中...",
+
         "stat_md_month_title": "### 🌖 今月のデータ統計",
         "stat_md_year_title": "### 🎏 {stat_year_title} の記録",
         "stat_md_memory_title": "### 🧩 メモリの要約",
@@ -547,7 +580,12 @@
         "set_pwd_text": "webui アクセス パスワード (無効にする場合は空白のままにします)",
         "set_pwd_help": "この設定を有効にすると、webui にアクセスするときにパスワードの入力を求められます。この設定はデータを暗号化しませんが、同じ LAN 内の見慣れないユーザーによるアクセスを避けるために webui への入り口を保護するだけです。",
         "set_pwd_forget_help": "パスワードをお忘れですか? config_user.json の webui_access_password_md5 項目を削除してリセットしてください。",
-
+        "set_checkbox_enable_img_emb": "画像のセマンティック検索を有効にする",
+        "set_text_enable_img_emb_help": "画像意味検索は、コンピューター ビジョン テクノロジーによる画像の意味内容に基づく画像検索方法です。画像内容の意味的な記述に基づいて、大規模な画像データベースから関連画像を検索できます。Windrecorder は、uform-vl-multilingual-v2 を使用してインデックスを埋め込みます。このオプションをオンにすると、プログラムは空き時間にビデオの画像埋め込みインデックスを構築し、その後グローバルに検索できるようになります。プログラム ディレクトリにある extension/index_img_embedding_for_all_videofiles スクリプトを使って、手動でインデックスを作成することもできます。",
+        "set_text_img_emb_not_suppport_cuda": "お使いのデバイスは CUDA をサポートしていないようです。CPU を使用してイメージのセマンティック インデックスを作成するとパフォーマンスが低下する可能性があります。",
+        "set_input_img_emb_max_recall_count": "自然セマンティック検索で各データベースから呼び出される結果の最大数",
+        "set_text_help_img_emb_max_recall_count": "ナチュラル セマンティック検索では、指定された数の結果が期間ごとにデータベースから呼び出されます。数値が多すぎたり低すぎたりすると、クエリの精度が低下する可能性があります。",
+
         "qs_config_indicator": "   ← 現在のオプション",
         "qs_la_text_same_as_previous": "インターフェイス言語は以前と同じで日本語です。",
         "qs_un_set_your_username": "データベースの識別子として使用するユーザー名を設定してください。",
@@ -570,6 +608,7 @@
         "qs_mo_describe": "注意：pyautoguiは公式に複数のディスプレイをサポートしていないため、WindrecorderはWindowsで設定された【メインディスプレイ】のみを記録します。\n",
         "qs_mo_detect": "現在検出されたメインディスプレイの解像度は{monitor_width}x{monitor_height}",
         "qs_mo_cta": "この設定は画面録画時に自動的に識別され、追加の選択や設定は必要ありません。",
+        "qs_et_describe": "Windrecorder にはいくつかの拡張機能も用意されており、後で拡張機能ディレクトリにインストールして使用できます。",
         "qs_end_describe": "おめでとう！ すべての初期設定が完了しました。 心配しないでください。設定はアプリ内でいつでも調整できます。 \n\nこれで、ディレクトリ内の [start_app.bat] を開いて使用を開始できます。 \n",
         "qs_end_slogan": "> 一緒に、あなたの目が見た、風のように過ぎ去るものをキャプチャしましょう。",
         "qs_end_feedback": "> 問題が発生した場合やフィードバックの提案がある場合は、https://github.com/yuka-friends/Windrecorder でissueやPRを提出してください。",

diff --git a/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.bat b/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.bat
@@ -0,0 +1,11 @@
+@echo off
+rem Launcher for index_img_embedding_for_all_videofiles.py:
+rem activates the project's poetry virtualenv, then runs the script next to this file.
+echo Loading extension, please stand by.
+echo.
+
+rem Work from this script's own directory (%~dp0 = drive + path of this .bat).
+cd /d %~dp0
+rem Ask poetry for the virtualenv location and activate it.
+rem The path is quoted so that locations containing spaces still work.
+for /F "tokens=* USEBACKQ" %%A in (`python -m poetry env info --path`) do call "%%A\Scripts\activate.bat"
+rem Code page 65001 = UTF-8, needed because the script prints non-ASCII text.
+chcp 65001
+cls
+
+rem %~dp0 already ends with a backslash, so no extra separator is added.
+python "%~dp0index_img_embedding_for_all_videofiles.py"
+pause
diff --git a/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.py b/extension/index_img_embedding_for_all_videofiles/index_img_embedding_for_all_videofiles.py
@@ -0,0 +1,93 @@
+# Set workspace to Windrecorder dir
+import sys
+import os
+current_dir = os.path.dirname(os.path.abspath(__file__))
+parent_parent_dir = os.path.dirname(os.path.dirname(current_dir))
+sys.path.append(parent_parent_dir)
+os.chdir("..")
+os.chdir("..")
+
+import subprocess
+import datetime
+from os import getpid
+
+from windrecorder import file_utils, utils
+from windrecorder.config import config
+from windrecorder.exceptions import LockExistsException
+from windrecorder.lock import FileLock
+
+if config.img_embed_module_install:
+    try:
+        from windrecorder import img_embed_manager
+    except ModuleNotFoundError:
+        config.set_and_save_config("img_embed_module_install", False)
+        print('Img Embedding Module seems not installed, please install first.')
+        sys.exit()
+else:
+    print('Img Embedding Module seems not installed, please install first.')
+    sys.exit()
+
+subprocess.run("title Embedding Img for existing video files", shell=True)
+
+videos_filepath = file_utils.get_file_path_list(config.record_videos_dir)
+videos_filepath_filter = [item for item in videos_filepath if '-IMGEMB' not in item]
+videos_filepath_filter_num = len(videos_filepath_filter)
+
+per_video_embedding_time = datetime.timedelta(minutes=2) * config.record_seconds / 900   # 在使用 cuda 的情况下，每 900s 视频需要 2 分钟完成索引。其中拆 iframe 占了大部分时间
+eta_process_all_video = videos_filepath_filter_num * per_video_embedding_time
+
+
+def main():
+    while True:
+        subprocess.run("cls", shell=True)
+        if img_embed_manager.is_cuda_available:
+            print('√ Your device support CUDA acceleration.')
+        else:
+            print('X Your device seems not support CUDA acceleration, embedding performance might be slow.')
+
+        text_intro = f"""
+        
+本脚本可以将你未进行图像嵌入索引的历史视频进行索引。索引完成后，你可以使用自然语言描述来查找对应图像画面。
+This script can index your no image embedding historical videos. After indexed, you can use natural language descriptions to find corresponding images in video files.
+
+--------------------------------------------------------------------
+
+约有 {videos_filepath_filter_num} 个视频未图像嵌入索引，索引所有视频预估用时：{utils.convert_seconds_to_hhmmss(eta_process_all_video.seconds)}
+
+- 若要索引全部视频文件，请输入 Y 后回车确认。
+- 若只想先索引部分视频，请输入数字后回车确认（应小于 {videos_filepath_filter_num} ）。每个视频的索引用时预估{utils.convert_seconds_to_hhmmss(per_video_embedding_time.seconds)}，同时将会从最新的视频开始、向旧视频进行索引。
+
+提示: 索引过程中，可以随时关闭终端窗口来中止索引。别担心，已索引的进度都会被保存，下次会继续进度。
+
+There are approximately {videos_filepath_filter_num} videos without image embedding index. Estimated time to index all videos: {utils.convert_seconds_to_hhmmss(eta_process_all_video.seconds)}
+
+- To index all video files, please enter Y and press Enter to confirm.
+- If you only want to index some videos first, please enter the number and press Enter to confirm (should be less than {videos_filepath_filter_num}). The indexing time of each video is estimated {utils.convert_seconds_to_hhmmss(per_video_embedding_time.seconds)}, and indexing will start from the latest video to the old video.
+
+Tip: During the indexing process, you can close the terminal window at any time to abort the indexing. Don't worry, all indexed progress will be saved and progress will continue next time.
+
+        """
+        print(text_intro)
+        user_input = input("> ")
+        if user_input.lower() == "y":
+            img_embed_manager.all_videofile_do_img_embedding_routine(video_queue_count=videos_filepath_filter_num)
+            break
+        try:
+            val = int(user_input)
+            if 0 < val < videos_filepath_filter_num:
+                img_embed_manager.all_videofile_do_img_embedding_routine(video_queue_count=val)
+                break
+        except ValueError:
+            pass
+
+    subprocess.run("cls", shell=True)
+    print('指定的选项下视频已索引完成，你可以在 webui 使用自然语言描述来查找对应图像画面。')
+
+
+try:
+    img_emb_lock = FileLock(config.img_emb_lock_path, str(getpid()), timeout_s=None)
+    with img_emb_lock:
+        main()
+except LockExistsException:
+    subprocess.run("cls", shell=True)
+    print('Warring: Seems another img embedding indexing process is running.\n If not, please try to delete cache/lock/LOCK_FILE_IMG_EMB.MD and try again.\n')