home-assistant · frenck · Dec 11, 2024 · Dec 10, 2024 · Dec 11, 2024
diff --git a/whisper/CHANGELOG.md b/whisper/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## 2.4.0
+
+- Add "auto" for model and beam size (0) to select values based on CPU
+
 ## 2.3.1
 
 - Move `turbo` down the list closer to `large` to avoid confusion

diff --git a/whisper/DOCS.md b/whisper/DOCS.md
@@ -36,11 +36,12 @@ If you select "auto", the model will run **much** slower but will auto-detect th
 
 Whisper model that will be used for transcription. Choose `custom` to use the model name in `custom_model`, which may be a HuggingFace model ID like "Systran/faster-distil-whisper-small.en".
 
-The default model is `tiny-int8`, a compressed version of the smallest Whisper model which is able to run on a Raspberry Pi 4.
+The default model is `auto`, which selects `tiny-int8` for ARM devices like the Raspberry Pi 4 and `base-int8` otherwise.
 Compressed models (`int8`) are slightly less accurate than their counterparts, but smaller and faster. [Distilled](https://github.com/huggingface/distil-whisper) models are not compressed, but are faster and smaller than their non-distilled counterparts.
 
 Available models:
 
+- `auto` (select based on CPU)
 - `tiny-int8` (compressed)
 - `tiny`
 - `tiny.en` (English only)
@@ -70,6 +71,7 @@ Path to a converted model directory, or a CTranslate2-converted Whisper model ID
 ### Option: `beam_size`
 
 Number of candidates to consider simultaneously during transcription (see [beam search](https://en.wikipedia.org/wiki/Beam_search)).
+The default value of `0` will automatically select `1` for ARM devices like the Raspberry Pi 4 and `5` otherwise.
 
 Increasing the beam size will increase accuracy at the cost of performance.
 

diff --git a/whisper/build.yaml b/whisper/build.yaml
@@ -6,4 +6,4 @@ codenotary:
   signer: notary@home-assistant.io
   base_image: notary@home-assistant.io
 args:
-  WYOMING_WHISPER_VERSION: 2.3.0
+  WYOMING_WHISPER_VERSION: 2.4.0
diff --git a/whisper/config.yaml b/whisper/config.yaml
@@ -1,5 +1,5 @@
 ---
-version: 2.3.1
+version: 2.4.0
 slug: whisper
 name: Whisper
 description: Speech-to-text with Whisper
@@ -13,13 +13,13 @@ discovery:
 backup_exclude:
   - "models*"
 options:
-  model: tiny-int8
+  model: auto
   language: en
-  beam_size: 1
+  beam_size: 0
   debug_logging: false
 schema:
   model: |
-    list(tiny-int8|tiny|tiny.en|base-int8|base|base.en|small-int8|distil-small.en|small|small.en|distil-medium.en|medium-int8|medium|medium.en|large|large-v1|distil-large-v2|large-v2|distil-large-v3|large-v3|turbo|custom)
+    list(auto|tiny-int8|tiny|tiny.en|base-int8|base|base.en|small-int8|distil-small.en|small|small.en|distil-medium.en|medium-int8|medium|medium.en|large|large-v1|distil-large-v2|large-v2|distil-large-v3|large-v3|turbo|custom)
   custom_model: str?
   language: |
     list(auto|af|am|ar|as|az|ba|be|bg|bn|bo|br|bs|ca|cs|cy|da|de|el|en|es|et|eu|fa|fi|fo|fr|gl|gu|ha|haw|he|hi|hr|ht|hu|hy|id|is|it|ja|jw|ka|kk|km|kn|ko|la|lb|ln|lo|lt|lv|mg|mi|mk|ml|mn|mr|ms|mt|my|ne|nl|nn|no|oc|pa|pl|ps|pt|ro|ru|sa|sd|si|sk|sl|sn|so|sq|sr|su|sv|sw|ta|te|tg|th|tk|tl|tr|tt|uk|ur|uz|vi|yi|yo|zh|yue)