-
Notifications
You must be signed in to change notification settings - Fork 4.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Bug] XTTS failing to load #2955
Comments
Can confirm the bug with a similar setup. Logs: > Using model: xtts
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[4], line 2
1 from TTS.api import TTS
----> 2 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
File ~\miniconda3\lib\site-packages\TTS\api.py:81, in TTS.__init__(self, model_name, model_path, config_path, vocoder_path, vocoder_config_path, progress_bar, cs_api_model, gpu)
79 if model_name is not None:
80 if "tts_models" in model_name or "coqui_studio" in model_name:
---> 81 self.load_tts_model_by_name(model_name, gpu)
82 elif "voice_conversion_models" in model_name:
83 self.load_vc_model_by_name(model_name, gpu)
File ~\miniconda3\lib\site-packages\TTS\api.py:185, in TTS.load_tts_model_by_name(self, model_name, gpu)
179 model_path, config_path, vocoder_path, vocoder_config_path, model_dir = self.download_model_by_name(
180 model_name
181 )
183 # init synthesizer
184 # None values are fetch from the model
--> 185 self.synthesizer = Synthesizer(
186 tts_checkpoint=model_path,
187 tts_config_path=config_path,
188 tts_speakers_file=None,
189 tts_languages_file=None,
190 vocoder_checkpoint=vocoder_path,
191 vocoder_config=vocoder_config_path,
192 encoder_checkpoint=None,
193 encoder_config=None,
194 model_dir=model_dir,
195 use_cuda=gpu,
196 )
File ~\miniconda3\lib\site-packages\TTS\utils\synthesizer.py:109, in Synthesizer.__init__(self, tts_checkpoint, tts_config_path, tts_speakers_file, tts_languages_file, vocoder_checkpoint, vocoder_config, encoder_checkpoint, encoder_config, vc_checkpoint, vc_config, model_dir, voice_dir, use_cuda)
107 self.output_sample_rate = self.tts_config.audio["sample_rate"]
108 else:
--> 109 self._load_tts_from_dir(model_dir, use_cuda)
110 self.output_sample_rate = self.tts_config.audio["output_sample_rate"]
File ~\miniconda3\lib\site-packages\TTS\utils\synthesizer.py:164, in Synthesizer._load_tts_from_dir(self, model_dir, use_cuda)
162 self.tts_config = config
163 self.tts_model = setup_tts_model(config)
--> 164 self.tts_model.load_checkpoint(config, checkpoint_dir=model_dir, eval=True)
165 if use_cuda:
166 self.tts_model.cuda()
File ~\miniconda3\lib\site-packages\TTS\tts\models\xtts.py:645, in Xtts.load_checkpoint(self, config, checkpoint_dir, checkpoint_path, vocab_path, eval, strict)
643 if eval:
644 self.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache)
--> 645 self.load_state_dict(load_fsspec(model_path)["model"], strict=strict)
647 if eval:
648 self.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache)
File ~\miniconda3\lib\site-packages\torch\nn\modules\module.py:2041, in Module.load_state_dict(self, state_dict, strict)
2036 error_msgs.insert(
2037 0, 'Missing key(s) in state_dict: {}. '.format(
2038 ', '.join('"{}"'.format(k) for k in missing_keys)))
2040 if len(error_msgs) > 0:
-> 2041 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
2042 self.__class__.__name__, "\n\t".join(error_msgs)))
2043 return _IncompatibleKeys(missing_keys, unexpected_keys)
RuntimeError: Error(s) in loading state_dict for Xtts:
Missing key(s) in state_dict: "gpt.gpt.h.0.attn.bias", "gpt.gpt.h.0.attn.masked_bias", "gpt.gpt.h.1.attn.bias", "gpt.gpt.h.1.attn.masked_bias", "gpt.gpt.h.2.attn.bias", "gpt.gpt.h.2.attn.masked_bias", "gpt.gpt.h.3.attn.bias", "gpt.gpt.h.3.attn.masked_bias", "gpt.gpt.h.4.attn.bias", "gpt.gpt.h.4.attn.masked_bias", "gpt.gpt.h.5.attn.bias", "gpt.gpt.h.5.attn.masked_bias", "gpt.gpt.h.6.attn.bias", "gpt.gpt.h.6.attn.masked_bias", "gpt.gpt.h.7.attn.bias", "gpt.gpt.h.7.attn.masked_bias", "gpt.gpt.h.8.attn.bias", "gpt.gpt.h.8.attn.masked_bias", "gpt.gpt.h.9.attn.bias", "gpt.gpt.h.9.attn.masked_bias", "gpt.gpt.h.10.attn.bias", "gpt.gpt.h.10.attn.masked_bias", "gpt.gpt.h.11.attn.bias", "gpt.gpt.h.11.attn.masked_bias", "gpt.gpt.h.12.attn.bias", "gpt.gpt.h.12.attn.masked_bias", "gpt.gpt.h.13.attn.bias", "gpt.gpt.h.13.attn.masked_bias", "gpt.gpt.h.14.attn.bias", "gpt.gpt.h.14.attn.masked_bias", "gpt.gpt.h.15.attn.bias", "gpt.gpt.h.15.attn.masked_bias", "gpt.gpt.h.16.attn.bias", "gpt.gpt.h.16.attn.masked_bias", "gpt.gpt.h.17.attn.bias", "gpt.gpt.h.17.attn.masked_bias", "gpt.gpt.h.18.attn.bias", "gpt.gpt.h.18.attn.masked_bias", "gpt.gpt.h.19.attn.bias", "gpt.gpt.h.19.attn.masked_bias", "gpt.gpt.h.20.attn.bias", "gpt.gpt.h.20.attn.masked_bias", "gpt.gpt.h.21.attn.bias", "gpt.gpt.h.21.attn.masked_bias", "gpt.gpt.h.22.attn.bias", "gpt.gpt.h.22.attn.masked_bias", "gpt.gpt.h.23.attn.bias", "gpt.gpt.h.23.attn.masked_bias", "gpt.gpt.h.24.attn.bias", "gpt.gpt.h.24.attn.masked_bias", "gpt.gpt.h.25.attn.bias", "gpt.gpt.h.25.attn.masked_bias", "gpt.gpt.h.26.attn.bias", "gpt.gpt.h.26.attn.masked_bias", "gpt.gpt.h.27.attn.bias", "gpt.gpt.h.27.attn.masked_bias", "gpt.gpt.h.28.attn.bias", "gpt.gpt.h.28.attn.masked_bias", "gpt.gpt.h.29.attn.bias", "gpt.gpt.h.29.attn.masked_bias", "gpt.gpt_inference.transformer.h.0.attn.bias", "gpt.gpt_inference.transformer.h.0.attn.masked_bias", "gpt.gpt_inference.transformer.h.1.attn.bias", "gpt.gpt_inference.transformer.h.1.attn.masked_bias", 
"gpt.gpt_inference.transformer.h.2.attn.bias", "gpt.gpt_inference.transformer.h.2.attn.masked_bias", "gpt.gpt_inference.transformer.h.3.attn.bias", "gpt.gpt_inference.transformer.h.3.attn.masked_bias", "gpt.gpt_inference.transformer.h.4.attn.bias", "gpt.gpt_inference.transformer.h.4.attn.masked_bias", "gpt.gpt_inference.transformer.h.5.attn.bias", "gpt.gpt_inference.transformer.h.5.attn.masked_bias", "gpt.gpt_inference.transformer.h.6.attn.bias", "gpt.gpt_inference.transformer.h.6.attn.masked_bias", "gpt.gpt_inference.transformer.h.7.attn.bias", "gpt.gpt_inference.transformer.h.7.attn.masked_bias", "gpt.gpt_inference.transformer.h.8.attn.bias", "gpt.gpt_inference.transformer.h.8.attn.masked_bias", "gpt.gpt_inference.transformer.h.9.attn.bias", "gpt.gpt_inference.transformer.h.9.attn.masked_bias", "gpt.gpt_inference.transformer.h.10.attn.bias", "gpt.gpt_inference.transformer.h.10.attn.masked_bias", "gpt.gpt_inference.transformer.h.11.attn.bias", "gpt.gpt_inference.transformer.h.11.attn.masked_bias", "gpt.gpt_inference.transformer.h.12.attn.bias", "gpt.gpt_inference.transformer.h.12.attn.masked_bias", "gpt.gpt_inference.transformer.h.13.attn.bias", "gpt.gpt_inference.transformer.h.13.attn.masked_bias", "gpt.gpt_inference.transformer.h.14.attn.bias", "gpt.gpt_inference.transformer.h.14.attn.masked_bias", "gpt.gpt_inference.transformer.h.15.attn.bias", "gpt.gpt_inference.transformer.h.15.attn.masked_bias", "gpt.gpt_inference.transformer.h.16.attn.bias", "gpt.gpt_inference.transformer.h.16.attn.masked_bias", "gpt.gpt_inference.transformer.h.17.attn.bias", "gpt.gpt_inference.transformer.h.17.attn.masked_bias", "gpt.gpt_inference.transformer.h.18.attn.bias", "gpt.gpt_inference.transformer.h.18.attn.masked_bias", "gpt.gpt_inference.transformer.h.19.attn.bias", "gpt.gpt_inference.transformer.h.19.attn.masked_bias", "gpt.gpt_inference.transformer.h.20.attn.bias", "gpt.gpt_inference.transformer.h.20.attn.masked_bias", "gpt.gpt_inference.transformer.h.21.attn.bias", 
"gpt.gpt_inference.transformer.h.21.attn.masked_bias", "gpt.gpt_inference.transformer.h.22.attn.bias", "gpt.gpt_inference.transformer.h.22.attn.masked_bias", "gpt.gpt_inference.transformer.h.23.attn.bias", "gpt.gpt_inference.transformer.h.23.attn.masked_bias", "gpt.gpt_inference.transformer.h.24.attn.bias", "gpt.gpt_inference.transformer.h.24.attn.masked_bias", "gpt.gpt_inference.transformer.h.25.attn.bias", "gpt.gpt_inference.transformer.h.25.attn.masked_bias", "gpt.gpt_inference.transformer.h.26.attn.bias", "gpt.gpt_inference.transformer.h.26.attn.masked_bias", "gpt.gpt_inference.transformer.h.27.attn.bias", "gpt.gpt_inference.transformer.h.27.attn.masked_bias", "gpt.gpt_inference.transformer.h.28.attn.bias", "gpt.gpt_inference.transformer.h.28.attn.masked_bias", "gpt.gpt_inference.transformer.h.29.attn.bias", "gpt.gpt_inference.transformer.h.29.attn.masked_bias". Environment
|
I got this error as well, for me the fix was to update transformers: |
|
Weird that it didn't catch the correct version on install, because I was doing it in a new environment. Had to update transformers, protobuf, and a couple other things to get it going here, but it works now. Thanks. |
Indeed, force reinstalling TTS and updating transformers did the trick. Thanks, @brianb5010! |
Describe the bug
XTTS fails to synthesize speech and, seemingly, to fully load onto a device
To Reproduce
Run inference with XTTS on the command line, using the cloned Huggingface Gradio applet, or with the Python API
Expected behavior
Loading, operating
Logs
Environment
Additional context
None of the fixes described in #2947 worked in this case
The text was updated successfully, but these errors were encountered: