diff --git a/.gitignore b/.gitignore index f76303f..a24ff2e 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,4 @@ release/* *.exe deps.zip +README.md.backup diff --git a/EnglishPhoneticProcessor.cpp b/EnglishPhoneticProcessor.cpp index 3aff2ba..7bb17e3 100644 --- a/EnglishPhoneticProcessor.cpp +++ b/EnglishPhoneticProcessor.cpp @@ -9,7 +9,7 @@ bool EnglishPhoneticProcessor::Initialize(Phonemizer* InPhn) Phoner = InPhn; Tokenizer.SetAllowedChars(Phoner->GetGraphemeChars()); - Tokenizer.SetNumberText(Phoner->GetNumTxt(),Phoner->GetNumTxtLang()); + @@ -17,19 +17,19 @@ bool EnglishPhoneticProcessor::Initialize(Phonemizer* InPhn) } -std::string EnglishPhoneticProcessor::ProcessTextPhonetic(const std::string& InText, const std::vector &InPhonemes, const std::vector& InDict, ETTSLanguage::Enum InLanguage, bool IsTac) +std::string EnglishPhoneticProcessor::ProcessTextPhonetic(const std::string& InText, const std::vector &InPhonemes, const std::vector& InDict, ETTSLanguageType::Enum InLanguageType, bool IsTac) { if (!Phoner) return "ERROR"; - vector Words = Tokenizer.Tokenize(InText,InLanguage,IsTac); + vector Words = Tokenizer.Tokenize(InText,IsTac); string Assemble = ""; - // If language is negative, this is char-based model. - if (InLanguage < 0) + + if (InLanguageType == ETTSLanguageType::Char) { for (size_t w = 0; w < Words.size();w++) { diff --git a/EnglishPhoneticProcessor.h b/EnglishPhoneticProcessor.h index 7056289..d179de3 100644 --- a/EnglishPhoneticProcessor.h +++ b/EnglishPhoneticProcessor.h @@ -18,9 +18,11 @@ class EnglishPhoneticProcessor public: bool Initialize(Phonemizer *InPhn); - std::string ProcessTextPhonetic(const std::string& InText, const std::vector &InPhonemes, const std::vector& InDict, ETTSLanguage::Enum InLanguage, bool IsTac); + std::string ProcessTextPhonetic(const std::string& InText, const std::vector &InPhonemes, const std::vector& InDict, ETTSLanguageType::Enum InLanguageType, bool IsTac); EnglishPhoneticProcessor(); EnglishPhoneticProcessor(Phonemizer *InPhn); ~EnglishPhoneticProcessor(); + + inline TextTokenizer& GetTokenizer() {return Tokenizer;} }; diff --git a/README.md b/README.md index 5db47d6..65872af 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# TensorVox +# TensorVox -[![Join the chat at https://gitter.im/TensorVox/community](https://badges.gitter.im/TensorVox/community.svg)](https://gitter.im/TensorVox/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![](https://dcbadge.vercel.app/api/server/yqFDAWH)](https://discord.gg/yqFDAWH) TensorVox is an application designed to enable user-friendly and lightweight neural speech synthesis in the desktop, aimed at increasing accessibility to such technology. -Powered mainly by [TensorFlowTTS](https://github.com/TensorSpeech/TensorFlowTTS) and also by [Coqui-TTS](https://github.com/coqui-ai/TTS), it is written in pure C++/Qt, using the Tensorflow C API for interacting with the models. This way, we can perform inference without having to install gigabytes worth of Python libraries, just a 100MB DLL. +Powered mainly by [TensorFlowTTS](https://github.com/TensorSpeech/TensorFlowTTS) and also by [Coqui-TTS](https://github.com/coqui-ai/TTS) and [VITS](https://github.com/jaywalnut310/vits), it is written in pure C++/Qt, using the Tensorflow C API for interacting with Tensorflow models (first two), and LibTorch for PyTorch ones. This way, we can perform inference without having to install gigabytes worth of Python libraries, just a few DLLs. 
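Review note on the EnglishPhoneticProcessor hunk above: the refactor replaces the old "negative language ID means char-based" convention with an explicit `ETTSLanguageType` check. A condensed sketch of the new dispatch, assuming the members shown in this diff — the real method also takes the phoneme list and phonetic dictionary, which are elided here:

```cpp
// Sketch only: dispatch on language type instead of on a negative language ID.
// Phoner->ProcessWord is the existing G2P entry point.
std::string EnglishPhoneticProcessor::ProcessTextPhonetic(
    const std::string& InText, ETTSLanguageType::Enum InLanguageType, bool IsTac)
{
    if (!Phoner)
        return "ERROR";

    std::vector<std::string> Words = Tokenizer.Tokenize(InText, IsTac);
    std::string Assemble;

    if (InLanguageType == ETTSLanguageType::Char) {
        // Char-based model: feed graphemes through untouched.
        for (const std::string& Word : Words)
            Assemble += Word + " ";
    } else {
        // Phoneme-based model: run each word through the phonemizer.
        for (const std::string& Word : Words)
            Assemble += Phoner->ProcessWord(Word) + " ";
    }
    return Assemble;
}
```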
![Interface with Tac2 model loaded](https://i.imgur.com/wtPzzNh.png) @@ -20,14 +20,14 @@ If you're interested in using your own model, first you need to train then expor ## Supported architectures -TensorVox supports models from two main repos: +TensorVox supports models from three repos: - **TensorFlowTTS**: FastSpeech2, Tacotron2, both char and phoneme based and Multi-Band MelGAN. Here's a Colab notebook demonstrating how to export the LJSpeech pretrained, char-based Tacotron2 model: [](https://colab.research.google.com/drive/1KLqZ1rkD4Enw7zpTgXGL6if7e5s0UeWa?usp=sharing) - **Coqui-TTS:** Tacotron2 (phoneme-based IPA) and Multi-Band MelGAN, after converting from PyTorch to Tensorflow. Here's a notebook showing how to export the LJSpeech DDC model: [](https://colab.research.google.com/drive/15CdGEAu_-KezV1XxwzVfQiFSm0tveBkC?usp=sharing) + - **jaywalnut310/VITS:** VITS, a fully E2E (end-to-end) model that uses stressed IPA as phonemes. Export notebook: [](https://colab.research.google.com/drive/1BSGE5DQYweXBWrwPOmb6CRPUU8H5mBvb?usp=sharing) - -Those two examples should provide you with enough guidance to understand what is needed. If you're looking to train a model specifically for this purpose then I recommend TensorFlowTTS, as it is the one with the best support. -As for languages, out-of-the-box support is provided for English (both Coqui and TFTTS), German and Spanish (only TensorFlowTTS); that is, you won't have to modify any code. +Those three examples should provide you with enough guidance to understand what is needed. If you're looking to train a model specifically for this purpose, I recommend TensorFlowTTS, as it has the best support, or VITS, as it's the closest thing to perfect. +As for languages, out-of-the-box support is provided for English (Coqui, TFTTS and VITS), German and Spanish (TensorFlowTTS only); that is, you won't have to do anything. You can also add languages without modifying code, as long as the phoneme set is IPA (stressed or unstressed), ARPA, or GlobalPhone (open an issue and I'll explain how). ## Build instructions @@ -39,16 +39,18 @@ Currently, only Windows 10 x64 (although I've heard reports of it running on 8.1 **Primed build (with all provided libraries):** - 1. Download [precompiled binary dependencies and includes](https://drive.google.com/file/d/1ufLQvH-Me2NLmzNBkjcyD13WTyHb35aB/view?usp=sharing) + 1. Download [precompiled binary dependencies and includes](https://drive.google.com/file/d/1N6IxSpsgemS94z_v82toXhiNs2tLXkz6/view?usp=sharing) 2. Unzip it so that the `deps` folder is in the same place as the .pro and main source files. 3. Open the project with Qt Creator, add your compiler and compile -Note that to try your shiny new executable you'll need to download the program as described above and insert the `models` folder where your new build is output. +Note that to try your shiny new executable you'll need to download a release of the program as described above and replace its executable with your new one, so that all the DLLs are in place. TODO: Add instructions for compile from scratch.
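Review note: in code terms, "fully E2E" means the VITS wrapper returns waveform samples directly, so the vocoder stage is skipped entirely. A hedged condensation of the flow that `Voice::Vocalize` implements later in this diff — `SynthesizeIDs` is a hypothetical name and argument handling is simplified:

```cpp
// Sketch: E2E (VITS) vs. two-stage (text2mel + vocoder) synthesis.
std::vector<float> Voice::SynthesizeIDs(const std::vector<int32_t>& InputIDs)
{
    if (VoxInfo.Architecture.Text2Mel == EText2MelModel::VITS) {
        // Fully E2E: the "mel predictor" already returns audio samples.
        return MelPredictor->DoInference(InputIDs, {/*Speed*/ 1.0f}, {}).Data;
    }

    // Two-stage: text2mel first, then the Multi-Band MelGAN vocoder.
    TFTensor<float> Mel = MelPredictor->DoInference(InputIDs, {}, {});
    return Vocoder.DoInference(Mel).Data;
}
```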
## Externals (and thanks) + - **LibTorch**: https://pytorch.org/cppdocs/installing.html + - **Tensorflow C API**: [https://www.tensorflow.org/install/lang_c](https://www.tensorflow.org/install/lang_c) - **CppFlow** (TF C API -> C++ wrapper): [https://github.com/serizba/cppflow](https://github.com/serizba/cppflow) - **AudioFile** (for WAV export): [https://github.com/adamstark/AudioFile](https://github.com/adamstark/AudioFile) diff --git a/TensorVox.pro b/TensorVox.pro index 34db052..c37464f 100644 --- a/TensorVox.pro +++ b/TensorVox.pro @@ -44,6 +44,7 @@ SOURCES += \ tacotron2.cpp \ tfg2p.cpp \ track.cpp \ + vits.cpp \ voicemanager.cpp \ voxer.cpp @@ -84,6 +85,7 @@ HEADERS += \ tacotron2.h \ tfg2p.h \ track.h \ + vits.h \ voicemanager.h \ voxer.h @@ -103,8 +105,9 @@ else: unix:!android: target.path = /opt/$${TARGET}/bin DEFINES += _CRT_SECURE_NO_WARNINGS INCLUDEPATH += $$PWD/deps/include +INCLUDEPATH += $$PWD/deps/include/libtorch INCLUDEPATH += $$PWD/ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow -win32: LIBS += -L$$PWD/deps/lib/ tensorflow.lib r8bsrc64.lib rnnoise64.lib LogitechLEDLib.lib LibNumberText64.lib +win32: LIBS += -L$$PWD/deps/lib/ tensorflow.lib r8bsrc64.lib rnnoise64.lib LogitechLEDLib.lib LibNumberText64.lib c10.lib torch.lib torch_cpu.lib win32: LIBS += Advapi32.lib User32.lib Psapi.lib @@ -115,7 +118,7 @@ RESOURCES += \ win32:RC_ICONS += winicon.ico -VERSION = 0.9.0.0 +VERSION = 1.0.0.0 CONFIG += force_debug_info QMAKE_CXXFLAGS += /std:c++17 /utf-8 -DPSAPI_VERSION=1 diff --git a/TextTokenizer.cpp b/TextTokenizer.cpp index cfd2425..602bdc5 100644 --- a/TextTokenizer.cpp +++ b/TextTokenizer.cpp @@ -140,7 +140,7 @@ void TextTokenizer::SetNumberText(Numbertext &INum, const string &Lang) -vector TextTokenizer::Tokenize(const std::string & InTxt,ETTSLanguage::Enum Language,bool IsTacotron) +vector TextTokenizer::Tokenize(const std::string & InTxt,bool IsTacotron) { vector ProcessedTokens; diff --git a/TextTokenizer.h b/TextTokenizer.h index 550c3c7..5cc20c9 100644 --- a/TextTokenizer.h +++ b/TextTokenizer.h @@ -28,7 +28,7 @@ class TextTokenizer void SetNumberText(Numbertext& INum,const std::string& Lang); - std::vector Tokenize(const std::string& InTxt, ETTSLanguage::Enum Language = ETTSLanguage::EnglishPhn, bool IsTacotron = false); + std::vector Tokenize(const std::string& InTxt, bool IsTacotron = false); void SetAllowedChars(const std::string &value); }; diff --git a/Voice.cpp b/Voice.cpp index 1eebdff..259a492 100644 --- a/Voice.cpp +++ b/Voice.cpp @@ -4,6 +4,7 @@ std::vector Voice::CharsToID(const std::string & RawInTxt) { + std::cout << "CharsToID: " << RawInTxt << "\n"; std::vector VecPhones; std::u32string InTxt = VoxUtil::StrToU32(RawInTxt); @@ -30,6 +31,7 @@ std::vector Voice::CharsToID(const std::string & RawInTxt) std::vector Voice::PhonemesToID(const std::string & RawInTxt) { + std::cout << "PhonemesToID: " << RawInTxt << "\n"; ZStringDelimiter Delim(RawInTxt); Delim.AddDelimiter(" "); std::u32string InTxt = VoxUtil::StrToU32(RawInTxt); @@ -114,16 +116,28 @@ Voice::Voice(const std::string & VoxPath, const std::string &inName, Phonemizer VoxInfo = VoxUtil::ReadModelJSON(VoxPath + "/info.json"); - if (VoxInfo.Architecture.Text2Mel == EText2MelModel::Tacotron2) + const int32_t Tex2MelArch = VoxInfo.Architecture.Text2Mel; + + if (Tex2MelArch == EText2MelModel::Tacotron2) MelPredictor = std::make_unique(); - else + else if (Tex2MelArch == EText2MelModel::FastSpeech2) MelPredictor = std::make_unique(); + else + MelPredictor = std::make_unique(); + + + 
std::string MelPredInit = VoxPath + "/melgen"; + if (Tex2MelArch == EText2MelModel::VITS) + MelPredInit = VoxPath + "/vits.pt"; - MelPredictor->Initialize(VoxPath + "/melgen",(ETTSRepo::Enum)VoxInfo.Architecture.Repo); + MelPredictor->Initialize(MelPredInit,(ETTSRepo::Enum)VoxInfo.Architecture.Repo); - Vocoder.Initialize(VoxPath + "/vocoder"); + + if (Tex2MelArch != EText2MelModel::VITS) // No vocoder necessary for fully E2E TTS + Vocoder.Initialize(VoxPath + "/vocoder"); + if (InPhn) Processor.Initialize(InPhn); @@ -147,16 +161,22 @@ Voice::Voice(const std::string & VoxPath, const std::string &inName, Phonemizer void Voice::AddPhonemizer(Phonemizer *InPhn) { Processor.Initialize(InPhn); + Processor.GetTokenizer().SetNumberText(NumTxt,VoxCommon::CommonLangConst); } +void Voice::LoadNumberText(const std::string &NumTxtPath) +{ + NumTxt.load(VoxCommon::CommonLangConst,NumTxtPath); +} + std::string Voice::PhonemizeStr(const std::string &Prompt) { return Processor.ProcessTextPhonetic(Prompt,Phonemes,CurrentDict, - (ETTSLanguage::Enum)VoxInfo.Language, + (ETTSLanguageType::Enum)VoxInfo.LangType, true); // default voxistac to true to preserve punctuation. } @@ -167,14 +187,16 @@ VoxResults Voice::Vocalize(const std::string & Prompt, float Speed, int32_t Spea - bool VoxIsTac = VoxInfo.Architecture.Text2Mel == EText2MelModel::Tacotron2; + const int32_t Text2MelN = VoxInfo.Architecture.Text2Mel; + + bool VoxIsTac = Text2MelN != EText2MelModel::FastSpeech2; std::string PromptToFeed = Prompt; - if (VoxInfo.Language > -1) + if (VoxInfo.LangType != ETTSLanguageType::Char) PromptToFeed += VoxInfo.EndPadding; std::string PhoneticTxt = Processor.ProcessTextPhonetic(PromptToFeed,Phonemes,CurrentDict, - (ETTSLanguage::Enum)VoxInfo.Language, + (ETTSLanguageType::Enum)VoxInfo.LangType, VoxIsTac); TFTensor Mel; TFTensor Attention; @@ -184,7 +206,7 @@ VoxResults Voice::Vocalize(const std::string & Prompt, float Speed, int32_t Spea // Note to self: always check for negative or positive language by checking that it is lower than 0 // if we try greater than 0, English is missed. 
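Review note: the "note to self" comment above is obsolete under the V1 language spec introduced by this PR — `info.json` now carries a `Name-Method` string rather than a signed integer. A sketch of the mapping that `ReadModelJSON` performs (see VoxCommon.cpp below); `ParseLangSpec` is a hypothetical helper and the table mirrors the PR's `V1LangTypes`:

```cpp
#include <map>
#include <string>
#include <utility>

// Hypothetical helper: "English-ARPA" -> {"English", ETTSLanguageType::ARPA}.
// Assumes a well-formed "Name-Method" string.
std::pair<std::string, int32_t> ParseLangSpec(const std::string& FullName)
{
    static const std::map<std::string, int32_t> Types = {
        {"IPA", ETTSLanguageType::IPA},
        {"IPAStressed", ETTSLanguageType::IPA},
        {"ARPA", ETTSLanguageType::ARPA},
        {"Char", ETTSLanguageType::Char},
        {"GlobalPhone", ETTSLanguageType::GlobalPhone}};

    const size_t Dash = FullName.find('-');
    return {FullName.substr(0, Dash),             // s_Language, e.g. "English"
            Types.at(FullName.substr(Dash + 1))}; // LangType
}
```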
- if (VoxInfo.Language < 0){ + if (VoxInfo.LangType == ETTSLanguageType::Char){ InputIDs = CharsToID(PhoneticTxt); InputIDs.push_back(std::stoi(VoxInfo.EndPadding)); @@ -192,7 +214,7 @@ VoxResults Voice::Vocalize(const std::string & Prompt, float Speed, int32_t Spea } else { - if (VoxInfo.s_Language.find("IPA") != std::string::npos) + if (VoxInfo.LangType == ETTSLanguageType::IPA) InputIDs = CharsToID(PhoneticTxt); else InputIDs = PhonemesToID(PhoneticTxt); @@ -206,22 +228,39 @@ VoxResults Voice::Vocalize(const std::string & Prompt, float Speed, int32_t Spea std::vector IntArgs; - if (VoxIsTac) + + if (Text2MelN == EText2MelModel::Tacotron2) { Mel = ((Tacotron2*)MelPredictor.get())->DoInference(InputIDs,FloatArgs,IntArgs,SpeakerID, EmotionID); Attention = ((Tacotron2*)MelPredictor.get())->Attention; } - else + else if (Text2MelN == EText2MelModel::FastSpeech2) { FloatArgs = {Speed,Energy,F0}; Mel = ((FastSpeech2*)MelPredictor.get())->DoInference(InputIDs,FloatArgs,IntArgs,SpeakerID, EmotionID); + }else + { + FloatArgs = {Speed}; + TFTensor Audio = MelPredictor.get()->DoInference(InputIDs,FloatArgs,IntArgs,SpeakerID,EmotionID); + Attention = ((VITS*)MelPredictor.get())->Attention; + + std::vector AudioData = Audio.Data; + + Mel.Shape.push_back(-1); // Tell the plotter that we have no mel to plot + + // As VITS is fully E2E, we return here + + return {AudioData,Attention,Mel}; + } + // Vocoder inference + TFTensor AuData = Vocoder.DoInference(Mel); std::vector AudioData; @@ -269,7 +308,7 @@ void Voice::SetDictEntries(const std::vector &InEntries) { for (const DictEntry& Entr : InEntries) { - if (Entr.Language != VoxInfo.s_Language) + if (Entr.Language != VoxInfo.s_Language_Fullname) continue; CurrentDict.push_back(Entr); diff --git a/Voice.h b/Voice.h index f63d982..cfb021c 100644 --- a/Voice.h +++ b/Voice.h @@ -4,6 +4,8 @@ #include "tacotron2.h" #include "MultiBandMelGAN.h" #include "EnglishPhoneticProcessor.h" +#include "vits.h" +#include "Numbertext.hxx" #include "phoneticdict.h" @@ -51,6 +53,8 @@ class Voice std::string ModelInfo; std::vector CharsToID(const std::string &RawInTxt); + + Numbertext NumTxt; public: /* Voice constructor, arguments obligatory. -> VoxPath: Path of folder where models are contained. 
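Review note: worth highlighting from the `Vocalize` hunk above — the encoder choice now keys off `LangType`. A compressed sketch, assuming the `Voice` members shown in this diff (`EncodeForModel` is a hypothetical name):

```cpp
// Sketch: IPA and char-based models are encoded one ID per UTF-32 character,
// while ARPA/GlobalPhone models get one ID per space-delimited phoneme token.
std::vector<int32_t> Voice::EncodeForModel(const std::string& PhoneticTxt)
{
    if (VoxInfo.LangType == ETTSLanguageType::Char ||
        VoxInfo.LangType == ETTSLanguageType::IPA)
        return CharsToID(PhoneticTxt);

    return PhonemesToID(PhoneticTxt);
}
```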
@@ -70,6 +74,7 @@ class Voice Voice(const std::string& VoxPath, const std::string& inName,Phonemizer* InPhn); void AddPhonemizer(Phonemizer* InPhn); + void LoadNumberText(const std::string& NumTxtPath); std::string PhonemizeStr(const std::string& Prompt); diff --git a/VoxCommon.cpp b/VoxCommon.cpp index 3054edc..fbc4458 100644 --- a/VoxCommon.cpp +++ b/VoxCommon.cpp @@ -4,14 +4,34 @@ using namespace nlohmann; #include #include // std::wstring_convert -const std::vector Text2MelNames = {"FastSpeech2","Tacotron2"}; -const std::vector VocoderNames = {"Multi-Band MelGAN","MelGAN-STFT"}; -const std::vector RepoNames = {"TensorflowTTS","Coqui-TTS"}; +const std::vector Text2MelNames = {"FastSpeech2","Tacotron2","VITS"}; +const std::vector VocoderNames = {"Multi-Band MelGAN","MelGAN-STFT",""}; +const std::vector RepoNames = {"TensorflowTTS","Coqui-TTS","jaywalnut310"}; const std::vector LanguageNames = {"English","Spanish", "German", "EnglishIPA"}; const std::vector LangaugeNamesNumToWords = {"en", "es","de","en"}; + + +#include "ext/ZCharScanner.h" + +const std::map LegacyToV1Lang = { + {-3,"German-Char"}, + {0,"English-ARPA"}, + {-1,"English-Char"}, + {3,"English-IPA"}, + {1,"Spanish-GlobalPhone"} + }; + +const std::map V1LangTypes ={ + {"IPA",ETTSLanguageType::IPA}, + {"IPAStressed",ETTSLanguageType::IPA}, + {"ARPA",ETTSLanguageType::ARPA}, + {"Char",ETTSLanguageType::Char}, + {"GlobalPhone",ETTSLanguageType::GlobalPhone} +}; + void VoxUtil::ExportWAV(const std::string & Filename, const std::vector& Data, unsigned SampleRate) { AudioFile::AudioBuffer Buffer; Buffer.resize(1); @@ -83,11 +103,30 @@ VoiceInfo VoxUtil::ReadModelJSON(const std::string &InfoFilename) CuArch.s_Vocoder = VocoderNames[CuArch.Vocoder]; // Language value for the info - int32_t RawLang = JS["language"].get(); + auto LangVal = JS["language"]; + + + std::string LanguageFullName; - // Language value for the vectors - int32_t LanguageValue = ProcessLanguageValue(RawLang); + if (LangVal.is_string()){ // V1 Language type standard model; see ETTSLanguageType enum desc on header + LanguageFullName = LangVal.get(); + + }else{ + // Convert legacy language to V1 + int32_t LegacyLang = JS["language"].get(); + LanguageFullName = LegacyToV1Lang.find(LegacyLang)->second; + + + } + + ZStringDelimiter LangDel(LanguageFullName); + LangDel.AddDelimiter("-"); + + std::string LangName = LangDel[0]; + std::string LangTypeStr = LangDel[1]; + + int32_t LangType = V1LangTypes.find(LangTypeStr)->second; @@ -95,9 +134,11 @@ VoiceInfo VoxUtil::ReadModelJSON(const std::string &InfoFilename) std::string EndToken = JS["pad"].get(); // If it's phonetic then it's the token str, like "@EOS" - if (RawLang > -1) + if (LangType != ETTSLanguageType::Char && EndToken.size()) EndToken = " " + EndToken; // In this case we add a space for separation since we directly append the value to the prompt + + VoiceInfo Inf{JS["name"].get(), JS["author"].get(), JS["version"].get(), @@ -105,10 +146,11 @@ VoiceInfo VoxUtil::ReadModelJSON(const std::string &InfoFilename) CuArch, JS["note"].get(), JS["sarate"].get(), - RawLang, - LanguageNames[LanguageValue], - LangaugeNamesNumToWords[LanguageValue], - EndToken}; + LangName, + LanguageFullName, + EndToken, + LangType + }; if (Inf.Note.size() > MaxNoteSize) Inf.Note = Inf.Note.substr(0,MaxNoteSize); diff --git a/VoxCommon.hpp b/VoxCommon.hpp index 32009f4..27b365b 100644 --- a/VoxCommon.hpp +++ b/VoxCommon.hpp @@ -3,16 +3,37 @@ VoxCommon.hpp : Defines common data structures and constants to be used with TensorVox */ #include + 
+#undef slots // https://github.com/pytorch/pytorch/issues/19405 + + +#pragma warning(push, 0) // LibTorch spams us with warnings +#include // One-stop header. +#pragma warning(pop) + +#define slots Q_SLOTS + #include #include "ext/AudioFile.hpp" #include "ext/CppFlow/ops.h" #include "ext/CppFlow/model.h" + + + #include + + #define IF_RETURN(cond,ret) if (cond){return ret;} const uint32_t CommonSampleRate = 48000; +namespace VoxCommon{ +const std::string CommonLangConst = "_std"; + + +} + // https://github.com/almogh52/rnnoise-cmake/blob/d981adb2e797216f456cfcf158f73761a29981f8/examples/rnnoise_demo.c#L31 const uint32_t RNNoiseFrameSize = 480; typedef std::vector> TensorVec; @@ -29,14 +50,16 @@ struct TFTensor { namespace ETTSRepo { enum Enum{ TensorflowTTS = 0, - CoquiTTS + CoquiTTS, + jaywalnut310 // OG VITS repo }; } namespace EText2MelModel { enum Enum{ FastSpeech2 = 0, - Tacotron2 + Tacotron2, + VITS }; } @@ -44,14 +67,17 @@ enum Enum{ namespace EVocoderModel{ enum Enum{ MultiBandMelGAN = 0, - MelGANSTFT // there is no architectural changes so we can use mb-melgan class for melgan-stft + MelGANSTFT, // there is no architectural changes so we can use mb-melgan class for melgan-stft + NullVocoder // For fully E2E models }; } +// ===========DEPRECATED=============== // Negative numbers denote character-based language, positive for phoneme based. Standard is char-equivalent language idx = negative(phn-based) // In case of English, since -0 doesn't exist, we use -1. // For example, German phonetic would be 3, and character based would be -3 // IPA-phn-based are mainly for Coqui +// ===========DEPRECATED=============== namespace ETTSLanguage{ enum Enum{ GermanChar = -3, @@ -65,6 +91,23 @@ enum Enum{ } +/* Language Spec Standard V1: +- Language is specified with a string from the JSON and the type is saved instead of relying +on ETTSLanguage enum. 
+-- The string is LanguageName-Method; for example English-StressedIPA, English-ARPA, German-Char +- Both the pre-V1 standard and the current one are supported +- The V1 standard does not require code changes to add new languages + +*/ + +namespace ETTSLanguageType{ +enum Enum{ + ARPA = 0, + Char, + IPA, + GlobalPhone +}; +} struct ArchitectureInfo{ @@ -89,11 +132,11 @@ struct VoiceInfo{ uint32_t SampleRate; - int32_t Language; - std::string s_Language; - std::string s_Language_Num; + std::string s_Language; // Language name = English-ARPA -> "English" + std::string s_Language_Fullname; // Full language name = "English-ARPA" std::string EndPadding; + int32_t LangType; @@ -101,6 +144,7 @@ namespace VoxUtil { + std::string U32ToStr(const std::u32string& InU32); std::u32string StrToU32(const std::string& InStr); @@ -109,6 +153,28 @@ namespace VoxUtil { VoiceInfo ReadModelJSON(const std::string& InfoFilename); + + // Copy PyTorch tensor + + template <typename D> + TFTensor<D> CopyTensor(at::Tensor& InTens){ + D* Data = InTens.data<D>(); + std::vector<int64_t> Shape = InTens.sizes().vec(); + + size_t TotalSize = 1; + + for (const int64_t& Dim : Shape) + TotalSize *= Dim; + + std::vector<D> DataVec = std::vector<D>(Data,Data + TotalSize); + + return TFTensor<D>{DataVec,Shape,TotalSize}; + + + } + + + // Copy CppFlow (TF) tensor template <typename D> TFTensor<D> CopyTensor(cppflow::tensor& InTens) { @@ -123,8 +189,8 @@ namespace VoxUtil { } - template <typename T, typename V> - bool FindInVec(V In, const std::vector<T>& Vec, size_t& OutIdx, size_t start = 0) { + template <typename T, typename VXVec1> + bool FindInVec(VXVec1 In, const std::vector<T>& Vec, size_t& OutIdx, size_t start = 0) { for (size_t xx = start;xx < Vec.size();xx++) { if (Vec[xx] == In) { @@ -139,8 +205,8 @@ namespace VoxUtil { return false; } - template <typename T, typename V> - bool FindInVec2(V In, const std::vector<T>& Vec, size_t& OutIdx, size_t start = 0) { + template <typename T, typename VXVec1> + bool FindInVec2(VXVec1 In, const std::vector<T>& Vec, size_t& OutIdx, size_t start = 0) { for (size_t xx = start;xx < Vec.size();xx++) { if (Vec[xx] == In) { diff --git a/g2p_train/train_and_export.py index 9326eba..0f04618 100644 --- a/g2p_train/train_and_export.py +++ b/g2p_train/train_and_export.py @@ -145,7 +145,7 @@ def main(): txtpadded, phnpadded, txtsize, phnsize, phn_wi, txt_wi, words, phns = preprocess(args.dict_path,args.char_tok_phn) yf = open(args.config_path,"r") - config = yaml.load(yf) + config = yaml.load(yf,Loader=yaml.FullLoader) yf.close() print("Finished preprocessing. 
Getting model") diff --git a/mainwindow.cpp b/mainwindow.cpp index 08bb150..96d5ef5 100644 --- a/mainwindow.cpp +++ b/mainwindow.cpp @@ -589,6 +589,8 @@ void MainWindow::PlayBuffer(QBuffer *pBuff,bool ByUser, int32_t RowID) if (MelSpec.Shape[0] != -1) PlotSpec(MelSpec,( ((float)NumSamples) / ((float)CommonSampleRate))); + else + ui->tabMetrics->setTabEnabled(1,false); @@ -776,7 +778,7 @@ void MainWindow::ProcessCurlies(QString &ModTxt) // Curlie processing not supported in IPA - if (GetCurrentVoice()->GetInfo().s_Language.find("IPA") != std::string::npos) + if (GetCurrentVoice()->GetInfo().LangType == ETTSLanguageType::IPA) { QMessageBox::critical((QWidget*)pDarkFw,"Warning","Curly brace phonetic text input processing not supported in IPA"); @@ -798,9 +800,9 @@ void MainWindow::ProcessCurlies(QString &ModTxt) - // Only English requires all phn input to be uppercase + // Only ARPA requires all phn input to be uppercase - if (GetCurrentVoice()->GetInfo().Language == 0) + if (GetCurrentVoice()->GetInfo().LangType == ETTSLanguageType::ARPA) Tk = Tk.toUpper(); NewTokens.push_back("@" + Tk); @@ -920,8 +922,12 @@ void MainWindow::on_btnLoad_clicked() LogiLedFlashLighting(0,100,100,5000,500); - if (VoMan[VoID]->GetInfo().Architecture.Text2Mel != EText2MelModel::Tacotron2) - ui->tabMetrics->setTabEnabled(2,false); + if (VoMan[VoID]->GetInfo().Architecture.Text2Mel == EText2MelModel::FastSpeech2) + ui->tabMetrics->setTabEnabled(2,false); // FS2 has no attention + + if (VoMan[VoID]->GetInfo().Architecture.Text2Mel == EText2MelModel::VITS) + ui->tabMetrics->setTabEnabled(1,false); // VITS has no mel + @@ -1187,10 +1193,19 @@ void MainWindow::HandleIsMultiSpeaker(size_t inVid) ArchitectureInfo Inf = CurrentVoice.GetInfo().Architecture; - if (Inf.Text2Mel == EText2MelModel::FastSpeech2) + if (Inf.Text2Mel == EText2MelModel::FastSpeech2 || Inf.Text2Mel == EText2MelModel::VITS) { ui->grpFs2Params->show(); - ui->chkBiPad->setEnabled(true); + + + bool IsFs2 = Inf.Text2Mel == EText2MelModel::FastSpeech2; + + ui->SubEnergy_2->setVisible(IsFs2); + ui->SubF0_2->setVisible(IsFs2); + + ui->chkBiPad->setEnabled(IsFs2); + + } else { @@ -1274,7 +1289,7 @@ void MainWindow::on_actionOverrides_triggered() } - if (VoMan[CurrentIndex]->GetInfo().Language < 0){ + if (VoMan[CurrentIndex]->GetInfo().LangType == ETTSLanguageType::Char){ QMessageBox::critical(FwParent,"Error","Phonetic overrides dictionary is not available for character-based models. 
Please use a phoneme-based model."); return; @@ -1289,7 +1304,7 @@ void MainWindow::on_actionOverrides_triggered() PhdDialog Dlg(FwParent); Dlg.Entrs = PhonDict.Entries; - Dlg.CurrentLang = VoMan[CurrentIndex]->GetInfo().s_Language; + Dlg.CurrentLang = VoMan[CurrentIndex]->GetInfo().s_Language_Fullname; FDlg.setContent(&Dlg); FDlg.ContentDlg(&Dlg); @@ -1316,6 +1331,7 @@ void MainWindow::SetDict() VoMan.SetDict(PhonDict.Entries); for (Voice*& Vo : VoMan.GetVoices()) { + Vo->SetDictEntries(PhonDict.Entries); } @@ -1528,6 +1544,12 @@ void MainWindow::on_tabMetrics_currentChanged(int index) ui->tabMetrics->setMinimumHeight(150); + } + if (index == 2) + { + ui->tabMetrics->setSizePolicy(QSizePolicy::Policy::Expanding,QSizePolicy::Policy::Expanding); + ui->tabMetrics->setMinimumHeight(225); + } update(); @@ -1565,6 +1587,7 @@ void MainWindow::PlotSpec(const TFTensor &InMel,float TimeInSecs) { UpdateIfDoSlides(); ui->widSpec->DoPlot(InMel,TimeInSecs); + ui->tabMetrics->setTabEnabled(1,true); } @@ -1584,7 +1607,7 @@ void MainWindow::on_actExAtt_triggered() { if (!ui->tabMetrics->isTabEnabled(2)) { - QMessageBox::critical(FwParent,"Error","There is no attention map to export. Only Tacotron 2 models generate alignment."); + QMessageBox::critical(FwParent,"Error","There is no attention map to export. Only Tacotron 2 or VITS models generate alignment."); return; @@ -1602,6 +1625,13 @@ void MainWindow::on_actExAtt_triggered() void MainWindow::on_actExSpec_triggered() { + if (!ui->tabMetrics->isTabEnabled(1)) + { + QMessageBox::critical(FwParent,"Error","There is no spectrogram to export."); + return; + + + } QString ofname = QFileDialog::getSaveFileName(FwParent, tr("Export PNG file"), "Spect", tr("PNG image (*.png)")); if (!ofname.size()) diff --git a/mainwindow.ui b/mainwindow.ui index 2122d4b..eb4a4af 100644 --- a/mainwindow.ui +++ b/mainwindow.ui @@ -115,108 +115,144 @@ + + + 0 + 4 + + - FastSpeech2 Parameters + Control Parameters + + 1 + + + QLayout::SetMinimumSize + - - - - - Energy - - - - - - - 200 - - - 100 - - - Qt::Horizontal - - - - - - - 100% - - - - + + + + 0 + 0 + + + + + + + Energy + + + + + + + 200 + + + 100 + + + Qt::Horizontal + + + + + + + 100% + + + + + - - - - - Speed - - - - - - - 200 - - - 100 - - - Qt::Horizontal - - - - - - - 100% - - - - + + + + 0 + 0 + + + + + + + Speed + + + + + + + 200 + + + 100 + + + Qt::Horizontal + + + + + + + 100% + + + + + - - - - - - 0 - 0 - - - - F0 - - - - - - - 200 - - - 100 - - - Qt::Horizontal - - - - - - - 100% - - - - + + + + 0 + 0 + + + + + + + + 0 + 0 + + + + F0 + + + + + + + 200 + + + 100 + + + Qt::Horizontal + + + + + + + 100% + + + + + @@ -339,7 +375,7 @@ 4 - 9 + 14 @@ -435,7 +471,7 @@ 0 - 1 + 4 diff --git a/melgen.h b/melgen.h index 0c74563..f4dc15b 100644 --- a/melgen.h +++ b/melgen.h @@ -1,18 +1,21 @@ #ifndef MELGEN_H #define MELGEN_H + + #include "ext/CppFlow/ops.h" #include "ext/CppFlow/model.h" - #include "VoxCommon.hpp" + #include // MelGen: base virtual class for mel generators class MelGen { private: - ETTSRepo::Enum CurrentRepo; + public: + ETTSRepo::Enum CurrentRepo; MelGen(); MelGen(const std::string& SavedModelFolder,ETTSRepo::Enum InTTSRepo); diff --git a/modelinfodlg.cpp b/modelinfodlg.cpp index 02d3d01..c97e8b4 100644 --- a/modelinfodlg.cpp +++ b/modelinfodlg.cpp @@ -23,7 +23,12 @@ void ModelInfoDlg::SetInfo(const QString &ModelName, const QString &Info, int32_ ui->lblModelTitle->setText(ModelName); - ui->lblModelArchitecture->setText("Architecture: " + Repo + " " + MelGen + " & " + Vocoder); + QString ArchShow = 
"Architecture: " + Repo + " " + MelGen; + + if (Vocoder.size()) + ArchShow += " & " + Vocoder; + + ui->lblModelArchitecture->setText(ArchShow); ui->lblSampleRate->setText("Sampling rate: " + QString::number(SampleRate / 1000) + "KHz"); QString ImgPath = QApplication::applicationDirPath() + "/models/" + ModelName + "/image.png"; diff --git a/phonemizer.cpp b/phonemizer.cpp index 0a76a98..f7ea486 100644 --- a/phonemizer.cpp +++ b/phonemizer.cpp @@ -157,12 +157,11 @@ Phonemizer::Phonemizer() } -bool Phonemizer::Initialize(const std::string InPath, const std::string &NLangName,bool Minimal) +bool Phonemizer::Initialize(const std::string InPath, bool Minimal) { IsMinimal = Minimal; - NumTxt.load(NLangName,InPath + "/" + NLangName + ".sor"); - NumTxtLang = NLangName; + // Load char indices CharId = GetDelimitedFile(InPath + "/char2id.txt"); diff --git a/phonemizer.h b/phonemizer.h index afb4fc3..feb35e9 100644 --- a/phonemizer.h +++ b/phonemizer.h @@ -4,7 +4,6 @@ #include #include #include -#include struct IdStr{ int32_t ID; @@ -30,7 +29,6 @@ class Phonemizer std::vector DictBuckets; - Numbertext NumTxt; std::string NumTxtLang; bool IsMinimal; @@ -54,7 +52,7 @@ class Phonemizer std::string PhnLanguage; public: - int32_t PhnLangID; + std::string PhnLangID; public: Phonemizer(); /* @@ -65,14 +63,14 @@ class Phonemizer * -- phn2id.txt: Translation from output ID from the model to phoneme * - A model/ folder where a G2P-Tensorflow model was saved as SavedModel * - dict.txt: Phonetic dictionary. First it searches the word there and if it can't be found then it uses the model. - * - (two-char name).sor: Name of num2text. + * * * If Minimal == true, it only requires the .sor and char2id (for determining allowed graphemes only, * the IDs can be arbitrary in this case) * A Minimal phonemizer only serves to hold values useful to the processor and tokenizer, for char-based models. 
*/ - bool Initialize(const std::string InPath, const std::string& NLangName, bool Minimal); + bool Initialize(const std::string InPath, bool Minimal); std::string ProcessWord(const std::string& InWord, float Temperature = 0.1f); @@ -81,8 +79,6 @@ class Phonemizer std::string GetGraphemeChars(); - inline Numbertext& GetNumTxt() {return NumTxt;} - ~Phonemizer(); inline const std::string& GetNumTxtLang() {return NumTxtLang;} diff --git a/phoneticdict.cpp b/phoneticdict.cpp index 484766e..dee09e6 100644 --- a/phoneticdict.cpp +++ b/phoneticdict.cpp @@ -1,5 +1,18 @@ #include "phoneticdict.h" #include "ext/ZFile.h" +#include <map> + +const std::map<std::string,std::string> LegToV1{ + {"English","English-ARPA"}, + {"Spanish","Spanish-GlobalPhone"} +}; + +void AutoConvertToV1(std::string& LangStr){ + auto It = LegToV1.find(LangStr); + if (It != LegToV1.end()) + LangStr = It->second; + +} ZFILE_IOVR(DictEntry,inentr){ right << inentr.Word; @@ -12,6 +25,9 @@ ZFILE_OOVR(DictEntry,entr){ right >> entr.Word; right >> entr.PhSpelling; right >> entr.Language; + + AutoConvertToV1(entr.Language); + return right; } diff --git a/vits.cpp b/vits.cpp new file mode 100644 index 0000000..d238b12 --- /dev/null +++ b/vits.cpp @@ -0,0 +1,80 @@ +#include "vits.h" + + +std::vector<int64_t> VITS::ZeroPadVec(const std::vector<int32_t> &InIDs) +{ + std::vector<int64_t> NewIDs; + NewIDs.reserve(InIDs.size() * 2); + + for (auto CharID : InIDs) + { + NewIDs.push_back(0); + NewIDs.push_back((int64_t)CharID); + + } + // Add final 0 + NewIDs.push_back(0); + + return NewIDs; + +} + +VITS::VITS() +{ + +} + +bool VITS::Initialize(const std::string &SavedModelFolder, ETTSRepo::Enum InTTSRepo) +{ + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + + Model = torch::jit::load(SavedModelFolder); + + } + catch (const c10::Error& e) { + return false; + + } + + CurrentRepo = InTTSRepo; + return true; +} + +TFTensor<float> VITS::DoInference(const std::vector<int32_t> &InputIDs, const std::vector<float> &ArgsFloat, const std::vector<int32_t> ArgsInt, int32_t SpeakerID, int32_t EmotionID) +{ + std::vector<int64_t> PaddedIDs = ZeroPadVec(InputIDs); + std::vector<int64_t> inLen = { (int64_t)PaddedIDs.size() }; + + + // ZDisket: Is this really necessary? 
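// Review note (not part of the patch): regarding the question above, the
// requires_grad(false) on the TensorOptions below is most likely redundant for
// TorchScript inference. The conventional belt-and-braces alternative would be
// a scoped guard around the forward call instead, e.g.:
//
//     torch::NoGradGuard NoGrad;  // RAII: disables autograd in this scope
//     c10::IValue Output = Model.get_method("infer_ts")(inputs);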
+ torch::TensorOptions Opts = torch::TensorOptions().requires_grad(false); + + auto InIDS = torch::tensor(PaddedIDs, Opts).unsqueeze(0); + auto InLens = torch::tensor(inLen, Opts); + auto InLenScale = torch::tensor({ ArgsFloat[0]}, Opts); + + + std::vector<torch::jit::IValue> inputs{ InIDS,InLens,InLenScale }; + + // Infer + + c10::IValue Output = Model.get_method("infer_ts")(inputs); + + // Output = tuple (audio,att) + + auto OutputT = Output.toTuple(); + + // Grab audio + // [1, frames] -> [frames] + auto AuTens = OutputT.get()->elements()[0].toTensor().squeeze(); + + // Grab Attention + // [1, 1, x, y] -> [x, y] -> [y,x] -> [1, y, x] + auto AttTens = OutputT.get()->elements()[1].toTensor().squeeze().transpose(0,1).unsqueeze(0); + + Attention = VoxUtil::CopyTensor<float>(AttTens); + + return VoxUtil::CopyTensor<float>(AuTens); + +} diff --git a/vits.h b/vits.h new file mode 100644 index 0000000..d2fc766 --- /dev/null +++ b/vits.h @@ -0,0 +1,47 @@ +#ifndef VITS_H +#define VITS_H + + +#include "melgen.h" + + + + + +// VITS is a fully E2E model; no separate vocoder needed +class VITS : public MelGen +{ +private: + torch::jit::script::Module Model; + + // Most VITS models require zero-interspersed input IDs + std::vector<int64_t> ZeroPadVec(const std::vector<int32_t>& InIDs); + +public: + TFTensor<float> Attention; + + VITS(); + + // Since VITS runs on PyTorch, we override the loader + /* + Initialize and load the model + + -> SavedModelFolder: Not a folder, but the path to the TorchScripted .pt file + <- Returns: (bool)Success + */ + virtual bool Initialize(const std::string& SavedModelFolder, ETTSRepo::Enum InTTSRepo); + + + /* + Do inference on a VITS model. + + -> InputIDs: Input IDs of tokens for inference + -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model. + -> ArgsFloat[0]: Length scale. 
+ + <- Returns: TFTensor with shape {frames} of audio data + */ + TFTensor<float> DoInference(const std::vector<int32_t>& InputIDs,const std::vector<float>& ArgsFloat,const std::vector<int32_t> ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1); +}; + +#endif // VITS_H diff --git a/voicemanager.cpp b/voicemanager.cpp index 808cdfc..f8a4415 100644 --- a/voicemanager.cpp +++ b/voicemanager.cpp @@ -2,12 +2,12 @@ #define SAFE_DELETE(pdel)if (pdel){delete pdel;} #include -Phonemizer* VoiceManager::LoadPhonemizer(const QString& InPhnLang,const QString& InNumberLang,int32_t InLangNum) +Phonemizer* VoiceManager::LoadPhonemizer(const QString& InPhnLang,int32_t InLangNum) { for (Phonemizer*& Phn : Phonemizers) { - if (Phn->PhnLangID == InLangNum) + if (Phn->GetPhnLanguage() == InPhnLang.toStdString()) return Phn; @@ -18,10 +18,10 @@ Phonemizer* VoiceManager::LoadPhonemizer(const QString& InPhnLang,const QString& // Initialize regularly or minimally CreatePhn->Initialize(QString(QCoreApplication::applicationDirPath() + "/g2p/" + InPhnLang).toStdString(), - InNumberLang.toStdString(), InLangNum < 0); + InLangNum == ETTSLanguageType::Char); CreatePhn->SetPhnLanguage(InPhnLang.toStdString()); - CreatePhn->PhnLangID = InLangNum; + Phonemizers.push_back(CreatePhn); @@ -34,9 +34,13 @@ size_t VoiceManager::LoadVoice(const QString &Voname) { Voice* NuVoice = new Voice(QString(QCoreApplication::applicationDirPath() + "/models/" + Voname).toStdString(),Voname.toStdString(),nullptr); - QString PLang = QString::fromStdString(NuVoice->GetInfo().s_Language); - QString NLang = QString::fromStdString(NuVoice->GetInfo().s_Language_Num); - NuVoice->AddPhonemizer(LoadPhonemizer(PLang,NLang,NuVoice->GetInfo().Language)); + QString PLang = QString::fromStdString(NuVoice->GetInfo().s_Language_Fullname); + NuVoice->AddPhonemizer(LoadPhonemizer(PLang,NuVoice->GetInfo().LangType)); + + std::string NumTxtPath = QString(QCoreApplication::applicationDirPath() + "/num2txt/" + + QString::fromStdString(NuVoice->GetInfo().s_Language) + ".sor").toStdString(); + + NuVoice->LoadNumberText(NumTxtPath); Voices.push_back(NuVoice); Voices[Voices.size() - 1]->SetDictEntries(ManDict); diff --git a/voicemanager.h b/voicemanager.h index 8ca1320..cc740ee 100644 --- a/voicemanager.h +++ b/voicemanager.h @@ -12,7 +12,7 @@ class VoiceManager std::vector<Phonemizer*> Phonemizers; - Phonemizer* LoadPhonemizer(const QString& InPhnLang, const QString& InNumberLang, int32_t InLangNum); + Phonemizer* LoadPhonemizer(const QString& InPhnLang, int32_t InLangNum);
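Review note: to close out the new files, a hedged usage sketch of the `VITS` wrapper declared above. The model path, token IDs, and sample rate are illustrative only; in the application the IDs come from `Voice`'s phonemization path and the sample rate from the model's `info.json`:

```cpp
#include "vits.h"
#include "VoxCommon.hpp"

int main()
{
    VITS Vits;

    // Initialize takes the TorchScripted .pt file, not a folder.
    if (!Vits.Initialize("models/MyVoice/vits.pt", ETTSRepo::jaywalnut310))
        return 1; // torch::jit::load threw; bad path or wrong file

    std::vector<int32_t> InputIDs = {12, 43, 7, 19}; // hypothetical token IDs
    // ArgsFloat[0] is the length scale (1.0 = normal speed); no int args needed.
    TFTensor<float> Audio = Vits.DoInference(InputIDs, {1.0f}, {});

    // Hypothetical rate; the real one is read from the model's info.json.
    VoxUtil::ExportWAV("out.wav", Audio.Data, 22050);
    return 0;
}
```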