-
Notifications
You must be signed in to change notification settings - Fork 4
/
translate-multi.py
98 lines (77 loc) · 3.56 KB
/
translate-multi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import streamlit as st
import sentencepiece as spm
import ctranslate2
from nltk import sent_tokenize
def translate(source, translator, sp_source_model, sp_target_model):
    """Translate a text, sentence by sentence, with a CTranslate2 model.

    The text is split into sentences, the sentences are tokenized with the
    source SentencePiece model, translated as one batch, and the first
    hypothesis of each result is detokenized with the target SentencePiece
    model.

    Args:
        source (str): Source text to translate; may hold several sentences
        translator (object): Object of Translator, with the CTranslate2 model
        sp_source_model (object): Object of SentencePieceProcessor, with the SentencePiece source model
        sp_target_model (object): Object of SentencePieceProcessor, with the SentencePiece target model

    Returns:
        Detokenized translations of the sentences found in the source text;
        an empty list when the source is empty or only whitespace
    """
    # Blank input has nothing to translate: return early so the result is
    # always a list and no empty batch is handed to the tokenizer/translator.
    if not source or not source.strip():
        return []

    source_sentences = sent_tokenize(source)  # split sentences
    source_tokenized = sp_source_model.encode(source_sentences, out_type=str)
    translations = translator.translate_batch(source_tokenized, replace_unknowns=True)
    # Keep only the tokens of the first hypothesis of each result.
    translations = [translation[0]["tokens"] for translation in translations]
    translations_detokenized = sp_target_model.decode(translations)
    return translations_detokenized
# [Modify] File paths here to the CTranslate2 and SentencePiece models.
@st.cache(allow_output_mutation=True)
def load_models(lang_pair, device="cpu"):
    """Load the CTranslate2 model and SentencePiece models for a language pair

    Args:
        lang_pair (str): Language pair to load the models for, either
            "English-to-French" or "French-to-English"
        device (str): "cpu" (default) or "cuda"

    Returns:
        Tuple of (translator, sp_source_model, sp_target_model): the
        CTranslate2 Translator and the source and target
        SentencePieceProcessor objects

    Raises:
        ValueError: If lang_pair is not one of the supported language pairs
    """
    if lang_pair == "English-to-French":
        ct_model_path = "/path/to/your/ctranslate2/model/"
        sp_source_model_path = "/path/to/your/sp_source.model"
        sp_target_model_path = "/path/to/your/sp_target.model"
    elif lang_pair == "French-to-English":
        ct_model_path = "/path/to/your/ctranslate2/model/"
        sp_source_model_path = "/path/to/your/sp_source.model"
        sp_target_model_path = "/path/to/your/sp_target.model"
    else:
        # Without this branch the path variables would be unbound below and
        # the caller would only see an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported language pair: {!r} (expected 'English-to-French' "
            "or 'French-to-English')".format(lang_pair)
        )

    sp_source_model = spm.SentencePieceProcessor(sp_source_model_path)
    sp_target_model = spm.SentencePieceProcessor(sp_target_model_path)
    translator = ctranslate2.Translator(ct_model_path, device)

    return translator, sp_source_model, sp_target_model
# Title for the page and nice icon
st.set_page_config(page_title="NMT", page_icon="🤖")

# Header
st.title("Translate")

# Form to add your items
with st.form("my_form"):

    # Dropdown menu to select a language pair
    lang_pair = st.selectbox("Select Language Pair",
                             ("English-to-French", "French-to-English"))

    # Textarea to type the source text.
    user_input = st.text_area("Source Text", max_chars=200)

    # Load models (load_models is wrapped in st.cache)
    translator, sp_source_model, sp_target_model = load_models(lang_pair, device="cpu")

    # Create a button
    submitted = st.form_submit_button("Translate")

    # If the button pressed, translate and print the translation.
    # Translating only after submit avoids running the model on every
    # script run, including the first render with an empty text area.
    if submitted:
        sources = user_input.split("\n")  # split on new line.

        translations = []
        for source in sources:
            # Blank lines are kept as blank output lines without calling the model.
            if source.strip():
                sentences = translate(source, translator, sp_source_model, sp_target_model)
            else:
                sentences = []
            translations.append(" ".join(sentences))

        st.write("Translation")
        st.code("\n".join(translations))
# Optional Style: hides the elements matched by #MainMenu, footer and header,
# and sets the padding of the main block container to zero on all sides.
_PAGE_STYLE = """ <style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}
.reportview-container .main .block-container{
padding-top: 0rem;
padding-right: 0rem;
padding-left: 0rem;
padding-bottom: 0rem;
} </style> """

# unsafe_allow_html is needed so the <style> tag is emitted as raw HTML.
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)