Skip to content

Commit

Permalink
add new criterion
Browse files Browse the repository at this point in the history
  • Loading branch information
arhihihipov committed May 1, 2024
1 parent ce30af1 commit d12b785
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 3 deletions.
84 changes: 84 additions & 0 deletions app/criteria/comparison_whole_speech/criterion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from bson import ObjectId

from app.root_logger import get_root_logger
from app.localisation import *
from ..criterion_base import BaseCriterion
from ..criterion_result import CriterionResult
from app.audio import Audio
from app.presentation import Presentation
from app.utils import normalize_text
from ..text_comparison import Doc2VecEvaluator

logger = get_root_logger('web')


class ComparisonWholeSpeechCriterion(BaseCriterion):
    """Criterion comparing the whole recognized speech with the whole slide text.

    The normalized speech of all audio slides and the normalized text of the
    corresponding presentation slides are each joined into one document. A
    ``Doc2VecEvaluator`` is trained on these two documents and their similarity,
    scaled by 2.5 and capped at 1, becomes the criterion result.
    """

    # Parameter names expected in ``parameters`` mapped to their type names.
    PARAMETERS = dict(
        vector_size=int.__name__,
        window=int.__name__,
        min_count=int.__name__,
        workers=int.__name__,
        epochs=int.__name__,
        dm=int.__name__,
    )

    def __init__(self, parameters, dependent_criteria, name=''):
        """Store the criterion settings and build the Doc2Vec evaluator.

        :param parameters: mapping that provides every key listed in ``PARAMETERS``.
        :param dependent_criteria: forwarded unchanged to ``BaseCriterion``.
        :param name: criterion name, forwarded unchanged to ``BaseCriterion``.
        """
        super().__init__(
            name=name,
            parameters=parameters,
            dependent_criteria=dependent_criteria,
        )
        vector_size = self.parameters['vector_size']
        window = self.parameters['window']
        min_count = self.parameters['min_count']
        workers = self.parameters['workers']
        epochs = self.parameters['epochs']
        dm = self.parameters['dm']

        # The evaluator is created once here and re-trained on every apply() call.
        self.model = Doc2VecEvaluator(vector_size, window, min_count, workers, epochs, dm)

    @property
    def description(self):
        """Return the localized description of the criterion and its grading rule."""
        return {
            "Критерий": t(self.name),
            "Описание": t("Проверяет, что тема доклада студента совпадает с темой презентации"),
            "Оценка": t(
                "1, если тема доклада и презентации совпадают не менее, чем на 40%, иначе 2.5 * k, где k - степень соответствия темы доклада теме презентации")
        }

    def apply(self, audio: Audio, presentation: Presentation, training_id: ObjectId,
              criteria_results: dict) -> CriterionResult:
        """Grade how well the whole speech matches the whole presentation text.

        :param audio: recognized audio; ``audio.audio_slides`` is read.
        :param presentation: presentation; ``presentation.slides`` is read.
        :param training_id: not used by this criterion.
        :param criteria_results: not used by this criterion.
        :return: ``CriterionResult`` with 0 when there is no speech or no slide
            text, 1 when the scaled similarity reaches 1, otherwise the scaled
            similarity (never below 0).
        """
        audio_slides = audio.audio_slides

        normalized_speech = []
        for audio_slide in audio_slides:
            # Keep only the word text, dropping timestamps and probabilities.
            spoken_words = [recognized.word.value for recognized in audio_slide.recognized_words]
            slide_speech = " ".join(normalize_text(spoken_words))
            if slide_speech != "":
                normalized_speech.append(slide_speech)

        normalized_slides = []
        # Walk both sequences in lockstep: only slides that have an audio segment
        # are considered, and a recording with more audio slides than the
        # presentation has slides no longer raises IndexError.
        for _, slide in zip(audio_slides, presentation.slides):
            slide_text = " ".join(normalize_text(slide.words.split()))
            if slide_text != "":
                normalized_slides.append(slide_text)

        if len(normalized_speech) == 0:
            return CriterionResult(0, "Тренажер не зафиксировал, что вы что-то говорили")
        normalized_speech_text = " ".join(normalized_speech)

        if len(normalized_slides) == 0:
            return CriterionResult(0, "Загруженная вами презентация не содержит текста")
        normalized_slides_text = " ".join(normalized_slides)

        self.model.train_model([normalized_speech_text, normalized_slides_text])

        # A similarity of 0.4 or more maps to the full grade (0.4 * 2.5 == 1).
        score = 2.5 * self.model.evaluate_semantic_similarity(normalized_speech_text, normalized_slides_text)
        logger.info(f"Score={score}")
        if score >= 1:
            return CriterionResult(1, "Ваша речь соответствует тексту презентации")
        # Cosine similarity may be negative; a grade must not drop below zero.
        return CriterionResult(max(score, 0), "Ваша речь не полностью соответствует теме презентации")
1 change: 1 addition & 0 deletions app/criteria/criterions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
from .speech_pace.criterion import SpeechPaceCriterion
from .strict_speech_duration.criterion import StrictSpeechDurationCriterion
from .comparison_speech_slides.criterion import ComparisonSpeechSlidesCriterion
from .comparison_whole_speech.criterion import ComparisonWholeSpeechCriterion
16 changes: 15 additions & 1 deletion app/criteria/preconfigured_criterions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

from criteria import (FillersNumberCriterion, FillersRatioCriterion,
SpeechIsNotInDatabaseCriterion, SpeechPaceCriterion,
StrictSpeechDurationCriterion, ComparisonSpeechSlidesCriterion)
StrictSpeechDurationCriterion, ComparisonSpeechSlidesCriterion,
ComparisonWholeSpeechCriterion)

from .utils import DEFAULT_FILLERS
from .utils import DEFAULT_SKIP_SLIDES
Expand Down Expand Up @@ -149,6 +150,19 @@
name="ComparisonSpeechSlidesCriterion",
parameters={"skip_slides": DEFAULT_SKIP_SLIDES},
dependent_criteria=[],
),

ComparisonWholeSpeechCriterion(
name="ComparisonWholeSpeechCriterion",
parameters={
"vector_size": 200,
"window": 5,
"min_count": 3,
"workers": 4,
"epochs": 40,
"dm": 0
},
dependent_criteria=[],
)

]
Expand Down
20 changes: 20 additions & 0 deletions app/criteria/text_comparison.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models.doc2vec import Doc2Vec, TaggedDocument


class SlidesSimilarityEvaluator:
Expand All @@ -15,3 +16,22 @@ def evaluate_semantic_similarity(self, text1: str, text2: str) -> float:
similarity = cosine_similarity(vector1, vector2)[0][0]

return round(similarity, 3)


class Doc2VecEvaluator:
    """Wrapper around a gensim ``Doc2Vec`` model used to compare two texts."""

    def __init__(self, vector_size: int, window: int, min_count: int, workers: int, epochs: int, dm: int):
        """Create an untrained ``Doc2Vec`` model; all arguments are passed through to it."""
        self.model = Doc2Vec(vector_size=vector_size, window=window, min_count=min_count, workers=workers,
                             epochs=epochs, dm=dm)

    def train_model(self, documents: list):
        """Build the vocabulary from ``documents`` and train the model on them.

        :param documents: list of whitespace-tokenizable strings; each string
            becomes one tagged document whose tag is its index in the list.
        """
        if not documents:
            return

        tagged_documents = [TaggedDocument(words=doc.split(), tags=[i]) for i, doc in enumerate(documents)]
        self.model.build_vocab(tagged_documents)

        # When every word is rarer than ``min_count`` the vocabulary ends up
        # empty and gensim's train() raises RuntimeError; nothing can be learnt
        # from such input, so training is skipped.
        if len(self.model.wv) == 0:
            return

        self.model.train(tagged_documents, total_examples=self.model.corpus_count, epochs=self.model.epochs)

    def evaluate_semantic_similarity(self, text1: str, text2: str) -> float:
        """Return the similarity of the two texts rounded to 3 decimals.

        The similarity is ``wv.n_similarity`` computed over the tokens of each
        text that are present in the model vocabulary. ``0.0`` is returned when
        either text has no tokens or no tokens known to the model.

        NOTE(review): with ``dm=0`` and gensim's default ``dbow_words=0`` the
        word vectors are presumably left untrained, so this measures overlap of
        randomly initialised vectors - confirm whether document vectors should
        be compared instead.
        """
        words1 = text1.split()
        words2 = text2.split()
        if not words1 or not words2:
            return 0.0

        word_vectors = self.model.wv
        # Out-of-vocabulary tokens (e.g. pruned by ``min_count``) make
        # n_similarity raise KeyError on some gensim versions; drop them first.
        known1 = [word for word in words1 if word in word_vectors]
        known2 = [word for word in words2 if word in word_vectors]
        if not known1 or not known2:
            return 0.0

        similarity = word_vectors.n_similarity(known1, known2)

        # Convert the numpy scalar to a built-in float to honour the annotation.
        return round(float(similarity), 3)
3 changes: 2 additions & 1 deletion app/criteria_pack/preconfigured_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
[['SimpleNumberSlidesCriterion', 0.05],
['SlidesCheckerCriterion', 0.95]],
'ComparisonPack':
[['ComparisonSpeechSlidesCriterion', 1]]
[['ComparisonSpeechSlidesCriterion', 0.5],
['ComparisonWholeSpeechCriterion', 0.5]]
}


Expand Down
2 changes: 1 addition & 1 deletion app/feedback_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json

from app.criteria import SpeechDurationCriterion, SpeechPaceCriterion, FillersRatioCriterion, FillersNumberCriterion, \
StrictSpeechDurationCriterion, ComparisonSpeechSlidesCriterion
StrictSpeechDurationCriterion, ComparisonSpeechSlidesCriterion, ComparisonWholeSpeechCriterion


class Feedback:
Expand Down

0 comments on commit d12b785

Please sign in to comment.