-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetect_candidate_scenes_task.py
119 lines (98 loc) · 3.86 KB
/
detect_candidate_scenes_task.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import argparse
from utils.configs.class_choices import ClassChoices
from modules.scene_detection import PySceneDetector
from modules.scene_detection.abs_scene_detector import AbsSceneDetector
from modules.face_detection import DSFDFaceDetector
from modules.face_detection import RetinaFaceDetector
from modules.face_detection.abs_face_detector import AbsFaceDetector
from modules.face_alignment import FANAligner
from modules.face_alignment.abs_face_aligner import AbsFaceAligner
from modules.active_speaker_detection import TalkNetASD
from modules.active_speaker_detection.abs_active_speaker_detector import AbsASD
from modules.automatic_speech_recognition import WhisperASR
from modules.automatic_speech_recognition.abs_automatic_speech_recognizer import AbsASR
from pipelines import AnnoTheiaPipeline
from pipelines.abs_pipeline import AbsPipeline
# Scene detection implementations, keyed by the name handed to get_class().
scene_detection_choices = ClassChoices(
    name="scene_detection",
    classes={
        "pyscenedetect": PySceneDetector,
    },
    type_check=AbsSceneDetector,  # presumably the base class entries must derive from -- confirm in ClassChoices
    default="pyscenedetect",
)
# Face detection implementations, keyed by the name handed to get_class().
face_detection_choices = ClassChoices(
    name="face_detection",
    classes={
        "dsfd": DSFDFaceDetector,
        "retina": RetinaFaceDetector,
    },
    type_check=AbsFaceDetector,  # presumably the base class entries must derive from -- confirm in ClassChoices
    default="dsfd",
)
# Face alignment implementations, keyed by the name handed to get_class().
# Declared optional with no default: build_pipeline skips this stage when no
# face_alignment name is configured.
face_alignment_choices = ClassChoices(
    name="face_alignment",
    classes={
        "fan": FANAligner,
    },
    type_check=AbsFaceAligner,  # presumably the base class entries must derive from -- confirm in ClassChoices
    default=None,
    optional=True,
)
# Active speaker detection implementations, keyed by the name handed to get_class().
active_speaker_detection_choices = ClassChoices(
    name="active_speaker_detection",
    classes={
        "talknet": TalkNetASD,
    },
    type_check=AbsASD,  # presumably the base class entries must derive from -- confirm in ClassChoices
    default="talknet",
)
# Speech recognition implementations, keyed by the name handed to get_class().
automatic_speech_recognition_choices = ClassChoices(
    name="automatic_speech_recognition",
    classes={
        "whisper": WhisperASR,
    },
    type_check=AbsASR,  # presumably the base class entries must derive from -- confirm in ClassChoices
    default="whisper",
)
# Pipeline implementations, keyed by the name handed to get_class().
pipeline_choices = ClassChoices(
    name="pipeline",
    classes={
        "annotheia": AnnoTheiaPipeline,
    },
    type_check=AbsPipeline,  # presumably the base class entries must derive from -- confirm in ClassChoices
    default="annotheia",
)
class DetectCandidateScenesTask:
    """Factory that assembles the candidate-scene detection pipeline from parsed arguments."""

    @classmethod
    def build_pipeline(cls, args: argparse.Namespace) -> AnnoTheiaPipeline:
        """Instantiate every processing module and wire them into a pipeline.

        Args:
            args: Parsed configuration. For ``scene_detection``,
                ``face_detection``, ``active_speaker_detection`` and
                ``automatic_speech_recognition`` it must carry both the
                implementation name and a ``<stage>_conf`` mapping, which is
                expanded into the constructor's keyword arguments.
                ``face_alignment`` may be absent or ``None``, in which case
                no aligner is built and ``face_alignment_conf`` is not read.
                ``pipeline`` may be absent or ``None``, in which case the
                ``"annotheia"`` pipeline is used. ``pipeline_conf`` is always
                required.

        Returns:
            The pipeline instance created by the class that
            ``pipeline_choices`` resolves for the requested name.
        """
        # 1. Scene Detection
        scene_detection_class = scene_detection_choices.get_class(args.scene_detection)
        scene_detection = scene_detection_class(**args.scene_detection_conf)

        # 2. Face Detection
        face_detection_class = face_detection_choices.get_class(args.face_detection)
        face_detection = face_detection_class(**args.face_detection_conf)

        # 3. Face Alignment (optional stage: skipped when no name is configured)
        face_alignment = None
        if getattr(args, "face_alignment", None) is not None:
            face_alignment_class = face_alignment_choices.get_class(args.face_alignment)
            face_alignment = face_alignment_class(**args.face_alignment_conf)

        # 4. Active Speaker Detection
        active_speaker_detection_class = active_speaker_detection_choices.get_class(
            args.active_speaker_detection
        )
        active_speaker_detection = active_speaker_detection_class(
            **args.active_speaker_detection_conf
        )

        # 5. Automatic Speech Recognition
        automatic_speech_recognition_class = automatic_speech_recognition_choices.get_class(
            args.automatic_speech_recognition
        )
        automatic_speech_recognition = automatic_speech_recognition_class(
            **args.automatic_speech_recognition_conf
        )

        # 6. Building AnnoTheia Pipeline
        # Only a missing (or None) ``pipeline`` entry falls back to the default.
        # Reading it with getattr, instead of wrapping the lookup in
        # ``except AttributeError``, keeps errors raised inside get_class()
        # from being silently converted into the default pipeline.
        pipeline_name = getattr(args, "pipeline", None)
        if pipeline_name is None:
            pipeline_name = "annotheia"
        pipeline_class = pipeline_choices.get_class(pipeline_name)

        return pipeline_class(
            scene_detection=scene_detection,
            face_detection=face_detection,
            face_alignment=face_alignment,
            active_speaker_detection=active_speaker_detection,
            automatic_speech_recognition=automatic_speech_recognition,
            **args.pipeline_conf,
        )