-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8f51408
Showing
21 changed files
with
1,410 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
.vscode | ||
tmp | ||
test_files | ||
test.ipynb | ||
test.py | ||
*.egg-info | ||
dist | ||
*.pyc | ||
__pycache__ | ||
pretrained |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[build-system] | ||
requires = [ | ||
"setuptools>=42", | ||
"wheel" | ||
] | ||
build-backend = "setuptools.build_meta" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
[metadata] | ||
name = MMSA-FET | ||
version = 0.0.7 | ||
author = THUIAR | ||
description = A Tool for extracting multimodal features from videos. | ||
long_description = file: README.md | ||
long_description_content_type = text/markdown | ||
url = https://github.com/FlameSky-S/MMSA_FET | ||
project_urls = | ||
Bug Tracker = https://github.com/FlameSky-S/MMSA_FET/issues | ||
classifiers = | ||
Programming Language :: Python :: 3 | ||
License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) | ||
Operating System :: OS Independent | ||
|
||
[options] | ||
package_dir = | ||
= src | ||
packages = find: | ||
python_requires = >=3.6 | ||
install_requires = | ||
torch >= 1.7.0 | ||
mediapipe >= 0.8.8.1 | ||
opencv-contrib-python >= 4.5.4.58 | ||
transformers >= 4.4.0 | ||
opensmile >= 2.2.0 | ||
librosa >= 0.8.1 | ||
ffmpeg-python >= 0.2.0 | ||
ffmpeg >= 4.2.2 | ||
numpy >= 1.20.3 | ||
|
||
[options.packages.find] | ||
where = src |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .main import FeatureExtractionTool |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
{ | ||
"audio": { | ||
"tool": "librosa", | ||
"sample_rate": null, | ||
"args": { | ||
"mfcc": { | ||
"n_mfcc": 20, | ||
"htk": true | ||
}, | ||
"rms": {}, | ||
"zero_crossing_rate": {}, | ||
"spectral_rolloff": {}, | ||
"spectral_centroid": {} | ||
} | ||
}, | ||
"video": { | ||
"tool": "mediapipe", | ||
"fps": 25, | ||
"args": { | ||
"face_mesh": { | ||
"refine_landmarks": true, | ||
"min_detection_confidence": 0.35, | ||
"min_tracking_confidence": 0.5 | ||
}, | ||
"visualize": true, | ||
"visualize_dir": "/home/mhs/Projects/MMSA-Feature-Extraction-Tool/tmp/out/" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"audio": { | ||
"tool": "opensmile", | ||
"sample_rate": 48000, | ||
"args": { | ||
"feature_set": "ComParE_2016", | ||
"feature_level": "Functionals", | ||
"start": null, | ||
"end": null | ||
} | ||
}, | ||
"video": { | ||
"tool": "vggface", | ||
"fps": 25 | ||
}, | ||
"alignment": { | ||
"tool": "p2fa" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"audio": { | ||
"tool": "wav2vec", | ||
"sample_rate": 16000, | ||
"pretrained": "facebook/wav2vec2-base-960h" | ||
}, | ||
"video": { | ||
"tool": "mediapipe", | ||
"fps": 25, | ||
"args": { | ||
"holistic": { | ||
"model_complexity": 1, | ||
"smooth_landmarks": true, | ||
"enable_segmentation": true, | ||
"smooth_segmentation": true, | ||
"min_detection_confidence": 0.5, | ||
"min_tracking_confidence": 0.5 | ||
}, | ||
"visualize": true, | ||
"visualize_dir": "/home/mhs/Projects/MMSA-Feature-Extraction-Tool/tmp/out/" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from .audio.librosa import librosaExtractor | ||
from .audio.opensmile import opensmileExtractor | ||
from .audio.wave2vec import wav2vec2Extractor | ||
from .video.mediapipe import mediapipeExtractor | ||
from .video.vggface import vggfaceExtractor |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import numpy as np | ||
import librosa | ||
from ..baseExtractor import baseAudioExtractor | ||
|
||
|
||
class librosaExtractor(baseAudioExtractor):
    """
    Audio feature extractor using librosa.
    Ref: https://librosa.org/doc/latest/index.html

    config['args'] maps attribute names of ``librosa.feature`` to the
    keyword arguments passed to each of them.
    """
    def __init__(self, config, logger):
        try:
            logger.info("Initializing librosa audio feature extractor.")
            super().__init__(config, logger)
        except Exception:
            # Log through the `logger` argument: self.logger is only bound
            # inside super().__init__(), so it may not exist yet here.
            logger.error("Failed to initialize librosaExtractor.")
            raise

    def extract(self, file):
        """
        Function:
            Extract features from audio file using librosa.
        Parameters:
            file: path to audio file
        Returns:
            audio_result: extracted audio features in numpy array, all
                configured features concatenated along axis 1
        Raises:
            ValueError: if config['args'] lists no features.
            Any exception raised while loading or processing the file is
                logged and re-raised unchanged.
        """
        try:
            feature_args = self.config['args']
            if not feature_args:
                # np.concatenate would fail on an empty list with an opaque
                # message; report the actual configuration problem instead.
                raise ValueError("No librosa features configured in 'args'.")
            y, sr = self.load_audio(file)
            res = {}
            for audio_feature, kwargs in feature_args.items():
                method = getattr(librosa.feature, audio_feature)
                try:
                    res[audio_feature] = method(y=y, sr=sr, **kwargs).T
                except TypeError:
                    # Retry without `sr` for feature functions that reject
                    # that keyword.
                    res[audio_feature] = method(y=y, **kwargs).T
            # concatenate all features
            audio_result = np.concatenate(list(res.values()), axis=1)
            return audio_result
        except Exception:
            self.logger.error(f"Failed to extract audio features with librosa from {file}.")
            raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from ..baseExtractor import baseAudioExtractor | ||
import opensmile | ||
|
||
class opensmileExtractor(baseAudioExtractor):
    """
    Audio feature extractor using openSMILE.
    Ref: https://github.com/audeering/opensmile-python

    config['args'] must provide 'feature_set' and 'feature_level' (member
    names of opensmile.FeatureSet / opensmile.FeatureLevel) plus 'start'
    and 'end', which are forwarded to process_signal().
    """
    def __init__(self, config, logger):
        try:
            logger.info("Initializing openSMILE audio feature extractor")
            super().__init__(config, logger)
            self.args = self.config['args']
            # Resolve the configured names with getattr instead of eval():
            # config text must never be executed as Python code.
            self.extractor = opensmile.Smile(
                feature_set=getattr(opensmile.FeatureSet, self.args['feature_set']),
                feature_level=getattr(opensmile.FeatureLevel, self.args['feature_level']),
                num_workers=None,
            )
        except Exception:
            # Log through the `logger` argument: self.logger is only bound
            # inside super().__init__(), so it may not exist yet here.
            logger.error("Failed to initialize opensmileExtractor.")
            raise

    def extract(self, file):
        """
        Function:
            Extract features from audio file using openSMILE.
        Parameters:
            file: path to audio file
        Returns:
            audio_result: extracted audio features in numpy array
        Raises:
            Any exception raised while loading or processing the file is
                logged and re-raised unchanged.
        """
        try:
            y, sr = self.load_audio(file)
            audio_result = self.extractor.process_signal(
                signal=y,
                sampling_rate=sr,
                start=self.args['start'],
                end=self.args['end'],
            )
            audio_result = self.extractor.to_numpy(audio_result)
            return audio_result
        except Exception:
            self.logger.error(f"Failed to extract audio features with openSMILE from {file}")
            raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from transformers import Wav2Vec2FeatureExtractor | ||
from ..baseExtractor import baseAudioExtractor | ||
|
||
|
||
class wav2vec2Extractor(baseAudioExtractor):
    """
    Audio feature extractor using Wav2Vec2.
    Ref: https://huggingface.co/transformers/model_doc/wav2vec2.html

    config['pretrained'] is the name or path handed to
    Wav2Vec2FeatureExtractor.from_pretrained().
    """
    def __init__(self, config, logger):
        try:
            logger.info("Initializing Wav2Vec2 audio feature extractor.")
            super().__init__(config, logger)
            self.extractor = Wav2Vec2FeatureExtractor.from_pretrained(config['pretrained'])
        except Exception:
            # Log through the `logger` argument: self.logger is only bound
            # inside super().__init__(), so it may not exist yet here.
            logger.error("Failed to initialize Wav2VecExtractor.")
            raise

    def extract(self, file):
        """
        Function:
            Extract features from audio file using wav2vec2 pretrained model.
        Parameters:
            file: path to audio file
        Returns:
            audio_result: extracted audio features in numpy array
        Raises:
            Any exception raised while loading or processing the file is
                logged and re-raised unchanged.
        """
        try:
            y, sr = self.load_audio(file)
            # NOTE(review): only the Hugging Face feature extractor (input
            # preprocessing) is invoked here, not a Wav2Vec2 model - confirm
            # that its `input_values` are the intended features.
            audio_result = self.extractor(y, sampling_rate=sr, return_tensors="np").input_values.T
            return audio_result
        except Exception:
            self.logger.error(f"Failed to extract audio features with Wav2Vec2 from {file}.")
            raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import librosa | ||
|
||
|
||
class baseExtractor:
    """
    Root of the extractor hierarchy: stores the configuration and logger
    and declares the extract() hook that subclasses override.
    """
    def __init__(self, config, logger):
        self.logger = logger
        self.config = config

    def extract(self, file):
        """
        Produce features for `file`; concrete extractors must override this.
        """
        raise NotImplementedError("extract() not implemented")
|
||
|
||
class baseAudioExtractor(baseExtractor):
    """
    Shared parent of the audio extractors; adds audio loading on top of
    baseExtractor.
    """
    def __init__(self, config, logger):
        super().__init__(config, logger)

    def load_audio(self, file):
        """
        Read `file` with librosa.load, passing config['sample_rate'] as the
        `sr` argument, and return the resulting (y, sr) pair.
        """
        target_rate = self.config['sample_rate']
        waveform, rate = librosa.load(file, sr=target_rate)
        return waveform, rate
|
||
# class baseVideoExtractor(baseExtractor): | ||
# """ | ||
# Base class for all video extractors. | ||
# """ | ||
# def __init__(self, config, logger): | ||
# super().__init__(config, logger) | ||
|
||
# def load_images(self, img_dir): | ||
# """ | ||
# Load image files using cv2. | ||
# """ | ||
# images = [] | ||
# for image_path in sorted(glob(osp.join(img_dir, '*.bmp'))): | ||
# name = Path(image_path).stem | ||
# image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB) | ||
# images.append(image) | ||
# return images |
Empty file.
Oops, something went wrong.