Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add session file parser #70

Merged
merged 4 commits into from
Nov 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from waitress import serve
from http import HTTPStatus

from classification.file_loading import get_raw_array
from classification.parser import get_raw_array
from classification.exceptions import ClassificationError
from classification.config.constants import Sex, ALLOWED_FILE_EXTENSIONS
from classification.model import SleepStagesClassifier
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""
Function utilities to convert data acquired on an OpenBCI
Cyton board using the SD card logging strategy.
Function utilities to convert data acquired on an OpenBCI board

TODO: Consider cropping the file (from bedtime to wake-up time) here, before the for loop. Keep in mind that
not all lines hold sample values (i.e. the first line holds a comment and the second line a single timestamp).
Expand All @@ -14,44 +13,27 @@
from mne import create_info
from mne.io import RawArray
import numpy as np
import pandas as pd

from classification.exceptions import ClassificationError
from classification.config.constants import (
EEG_CHANNELS,
OPENBCI_CYTON_SAMPLE_RATE,
)

# NOTE(review): the names suggest the ADS1299 reference voltage (in volts)
# and its gain -- confirm against the board configuration.
ADS1299_Vref = 4.5
ADS1299_gain = 24.
# Value of one raw count: Vref / (2**23 - 1) / gain, scaled by 1e6
# (microvolts, per the name).
SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000
# Same scale divided by 1e6 (volts, per the name).
SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6

# NOTE(review): presumably the index of the first data column in a logged
# row -- confirm.
FILE_COLUMN_OFFSET = 1
# NOTE(review): presumably the number of channels on the Cyton board -- confirm.
CYTON_TOTAL_NB_CHANNELS = 8
# Number of leading rows handed to pd.read_csv as `skiprows` in get_raw_array.
SKIP_ROWS = 2
from classification.config.constants import OPENBCI_CYTON_SAMPLE_RATE, EEG_CHANNELS
from classification.parser.constants import SCALE_V_PER_COUNT
from classification.parser.file_type import FileType, detect_file_type


def get_raw_array(file):
"""Converts a file following the Cyton board SD card logging format into a mne.RawArray
"""Converts a file following a logging format into a mne.RawArray
Input:
- file: received as an input file
Returns:
- mne.RawArray of the two EEG channels of interest
"""

retained_columns = tuple(range(1, len(EEG_CHANNELS) + 1))

try:
eeg_raw = pd.read_csv(file,
skiprows=SKIP_ROWS,
usecols=retained_columns
).to_numpy()
except Exception:
raise ClassificationError()
filetype = detect_file_type(file)
print(f"""
Detected {filetype.name} format.
""")

hexstr_to_int = np.vectorize(_hexstr_to_int)
eeg_raw = hexstr_to_int(eeg_raw)
parse = filetype.parser
eeg_raw = parse(file)

raw_object = RawArray(
SCALE_V_PER_COUNT * np.transpose(eeg_raw),
Expand All @@ -61,6 +43,7 @@ def get_raw_array(file):
ch_types='eeg'),
verbose=False,
)

print(f"""
First sample values: {raw_object[:, 0]}
Second sample values: {raw_object[:, 1]}
Expand All @@ -70,13 +53,3 @@ def get_raw_array(file):
""")

return raw_object


def _hexstr_to_int(hexstr):
    """Converts a two's complement hexadecimal string to a signed integer
    Input:
    - hexstr: string of hexadecimal digits, most significant byte first
    Returns:
    - signed integer value
    """
    raw_bytes = bytes.fromhex(hexstr)
    return int.from_bytes(raw_bytes, byteorder='big', signed=True)
10 changes: 10 additions & 0 deletions backend/classification/parser/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from classification.config.constants import EEG_CHANNELS

# NOTE(review): the names suggest the ADS1299 reference voltage (in volts)
# and its gain -- confirm against the board configuration.
ADS1299_Vref = 4.5
ADS1299_gain = 24.
# Value of one raw count: Vref / (2**23 - 1) / gain, scaled by 1e6
# (microvolts, per the name), then the same scale divided by 1e6 (volts).
SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000
SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6

# Index of the first column retained from each logged row.
FILE_COLUMN_OFFSET = 1

# Column indices to read: one per EEG channel, starting at
# FILE_COLUMN_OFFSET. The stop bound is derived from the offset so the tuple
# always holds len(EEG_CHANNELS) columns, whatever the offset is set to.
RETAINED_COLUMNS = tuple(
    range(FILE_COLUMN_OFFSET, FILE_COLUMN_OFFSET + len(EEG_CHANNELS))
)
14 changes: 14 additions & 0 deletions backend/classification/parser/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pandas as pd

from classification.exceptions import ClassificationError

def read_csv(file, rows_to_skip=0, columns_to_read=None):
    """Reads a CSV file into a np.array
    Input:
    - file: path or file-like object handed to pandas.read_csv
    - rows_to_skip: forwarded to pandas as `skiprows` (defaults to 0)
    - columns_to_read: forwarded to pandas as `usecols` (None keeps
      every column)
    Returns:
    - np.array holding the parsed values
    Raises:
    - ClassificationError if reading or converting the content fails
    """
    try:
        raw_array = pd.read_csv(
            file,
            skiprows=rows_to_skip,
            usecols=columns_to_read,
        ).to_numpy()
    except Exception as error:
        # Chain the original failure so it stays visible as __cause__ in
        # tracebacks instead of being lost behind the domain error.
        raise ClassificationError() from error

    return raw_array
20 changes: 20 additions & 0 deletions backend/classification/parser/file_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from enum import Enum

from classification.parser.sd_file import parse_sd_file
from classification.parser.session_file import parse_session_file

class FileType(Enum):
    """Supported input file formats, each bound to its parsing function

    Every member value is a one-element tuple: Enum unpacks a tuple value
    into the arguments of __init__, which stores the function on `parser`.
    """

    SDFile = (parse_sd_file,)
    SessionFile = (parse_session_file,)

    def __init__(self, parser):
        # Function used to parse an input file of this format.
        self.parser = parser


def detect_file_type(file) -> FileType:
    """Detects file type from the first line of the file
    Input:
    - file: received as an input file; its first line is consumed and the
      read position is not rewound afterwards
    Returns:
    - FileType.SessionFile if the first line contains "EEG Data",
      FileType.SDFile otherwise
    """
    first_line = file.readline()
    if isinstance(first_line, bytes):
        # Only the "EEG Data" marker matters here, so undecodable bytes are
        # replaced instead of raising UnicodeDecodeError on unusual uploads.
        first_line = first_line.decode("utf-8", errors="replace")

    if "EEG Data" in first_line:
        return FileType.SessionFile
    return FileType.SDFile
30 changes: 30 additions & 0 deletions backend/classification/parser/sd_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import numpy as np

from classification.parser.constants import RETAINED_COLUMNS
from classification.parser.csv import read_csv

# Number of leading rows skipped when reading an SD file (handed to read_csv
# as rows_to_skip in parse_sd_file).
# NOTE(review): detect_file_type() reads one line from the same file object
# before the parser runs -- confirm this count accounts for it.
ROWS_TO_SKIP = 2


def _hexstr_to_int(hexstr):
    """Converts a two's complement hexadecimal string to a signed integer
    Input:
    - hexstr: string of hexadecimal digits, most significant byte first
    Returns:
    - signed integer value
    """
    raw_bytes = bytes.fromhex(hexstr)
    return int.from_bytes(raw_bytes, byteorder='big', signed=True)


def parse_sd_file(file):
    """Parses a file following the SD File logging format into a np.array
    Input:
    - file: received as an input file
    Returns:
    - np.array of signed integers, one column per retained EEG channel
      column (RETAINED_COLUMNS)
    """
    hex_samples = read_csv(
        file,
        rows_to_skip=ROWS_TO_SKIP,
        columns_to_read=RETAINED_COLUMNS,
    )
    # Samples are read as hexadecimal strings; convert them element-wise.
    to_signed_int = np.vectorize(_hexstr_to_int)
    return to_signed_int(hex_samples)
17 changes: 17 additions & 0 deletions backend/classification/parser/session_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pandas as pd

from classification.parser.constants import RETAINED_COLUMNS
from classification.parser.csv import read_csv


# Number of leading rows skipped when reading a session file (handed to
# read_csv as rows_to_skip in parse_session_file).
# NOTE(review): detect_file_type() reads one line from the same file object
# before the parser runs -- confirm this count accounts for it.
ROWS_TO_SKIP = 5


def parse_session_file(file):
    """Parses a file following the Session File logging format into a np.array
    Input:
    - file: received as an input file
    Returns:
    - np.array of the retained EEG channel columns (RETAINED_COLUMNS),
      with values returned as read, without further conversion
    """
    eeg_raw = read_csv(
        file,
        rows_to_skip=ROWS_TO_SKIP,
        columns_to_read=RETAINED_COLUMNS,
    )
    return eeg_raw