PolyCortex · abelfodil · Nov 10, 2020 · Nov 9, 2020 · Nov 10, 2020 · Nov 10, 2020
diff --git a/backend/app.py b/backend/app.py
@@ -3,7 +3,7 @@
 from waitress import serve
 from http import HTTPStatus
 
-from classification.file_loading import get_raw_array
+from classification.parser import get_raw_array
 from classification.exceptions import ClassificationError
 from classification.config.constants import Sex, ALLOWED_FILE_EXTENSIONS
 from classification.model import SleepStagesClassifier

diff --git a/backend/classification/file_loading.py → backend/classification/parser/__init__.py b/backend/classification/file_loading.py → backend/classification/parser/__init__.py
@@ -1,6 +1,5 @@
 """
-Function utilities to convert data acquired on an OpenBCI
-Cyton board using the SD card logging strategy.
+Function utilities to convert data acquired on an OpenBCI board
 
 TODO: Consider cropping file (from bed to wake up time) here, before the for loop. Have to consider
 not all lines hold sample values (i.e. first line with comment and second line with a single timestamp).
@@ -14,44 +13,27 @@
 from mne import create_info
 from mne.io import RawArray
 import numpy as np
-import pandas as pd
 
-from classification.exceptions import ClassificationError
-from classification.config.constants import (
-    EEG_CHANNELS,
-    OPENBCI_CYTON_SAMPLE_RATE,
-)
-
-ADS1299_Vref = 4.5
-ADS1299_gain = 24.
-SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000
-SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6
-
-FILE_COLUMN_OFFSET = 1
-CYTON_TOTAL_NB_CHANNELS = 8
-SKIP_ROWS = 2
+from classification.config.constants import OPENBCI_CYTON_SAMPLE_RATE, EEG_CHANNELS
+from classification.parser.constants import SCALE_V_PER_COUNT
+from classification.parser.file_type import FileType, detect_file_type
 
 
 def get_raw_array(file):
-    """Converts a file following the Cyton board SD card logging format into a mne.RawArray
+    """Converts a file following a logging format into a mne.RawArray
     Input:
     - file: received as an input file
     Returns:
     - mne.RawArray of the two EEG channels of interest
     """
 
-    retained_columns = tuple(range(1, len(EEG_CHANNELS) + 1))
-
-    try:
-        eeg_raw = pd.read_csv(file,
-                              skiprows=SKIP_ROWS,
-                              usecols=retained_columns
-                              ).to_numpy()
-    except Exception:
-        raise ClassificationError()
+    filetype = detect_file_type(file)
+    print(f"""
+    Detected {filetype.name} format.
+    """)
 
-    hexstr_to_int = np.vectorize(_hexstr_to_int)
-    eeg_raw = hexstr_to_int(eeg_raw)
+    parse = filetype.parser
+    eeg_raw = parse(file)
 
     raw_object = RawArray(
         SCALE_V_PER_COUNT * np.transpose(eeg_raw),
@@ -61,6 +43,7 @@ def get_raw_array(file):
             ch_types='eeg'),
         verbose=False,
     )
+
     print(f"""
         First sample values: {raw_object[:, 0]}
         Second sample values: {raw_object[:, 1]}
@@ -70,13 +53,3 @@ def get_raw_array(file):
     """)
 
     return raw_object
-
-
-def _hexstr_to_int(hexstr):
-    """Converts a two complement hexadecimal value in a string to a signed float
-    Input:
-    - hex_value: signed hexadecimal value
-    Returns:
-    - decimal value
-    """
-    return int.from_bytes(bytes.fromhex(hexstr), byteorder='big', signed=True)
diff --git a/backend/classification/parser/constants.py b/backend/classification/parser/constants.py
@@ -0,0 +1,10 @@
+from classification.config.constants import EEG_CHANNELS
+
+ADS1299_Vref = 4.5  # reference voltage (presumably volts, given the 1e6 factor to uV below)
+ADS1299_gain = 24.  # gain that divides the per-count scale
+SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000
+SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6
+
+FILE_COLUMN_OFFSET = 1  # index of the first retained column
+# Keep len(EEG_CHANNELS) consecutive columns starting at FILE_COLUMN_OFFSET.
+RETAINED_COLUMNS = tuple(range(FILE_COLUMN_OFFSET, FILE_COLUMN_OFFSET + len(EEG_CHANNELS)))
diff --git a/backend/classification/parser/csv.py b/backend/classification/parser/csv.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+from classification.exceptions import ClassificationError
+
+def read_csv(file, rows_to_skip=0, columns_to_read=None):
+    """Reads a CSV file into a numpy array (rows skipped / columns kept per the arguments).
+    Raises ClassificationError, chained to the original error, if reading fails."""
+    try:
+        raw_array = pd.read_csv(file, skiprows=rows_to_skip, usecols=columns_to_read).to_numpy()
+    except Exception as error:
+        # Keep the underlying cause attached so the real failure stays visible in tracebacks.
+        raise ClassificationError() from error
+
+    return raw_array
diff --git a/backend/classification/parser/file_type.py b/backend/classification/parser/file_type.py
@@ -0,0 +1,20 @@
+from enum import Enum
+
+from classification.parser.sd_file import parse_sd_file
+from classification.parser.session_file import parse_session_file
+
+class FileType(Enum):  # log formats; each value is a 1-tuple holding the format's parser
+    SDFile = (parse_sd_file,)
+    SessionFile = (parse_session_file,)
+    def __init__(self, parser):  # Enum passes the unpacked member tuple to __init__
+        self.parser = parser
+
+
+def detect_file_type(file) -> FileType:
+    """Detects file type from the first line (which is consumed from `file`)
+    - file: received as an input file, read as bytes
+    Returns:
+    - FileType.SessionFile if the first line contains "EEG Data", else FileType.SDFile
+    """
+    # Match on raw bytes: the marker is ASCII, so no decode (and no UnicodeDecodeError) is needed.
+    return FileType.SessionFile if b"EEG Data" in file.readline() else FileType.SDFile
diff --git a/backend/classification/parser/sd_file.py b/backend/classification/parser/sd_file.py
@@ -0,0 +1,30 @@
+import numpy as np
+
+from classification.parser.constants import RETAINED_COLUMNS
+from classification.parser.csv import read_csv
+
+ROWS_TO_SKIP = 2  # rows skipped by read_csv; NOTE(review): detect_file_type already consumed line 1 - confirm count
+
+
+def _hexstr_to_int(hexstr):
+    """Converts a two's complement hexadecimal string to a signed integer
+    Input:
+    - hexstr: hexadecimal digits, interpreted as big-endian bytes
+    Returns: the signed integer value
+    """
+    raw_bytes = bytes.fromhex(hexstr)
+    return int.from_bytes(raw_bytes, byteorder='big', signed=True)
+
+
+def parse_sd_file(file):
+    """Parses a file following the SD File logging format into a np.array
+    Input:
+    - file: received as an input file
+    Returns:
+    - np.array of signed integers decoded from the retained hexadecimal columns
+    """
+    hex_samples = read_csv(file, ROWS_TO_SKIP, RETAINED_COLUMNS)
+    # np.vectorize applies the scalar decoder to every cell of the array.
+    decode_samples = np.vectorize(_hexstr_to_int)
+
+    return decode_samples(hex_samples)
diff --git a/backend/classification/parser/session_file.py b/backend/classification/parser/session_file.py
@@ -0,0 +1,17 @@
+import pandas as pd
+
+from classification.parser.constants import RETAINED_COLUMNS
+from classification.parser.csv import read_csv
+
+
+ROWS_TO_SKIP = 5  # rows skipped by read_csv; NOTE(review): detect_file_type already consumed line 1 - confirm count
+
+
+def parse_session_file(file):
+    """Parses a file following the Session File logging format into a np.array
+    Input:
+    - file: received as an input file
+    Returns:
+    - np.array holding the retained columns, as read from the CSV content
+    """
+    return read_csv(file, rows_to_skip=ROWS_TO_SKIP, columns_to_read=RETAINED_COLUMNS)