diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index b3d30e1d..57f36d96 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -4,6 +4,7 @@ import pandas as pd import re import posixpath +import pathlib import pdb import struct import sys @@ -146,7 +147,7 @@ def __init__(self, record_name, extension, sample, symbol=None, label_store=None, description=None, custom_labels=None, contained_labels=None): - self.record_name = record_name + self.record_name = str(record_name) self.extension = extension self.sample = sample @@ -1584,7 +1585,7 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False, Parameters ---------- - record_name : str + record_name : str or pathlib.Path The record name of the WFDB annotation file. ie. for file '100.atr', record_name='100'. extension : str @@ -1678,7 +1679,7 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False, pass # Create the annotation object - annotation = Annotation(record_name=os.path.split(record_name)[1], + annotation = Annotation(record_name=pathlib.Path(record_name).name, extension=extension, sample=sample, label_store=label_store, subtype=subtype, chan=chan, num=num, aux_note=aux_note, fs=fs, @@ -1747,7 +1748,7 @@ def load_byte_pairs(record_name, extension, pn_dir): Parameters ---------- - record_name : str + record_name : str or pathlib.Path The record name of the WFDB annotation file. ie. for file '100.atr', record_name='100'. extension : str @@ -1764,13 +1765,14 @@ def load_byte_pairs(record_name, extension, pn_dir): The input filestream converted to an Nx2 array of unsigned bytes. """ + file_name = pathlib.Path(str(record_name) + '.' + extension) # local file if pn_dir is None: - with open(record_name + '.' 
+ extension, 'rb') as f: + with open(file_name, 'rb') as f: filebytes = np.fromfile(f, '>> url = _get_url('mitdb/1.0.0', '100.dat') + >>> url + 'https://physionet.org/files/mitdb/1.0.0/100.dat' + >>> url = _get_url('mitdb/1.0.0/', 'x_mitdb/x_111.dat') + >>> url + 'https://physionet.org/files/mitdb/1.0.0/x_mitdb/x_111.dat' + >>> url = _get_url('mitdb', '1.0.0', 'x_mitdb', 'x_111.dat') + >>> url + 'https://physionet.org/files/mitdb/1.0.0/x_mitdb/x_111.dat' + >>> import pathlib + >>> pp = pathlib.PureWindowsPath('x_mitdb\\x_111.dat') + >>> _get_url('mitdb/1.0.0', pp) + 'https://physionet.org/files/mitdb/1.0.0/x_mitdb/x_111.dat' + >>> sp = 'x_mitdb\\x_111.dat' # on a Windows machine + >>> _get_url('mitdb/1.0.0', sp) + 'https://physionet.org/files/mitdb/1.0.0/x_mitdb/x_111.dat' + + """ + url = config.db_index_url + for item in dir_or_file: + url = posixpath.join(url, *pathlib.PurePosixPath(pathlib.Path(item)).parts) + return url + + def _remote_file_size(url=None, file_name=None, pn_dir=None): """ Get the remote file size in bytes. @@ -57,11 +99,12 @@ def _remote_file_size(url=None, file_name=None, pn_dir=None): url : str, optional The full url of the file. Use this option to explicitly state the full url. - file_name : str, optional + file_name : str or pathlib.Path, optional The base file name. Use this argument along with pn_dir if you want the full url to be constructed. pn_dir : str, optional - The base file name. Use this argument along with file_name if + The PhysioNet directory where the file is located. + Use this argument along with file_name if you want the full url to be constructed. 
Returns @@ -72,7 +115,7 @@ def _remote_file_size(url=None, file_name=None, pn_dir=None): """ # Option to construct the url if file_name and pn_dir: - url = posixpath.join(config.db_index_url, pn_dir, file_name) + url = _get_url(pn_dir, file_name) with _url.openurl(url, 'rb') as f: remote_file_size = f.seek(0, os.SEEK_END) @@ -86,7 +129,7 @@ def _stream_header(file_name, pn_dir): Parameters ---------- - file_name : str + file_name : str or pathlib.Path The name of the headerr file to be read. pn_dir : str The PhysioNet database directory from which to find the @@ -102,7 +145,7 @@ def _stream_header(file_name, pn_dir): """ # Full url of header location - url = posixpath.join(config.db_index_url, pn_dir, file_name) + url = _get_url(pn_dir, file_name) # Get the content of the remote file with _url.openurl(url, 'rb') as f: @@ -140,7 +183,7 @@ def _stream_dat(file_name, pn_dir, byte_count, start_byte, dtype): Parameters ---------- - file_name : str + file_name : str or pathlib.Path The name of the dat file to be read. pn_dir : str The PhysioNet directory where the dat file is located. @@ -158,7 +201,7 @@ def _stream_dat(file_name, pn_dir, byte_count, start_byte, dtype): """ # Full url of dat file - url = posixpath.join(config.db_index_url, pn_dir, file_name) + url = _get_url(pn_dir, file_name) # Get the content with _url.openurl(url, 'rb', buffering=0) as f: @@ -177,7 +220,7 @@ def _stream_annotation(file_name, pn_dir): Parameters ---------- - file_name : str + file_name : str or pathlib.Path The name of the annotation file to be read. pn_dir : str The PhysioNet directory where the annotation file is located. 
@@ -189,7 +232,7 @@ def _stream_annotation(file_name, pn_dir): """ # Full url of annotation file - url = posixpath.join(config.db_index_url, pn_dir, file_name) + url = _get_url(pn_dir, file_name) # Get the content with _url.openurl(url, 'rb') as f: @@ -262,10 +305,14 @@ def get_record_list(db_dir, records='all'): """ # Full url PhysioNet database - if '/' not in db_dir: - db_url = posixpath.join(config.db_index_url, db_dir, record.get_version(db_dir)) + # if '/' not in db_dir: + # db_url = posixpath.join(config.db_index_url, db_dir, record.get_version(db_dir)) + # else: + # db_url = posixpath.join(config.db_index_url, db_dir) + if not re.search(record.DB_VERSION_PATTERN, db_dir): + db_url = _get_url(db_dir, record.get_version(db_dir)) else: - db_url = posixpath.join(config.db_index_url, db_dir) + db_url = _get_url(db_dir) # Check for a RECORDS file if records == 'all': @@ -308,7 +355,7 @@ def get_annotators(db_dir, annotators): """ # Full url PhysioNet database - db_url = posixpath.join(config.db_index_url, db_dir) + db_url = _get_url(db_dir) if annotators is not None: # Check for an ANNOTATORS file @@ -396,7 +443,7 @@ def dl_pn_file(inputs): basefile, subdir, db, dl_dir, keep_subdirs, overwrite = inputs # Full url of file - url = posixpath.join(config.db_index_url, db, subdir, basefile) + url = _get_url(db, subdir, basefile) # Figure out where the file should be locally if keep_subdirs: diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 9d0ff047..1f0445e5 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -2,6 +2,7 @@ import multiprocessing import posixpath import re +import pathlib import numpy as np import os @@ -176,7 +177,7 @@ def __init__(self, record_name=None, n_sig=None, fs=None, counter_freq=None, base_counter=None, sig_len=None, base_time=None, base_date=None, comments=None, sig_name=None): - self.record_name = record_name + self.record_name = None if record_name is None else str(record_name) self.n_sig = n_sig self.fs = fs self.counter_freq = counter_freq @@ -1233,6 
+1234,9 @@ def multi_to_single(self, physical, return_res=64): LIST_FIELDS = tuple(_header.SIGNAL_SPECS.index) + ('comments', 'e_p_signal', 'e_d_signal', 'segments') +# Pattern matching a three-part database version such as "1.0.0" +DB_VERSION_PATTERN = re.compile(r"\d+\.\d+\.\d+") + def get_version(pn_dir): """ Get the version number of the desired project. @@ -1381,7 +1385,7 @@ def edf2mit(record_name, pn_dir=None, delete_file=True, record_only=True, Parameters ---------- - record_name : str + record_name : str or pathlib.Path The name of the input EDF record to be read. pn_dir : str, optional Option used to stream data from Physionet. The Physionet @@ -1640,7 +1644,7 @@ def edf2mit(record_name, pn_dir=None, delete_file=True, record_only=True, struct.unpack('<32s', edf_file.read(32))[0].decode() # Pre-process the acquired data before creating the record - record_name_out = record_name.split(os.sep)[-1].replace('-','_').replace('.edf','') + record_name_out = pathlib.Path(record_name).stem.replace('-','_') sample_rate = [int(i/block_duration) for i in samps_per_block] fs = functools.reduce(math.gcd, sample_rate) samps_per_frame = [int(s/min(samps_per_block)) for s in samps_per_block] @@ -1781,7 +1785,7 @@ def mit2edf(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None, Parameters ---------- - record_name : str + record_name : str or pathlib.Path The name of the input WFDB record to be read. Can also work with both EDF and WAV files. 
pn_dir : str, optional @@ -1861,7 +1865,7 @@ def mit2edf(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None, """ record = rdrecord(record_name, pn_dir=pn_dir, sampfrom=sampfrom, sampto=sampto, smooth_frames=False) - record_name_out = record_name.split(os.sep)[-1].replace('-','_') + record_name_out = pathlib.Path(record_name).name.replace('-','_') # Maximum data block length, in bytes edf_max_block = 61440 @@ -2312,7 +2316,7 @@ def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False): Parameters ---------- - record_name : str + record_name : str or pathlib.Path The name of the input .wav record to be read. pn_dir : str, optional Option used to stream data from Physionet. The Physionet @@ -2377,7 +2381,7 @@ def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False): >>> wav_record = wfdb.wav2mit('sample-data/SC4001E0-PSG.wav', record_only=True) """ - if not record_name.endswith('.wav'): + if not pathlib.Path(record_name).suffix == '.wav': raise Exception('Name of the input file must end in .wav') if pn_dir is not None: @@ -2393,7 +2397,7 @@ def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False): open(record_name, 'wb').write(f.read()) wave_file = open(record_name, mode='rb') - record_name_out = record_name.split(os.sep)[-1].replace('-','_').replace('.wav','') + record_name_out = pathlib.Path(record_name).stem.replace('-','_') chunk_ID = ''.join([s.decode() for s in struct.unpack('>4s', wave_file.read(4))]) if chunk_ID != 'RIFF': @@ -2546,7 +2550,7 @@ def wfdb2mat(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None): Parameters ---------- - record_name : str + record_name : str or pathlib.Path The name of the input WFDB record to be read. Can also work with both EDF and WAV files. 
pn_dir : str, optional @@ -2591,7 +2595,7 @@ def wfdb2mat(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None): """ record = rdrecord(record_name, pn_dir=pn_dir, sampfrom=sampfrom, sampto=sampto) - record_name_out = record_name.split(os.sep)[-1].replace('-','_') + 'm' + record_name_out = pathlib.Path(record_name).name.replace('-','_') + 'm' # Some variables describing the format of the .mat file field_version = 256 # 0x0100 or 256 @@ -2789,7 +2793,7 @@ def csv2mit(file_name, fs, units, fmt=None, adc_gain=None, baseline=None, Parameters ---------- - file_name : str + file_name : str or pathlib.Path The name of the WFDB record to be read, without any file extensions. If the argument contains any path delimiter characters, the argument will be interpreted as PATH/BASE_RECORD. @@ -3067,14 +3071,12 @@ def csv2mit(file_name, fs, units, fmt=None, adc_gain=None, baseline=None, print('Signal names: {}'.format(sig_name)) # Set the output header file name to be the same, remove path - if os.sep in file_name: - file_name = file_name.split(os.sep)[-1] - record_name = file_name.replace('.csv','') + record_name = pathlib.Path(file_name).stem if verbose: print('Output header: {}.hea'.format(record_name)) # Replace the CSV file tag with DAT - dat_file_name = file_name.replace('.csv','.dat') + dat_file_name = pathlib.Path(file_name).with_suffix('.dat').name dat_file_name = [dat_file_name] * n_sig if verbose: print('Output record: {}'.format(dat_file_name[0])) @@ -3228,7 +3230,7 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): Parameters ---------- - record_name : str + record_name : str or pathlib.Path The name of the WFDB record to be read, without any file extensions. If the argument contains any path delimiter characters, the argument will be interpreted as PATH/BASE_RECORD. 
@@ -3339,7 +3341,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, Parameters ---------- - record_name : str + record_name : str or pathlib.Path The name of the WFDB record to be read, without any file extensions. If the argument contains any path delimiter characters, the argument will be interpreted as PATH/BASE_RECORD. @@ -3425,20 +3427,22 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, channels=[1, 3]) """ - dir_name, base_record_name = os.path.split(record_name) - dir_name = os.path.abspath(dir_name) + _record_name = pathlib.Path(record_name) + base_record_name = _record_name.name + dir_name = _record_name.absolute().parent + record_suffix = _record_name.suffix # Read the header fields if (pn_dir is not None) and ('.' not in pn_dir): dir_list = pn_dir.split('/') pn_dir = posixpath.join(dir_list[0], get_version(dir_list[0]), *dir_list[1:]) - if record_name.endswith('.edf'): - record = edf2mit(record_name, pn_dir=pn_dir, record_only=True) - elif record_name.endswith('.wav'): - record = wav2mit(record_name, pn_dir=pn_dir, record_only=True) + if record_suffix == '.edf': + record = edf2mit(_record_name, pn_dir=pn_dir, record_only=True) + elif record_suffix == '.wav': + record = wav2mit(_record_name, pn_dir=pn_dir, record_only=True) else: - record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False) + record = rdheader(_record_name, pn_dir=pn_dir, rd_segments=False) # Set defaults for sampto and channels input variables if sampto is None: @@ -3514,7 +3518,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, # A single segment record elif isinstance(record, Record): - if record_name.endswith('.edf') or record_name.endswith('.wav'): + if record_suffix in ['.edf', '.wav',]: no_file = True sig_data = record.d_signal else: @@ -3630,7 +3634,7 @@ def rdsamp(record_name, sampfrom=0, sampto=None, channels=None, pn_dir=None, Parameters ---------- - record_name : str + record_name : str or pathlib.Path The name 
of the WFDB record to be read (without any file extensions). If the argument contains any path delimiter characters, the argument will be interpreted as PATH/baserecord @@ -4304,7 +4308,7 @@ def wrsamp(record_name, fs, units, sig_name, p_signal=None, d_signal=None, Parameters ---------- - record_name : str + record_name : str or pathlib.Path The string name of the WFDB record to be written (without any file extensions). Must not contain any "." since this would indicate an EDF file which is not compatible at this point. @@ -4373,7 +4377,7 @@ def wrsamp(record_name, fs, units, sig_name, p_signal=None, d_signal=None, """ # Check for valid record name - if '.' in record_name: + if '.' in str(record_name): raise Exception("Record name must not contain '.'") # Check input field combinations if p_signal is not None and d_signal is not None: