From bfee4570aeaa1b3578eda30ee74ee9447249e819 Mon Sep 17 00:00:00 2001
From: Pinar Alper
Date: Thu, 7 May 2020 00:20:41 +0200
Subject: [PATCH 1/4] data importer simplified demo data changed to DM-DS
 workshop scenario
---
 core/importer/base_importer.py                |  71 ++++
 core/importer/datadecs_importer.py            | 209 -----------
 core/importer/datasets_importer.py            | 332 +++++++++++++-----
 core/importer/elx_submission_importer.py      | 201 ++++++-----
 core/importer/projects_importer.py            |  96 +----
 core/management/commands/import_datasets.py   |  15 +-
 core/management/commands/load_demo_data.py    |  13 +-
 core/models/project.py                        |  10 +-
 core/tests/data/ELX_LU_SUB-1.json             |  30 +-
 core/tests/data/datadecs.json                 | 136 -------
 core/tests/data/datasets.json                 | 262 +++++++++++---
 core/tests/data/projects.json                 |  10 +-
 core/tests/importer/test_bioportal_client.py  |  28 --
 core/tests/importer/test_datadecs_importer.py |  51 ---
 core/tests/importer/test_datasets_importer.py |  11 +-
 core/tests/importer/test_export.py            |   2 +-
 .../importer/test_import_elx_submission.py    |   6 +-
 data/demo/datadecs.json                       |  44 ---
 data/demo/datasets.json                       |  83 ++---
 data/demo/projects.json                       |  65 +++-
 elixir_daisy/settings.py                      |   2 +-
 web/templates/about.html                      |  17 +-
 web/views/about.py                            |   5 +-
 23 files changed, 805 insertions(+), 894 deletions(-)
 create mode 100644 core/importer/base_importer.py
 delete mode 100644 core/importer/datadecs_importer.py
 delete mode 100644 core/tests/data/datadecs.json
 delete mode 100644 core/tests/importer/test_bioportal_client.py
 delete mode 100644 core/tests/importer/test_datadecs_importer.py
 delete mode 100644 data/demo/datadecs.json

diff --git a/core/importer/base_importer.py b/core/importer/base_importer.py
new file mode 100644
index 00000000..58903b2a
--- /dev/null
+++ b/core/importer/base_importer.py
@@ -0,0 +1,71 @@
+
+from core.models import Partner, Contact, ContactType
+from core.models import User
+from core.utils import DaisyLogger
+from django.conf import settings
+from core.constants import Groups as GroupConstants
+from django.contrib.auth.models import Group
+
+
+PRINCIPAL_INVESTIGATOR = 'Principal_Investigator'
+
+class BaseImporter:
+
+    logger = DaisyLogger(__name__)
+
+    def process_contacts(self, project_dict):
+        local_custodians = []
+        local_personnel = []
+        external_contacts = []
+
+        home_organisation = Partner.objects.get(acronym=settings.COMPANY)
+
+        for contact_dict in project_dict.get('contacts', []):
+            first_name = contact_dict.get('first_name').strip()
+            last_name = contact_dict.get('last_name').strip()
+            email = contact_dict.get('email').strip()
+            full_name = "{} {}".format(first_name, last_name)
+            role_name = contact_dict.get('role')
+            if home_organisation.elu_accession == contact_dict.get('institution').strip():
+                user = (User.objects.filter(first_name__icontains=first_name.lower(),
+                                            last_name__icontains=last_name.lower()) | User.objects.filter(
+                    first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first()
+                if user is None:
+                    self.logger.warning('no user found for %s an inactive user will be created', full_name)
+
+                    usr_name = first_name.lower() + '.' + last_name.lower()
+                    user = User.objects.create(username=usr_name, password='', first_name=first_name, last_name=last_name, is_active=False,
+                                               email=email,
+                                               )
+                    user.staff = True
+
+                    if role_name == PRINCIPAL_INVESTIGATOR:
+                        g = Group.objects.get(name=GroupConstants.VIP.value)
+                        user.groups.add(g)
+
+                    user.save()
+                if role_name == PRINCIPAL_INVESTIGATOR:
+                    local_custodians.append(user)
+                else:
+                    local_personnel.append(user)
+
+            else:
+                contact = (Contact.objects.filter(first_name__icontains=first_name.lower(),
+                                                  last_name__icontains=last_name.lower()) | Contact.objects.filter(
+                    first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first()
+                if contact is None:
+                    contact_type_pi, _ = ContactType.objects.get_or_create(name=role_name)
+                    contact, _ = Contact.objects.get_or_create(
+                        first_name=first_name,
+                        last_name=last_name,
+                        email=email,
+                        type=contact_type_pi
+                    )
+                    affiliation = Partner.objects.get(elu_accession=contact_dict.get('institution'))
+                    if affiliation:
+                        contact.partners.add(affiliation)
+                    contact.save()
+                external_contacts.append(contact)
+
+        return local_custodians, local_personnel, external_contacts
+
diff --git a/core/importer/datadecs_importer.py b/core/importer/datadecs_importer.py
deleted file mode 100644
index b3a55f83..00000000
--- a/core/importer/datadecs_importer.py
+++ /dev/null
@@ -1,209 +0,0 @@
-import json
-import sys
-
-from core.exceptions import DatasetImportError
-from core.models import Dataset, DataType, Partner, Project, Contract, ContactType, Contact, PartnerRole, \
-    GDPRRole, UseRestriction
-from core.models.data_declaration import SubjectCategory, DeidentificationMethod, DataDeclaration, ShareCategory, \
-    ConsentStatus
-from core.utils import DaisyLogger
-
-logger = DaisyLogger(__name__)
-
-
-class DatadecsImporter:
-
-    def import_json(self, json_string, stop_on_error=False, verbose=False):
-        logger.info('Import started for file')
-        result = True
-        all_dicts = json.loads(json_string)
-        for datadec_dict in all_dicts:
-            logger.debug(' * Importing data declaration : "{}"...'.format(datadec_dict.get('title', 'N/A')))
-            try:
-                self.process_datadec(datadec_dict)
-            except Exception as e:
-                logger.error('Import failed')
-                logger.error(str(e))
-                if verbose:
-                    import traceback
-                    ex = traceback.format_exception(*sys.exc_info())
-                    logger.error('\n'.join([e for e in ex]))
-                if stop_on_error:
-                    raise e
-                result = False
-            logger.debug(" ... complete!")
-        logger.info('Import result for file: {}'.format('success' if result else 'fail'))
-        return result
-
-    def process_datadec(self, datadec_dict, **kwargs):
-        try:
-            title = datadec_dict['title']
-        except KeyError:
-            raise DatasetImportError(data='Data declaration title missing')
-
-        if 'dataset_obj' not in kwargs:
-            try:
-                dataset_title = datadec_dict['dataset']
-                dataset = Dataset.objects.get(title=dataset_title.strip())
-            except KeyError:
-                raise DatasetImportError(data='Parent dataset info missing')
-            except Dataset.DoesNotExist:
-                raise DatasetImportError(data='Parent dataset not found in DB')
-        else:
-            dataset = kwargs.pop('dataset_obj')
-        try:
-            datadec = DataDeclaration.objects.get(title=title.strip(), dataset=dataset)
-        except DataDeclaration.DoesNotExist:
-            datadec = None
-
-        if datadec:
-            logger.warning("Data declaration with title '{}' already found. It will be updated.".format(title))
-        else:
-            datadec = DataDeclaration.objects.create(title=title, dataset=dataset)
-
-        datadec.has_special_subjects = datadec_dict.get('has_special_subjects', False)
-        datadec.data_types_notes = datadec_dict.get('data_type_notes', None)
-        datadec.deidentification_method = self.process_deidentification_method(datadec_dict)
-        datadec.subjects_category = self.process_subjects_category(datadec_dict)
-        datadec.special_subjects_description = datadec_dict.get('special_subject_notes', None)
-        datadec.other_external_id = datadec_dict.get('other_external_id', None)
-        datadec.share_category = self.process_access_category(datadec_dict)
-        datadec.consent_status = self.process_constent_status(datadec_dict)
-        datadec.comments = datadec_dict.get('source_notes', None)
-
-        if 'data_types' in datadec_dict:
-            datadec.data_types_received.set(self.process_datatypes(datadec_dict))
-
-        if 'contract_obj' not in kwargs:
-            if 'source_collaboration' in datadec_dict:
-                datadec.contract = self.process_source_contract(dataset, datadec_dict)
-        else:
-            datadec.contract = kwargs.pop('contract_obj')
-        if datadec.contract:
-            datadec.partner = datadec.contract.partners.first()
-        self.process_use_restrictions(datadec, datadec_dict)
-
-
-        datadec.save()
-
-    def process_datatypes(self, datadec_dict):
-        datatypes = []
-        for datatype_str in datadec_dict.get('data_types', []):
-            datatype_str = datatype_str.strip()
-            # TODO Data types is a controlled vocabulaRY we should not create new when importing
-            datatype, _ = DataType.objects.get_or_create(name=datatype_str)
-            datatypes.append(datatype)
-        return datatypes
-
-    def process_deidentification_method(self, datadec_dict):
-        deidentification_method_str = datadec_dict.get('de_identification', '')
-        try:
-            return DeidentificationMethod[deidentification_method_str]
-        except KeyError:
-            return DeidentificationMethod.pseudonymization
-
-    def process_subjects_category(self, datadec_dict):
-        if 'subject_categories' in datadec_dict:
-            sub_category_str = datadec_dict.get('subject_categories', '').strip()
-            try:
-                return SubjectCategory[sub_category_str]
-            except KeyError:
-                return SubjectCategory.unknown
-        else:
-            return SubjectCategory.unknown
-
-    def process_source_contract(self, dataset, datadec_dict):
-
-        contract_dict = datadec_dict['source_collaboration']
-
-        try:
-            partner_elu = contract_dict['collab_inst']
-            if partner_elu is None:
-                raise DatasetImportError(f'Partner accession number is NULL!')
-            partner = Partner.objects.get(elu_accession=partner_elu.strip())
-        except KeyError:
-            raise DatasetImportError(f'Contract partner accession number is missing')
-        except Partner.DoesNotExist:
-            raise DatasetImportError(f'Cannot find institution partner with the elu: {repository}')
-
-        if 'collab_project' not in contract_dict:
-            logger.debug(
-                ' * Contract project missing! Skipping contract setting for datadeclaration : "{}"...'.format(
-                    datadec_dict.get('title', 'N/A')))
-            return None
-        else:
-            # create contract project if it does not exist
-            try:
-                project = Project.objects.get(acronym=contract_dict['collab_project'].strip())
-            except Project.DoesNotExist:
-                project = Project.objects.create(
-                    acronym=contract_dict['collab_project'].strip()
-                )
-                project.local_custodians.set(dataset.local_custodians.all())
-                project.save()
-        try:
-            contract = Contract.objects.get(
-                partners_roles__partner=partner,
-                project=project)
-        except Contract.DoesNotExist:
-            if 'collab_role' in contract_dict:
-                role_str = contract_dict['collab_role']
-                role = GDPRRole[role_str]
-            else:
-                role = GDPRRole["joint_controller"]
-
-            contract = Contract.objects.create(
-                project=project,
-            )
-            contract.company_roles.add(role)
-            contract.add_partner_with_role(partner=partner, role=role)
-            contract.local_custodians.set(project.local_custodians.all())
-
-        if 'collab_pi' in contract_dict:
-            contact_type_pi, _ = ContactType.objects.get_or_create(name="Principal_Investigator")
-
-            contract_pi_str = contract_dict['collab_pi']
-            contract_split = contract_pi_str.split()
-
-            first_name = contract_split[0]
-            last_name = " ".join(contract_split[1:])
-            contact, _ = Contact.objects.get_or_create(
-                first_name=first_name,
-                last_name=last_name,
-                type=contact_type_pi
-            )
-            contact.partner = partner
-            contact.save()
-            partner_role = PartnerRole.objects.filter(contract=contract, partner=partner).first()
-            partner_role.contacts.add(contact)
-            partner_role.save()
-
-        contract.save()
-        return contract
-
-    def process_use_restrictions(self, data_dec, datadec_dict):
-        use_restrictions = []
-        for user_restriction_dict in datadec_dict['use_restrictions']:
-            ga4gh_code = user_restriction_dict['ga4gh_code']
-            notes = user_restriction_dict['note']
-
-            use_restriction = UseRestriction.objects.create(data_declaration=data_dec, restriction_class=ga4gh_code, notes=notes)
-            use_restrictions.append(use_restriction)
-        return use_restrictions
-
-    def process_access_category(self, datadec_dict):
-        share_category_str = datadec_dict.get('access_category', '').strip()
-        try:
-            return ShareCategory[share_category_str]
-        except KeyError:
-            return None
-
-    def process_constent_status(self, datadec_dict):
-        if 'consent_status' in datadec_dict:
-            consent_status_str = datadec_dict.get('consent_status', '').strip()
-            try:
-                return ConsentStatus[consent_status_str]
-            except KeyError:
-                return ConsentStatus.unknown
-        else:
-            return ConsentStatus.unknown
diff --git a/core/importer/datasets_importer.py b/core/importer/datasets_importer.py
index 987b4821..8f365837 100644
--- a/core/importer/datasets_importer.py
+++ b/core/importer/datasets_importer.py
@@ -2,18 +2,15 @@
 import sys
 
 from core.exceptions import DatasetImportError
-from core.models import Dataset, Project, StorageResource, User, Contract, Partner, GDPRRole
+from core.importer.base_importer import BaseImporter
+from core.models import Dataset, DataDeclaration, Project, StorageResource, Partner, \
+    UseRestriction, DataType
 from core.models.access import Access
+from core.models.data_declaration import ShareCategory, ConsentStatus, DeidentificationMethod, SubjectCategory
 from core.models.share import Share
 from core.models.storage_location import StorageLocationCategory, DataLocation
-from core.utils import DaisyLogger
-from core.constants import Groups as GroupConstants
-from django.contrib.auth.models import Group
 
-logger = DaisyLogger(__name__)
-
-
-class DatasetsImporter:
+class DatasetsImporter(BaseImporter):
     """
     `DatasetsImporter`, parse json representation of a set of datasets and store them in the database
     """
@@ -22,30 +19,30 @@ class DateImportException(Exception):
         pass
 
     def import_json(self, json_string, stop_on_error=False, verbose=False):
-        logger.info('Import started for file')
+        self.logger.info('Import started for file')
         result = True
         dataset_list = json.loads(json_string)
         for dataset in dataset_list:
-            logger.debug(' * Importing dataset: "{}"...'.format(dataset.get('title', 'N/A')))
+            self.logger.debug(' * Importing dataset: "{}"...'.format(dataset.get('name', 'N/A')))
             try:
                 self.process_dataset(dataset)
             except Exception as e:
-                logger.error('Import failed')
-                logger.error(str(e))
+                self.logger.error('Import failed')
+                self.logger.error(str(e))
                 if verbose:
                     import traceback
                     ex = traceback.format_exception(*sys.exc_info())
-                    logger.error('\n'.join([e for e in ex]))
+                    self.logger.error('\n'.join([e for e in ex]))
                 if stop_on_error:
                     raise e
                 result = False
-        logger.info('... completed')
-        logger.info('Import result for file: {}'.format('success' if result else 'fail'))
+        self.logger.info('... completed')
+        self.logger.info('Import result for file: {}'.format('success' if result else 'fail'))
         return result
 
     def process_dataset(self, dataset_dict):
         try:
-            title = dataset_dict['title']
+            title = dataset_dict['name']
         except KeyError:
             raise DatasetImportError(data='dataset without title')
 
@@ -57,7 +54,7 @@ def process_dataset(self, dataset_dict):
             dataset = None
 
         if dataset:
-            logger.warning("Dataset with title '{}' already found. It will be updated.".format(title))
+            self.logger.warning("Dataset with title '{}' already found. It will be updated.".format(title))
         else:
             dataset = Dataset.objects.create(title=title)
 
@@ -66,9 +63,11 @@ def process_dataset(self, dataset_dict):
 
         dataset.sensitivity = dataset_dict.get('sensitivity', None)
 
-        local_custodians = self.process_local_custodians(dataset_dict)
+        local_custodians, local_personnel, external_contacts = self.process_contacts(dataset_dict)
+
         if local_custodians:
-            dataset.local_custodians.set(local_custodians)
+            dataset.local_custodians.set(local_custodians, clear=True)
+
 
         data_locations = self.process_data_locations(dataset, dataset_dict)
         if data_locations:
@@ -88,34 +87,36 @@ def process_dataset(self, dataset_dict):
         for local_custodian in local_custodians:
             local_custodian.assign_permissions_to_dataset(dataset)
 
-    @staticmethod
-    def process_local_custodians(dataset_dict):
-        result = []
-
-        local_custodians = dataset_dict.get('local_custodian', [])
-
-        for local_custodian in local_custodians:
-            custodian_str_strip = local_custodian.strip()
-            user = (User.objects.filter(full_name__icontains=custodian_str_strip.lower()) | User.objects.filter(
-                full_name__icontains=custodian_str_strip.upper())).first()
-            if user is None:
-                names = custodian_str_strip.split(maxsplit=1)
-
-                if len(names) == 2:
-                    logger.warning('no user found for %s and inactive user will be created', custodian_str_strip)
-                    usr_name = names[0].strip().lower() + '.' + names[1].strip().lower()
-                    user = User.objects.create(username=usr_name, password='', first_name=names[0], last_name=names[1],is_active=False,
-                                               email='inactive.user@uni.lu',
-                                               )
-                    user.staff = True
-                    g = Group.objects.get(name=GroupConstants.VIP.value)
-                    user.groups.add(g)
-                    user.save()
-                    result.append(user)
-
-            else:
-                result.append(user)
-        return result
+        self.process_datadeclarations(dataset_dict, dataset)
+
+    # @staticmethod
+    # def process_local_custodians(dataset_dict):
+    #     result = []
+    #
+    #     local_custodians = dataset_dict.get('local_custodian', [])
+    #
+    #     for local_custodian in local_custodians:
+    #         custodian_str_strip = local_custodian.strip()
+    #         user = (User.objects.filter(full_name__icontains=custodian_str_strip.lower()) | User.objects.filter(
+    #             full_name__icontains=custodian_str_strip.upper())).first()
+    #         if user is None:
+    #             names = custodian_str_strip.split(maxsplit=1)
+    #
+    #             if len(names) == 2:
+    #                 logger.warning('no user found for %s and inactive user will be created', custodian_str_strip)
+    #                 usr_name = names[0].strip().lower() + '.' + names[1].strip().lower()
+    #                 user = User.objects.create(username=usr_name, password='', first_name=names[0], last_name=names[1],is_active=False,
+    #                                            email='inactive.user@uni.lu',
+    #                                            )
+    #                 user.staff = True
+    #                 g = Group.objects.get(name=GroupConstants.VIP.value)
+    #                 user.groups.add(g)
+    #                 user.save()
+    #                 result.append(user)
+    #
+    #         else:
+    #             result.append(user)
+    #     return result
 
     def process_project(self, project_name):
         try:
@@ -147,10 +148,10 @@ def process_data_locations(self, dataset, dataset_dict):
             'sample-storage': 'sample-storage',
             'other': 'other'
         }
-        if 'storage_locations' in dataset_dict:
+        if 'storages' in dataset_dict:
 
-            for storage_location_dict in dataset_dict['storage_locations']:
-                backend_name = storage_location_dict['storage_resource'].lower().strip()
+            for storage_location_dict in dataset_dict['storages']:
+                backend_name = storage_location_dict['platform'].lower().strip()
                 backend_name = backend_mapping.get(backend_name, backend_name)
                 if not backend_name:
                     raise DatasetImportError(data=f'Not a proper backend name: "{backend_name}".')
@@ -159,8 +160,7 @@ def process_data_locations(self, dataset, dataset_dict):
                 except StorageResource.DoesNotExist:
                     raise DatasetImportError(data=f'Cannot find StorageResource with slug: "{backend_name}".')
                 category = self.process_category(storage_location_dict)
-                acl_policy_description = self.process_acl_info(storage_location_dict)
-                #DLCLazz = backend.get_location_class()
+
                 location_delimeted = '\n'.join(storage_location_dict['locations'])
 
@@ -172,6 +172,7 @@ def process_data_locations(self, dataset, dataset_dict):
                 )
                 master_locations = DataLocation.objects.filter(category=StorageLocationCategory.master,
                                                                dataset=dataset)
+                acl_policy_description = self.process_acl_info(storage_location_dict)
                 if acl_policy_description:
                     acc = Access.objects.create(
                         dataset=dataset,
@@ -182,47 +183,33 @@ def process_data_locations(self, dataset, dataset_dict):
                 data_locations.append(dl)
         return data_locations
 
-    def process_user_acl(self, storage_location_dict):
-        storage_acl_info = storage_location_dict.get("storage_acl_users", "")
-        storage_acl_info_list = storage_acl_info.split(',')
-        users_with_access = []
-        for storage_acl_info_str in storage_acl_info_list:
-            # try to identify user
-            storage_acl_info_str = storage_acl_info_str.strip()
-            user = (User.objects.filter(full_name__icontains=storage_acl_info_str.lower()) | User.objects.filter(
-                full_name__icontains=storage_acl_info_str.upper())).first()
-            if user is None:
-                logger.warning('no user found for %s', storage_acl_info_str)
-            else:
-                users_with_access.append(user)
-        return users_with_access
 
     def process_shares(self, dataset_dict, dataset):
 
-        def process_share(share_object, dataset):
+        def process_share(share_dict, dataset):
             share = Share()
-            share.access_notes = share_object.get('share_notes')
+            share.access_notes = share_dict.get('share_notes')
             share.dataset = dataset
-            share_institution_elu = share_object.get('share_inst')
+            share_institution_elu = share_dict.get('share_inst')
             share_institution = Partner.objects.get(elu_accession=share_institution_elu.strip())
             share.partner = share_institution
-            project = dataset.project
-            if share_institution and project:
-                contracts = project.contracts.all()
-                for contract in contracts:
-                    for partner in contract.partners:
-                        if share_institution_elu.strip() == partner.elu_accession:
-                            share.contract = contract
-                            break
-                if not share.contract:
-                    contract = Contract.objects.create(
-                        project=project,
-                    )
-                    contract.company_roles.add(GDPRRole["joint_controller"])
-                    contract.add_partner_with_role(share_institution, GDPRRole["joint_controller"])
-                    contract.local_custodians.set(project.local_custodians.all())
-                    contract.save()
-                    share.contract = contract
+            # project = dataset.project
+            # if share_institution and project:
+            #     contracts = project.contracts.all()
+            #     for contract in contracts:
+            #         for partner in contract.partners:
+            #             if share_institution_elu.strip() == partner.elu_accession:
+            #                 share.contract = contract
+            #                 break
+            #     if not share.contract:
+            #         contract = Contract.objects.create(
+            #             project=project,
+            #         )
+            #         contract.company_roles.add(GDPRRole["joint_controller"])
+            #         contract.add_partner_with_role(share_institution, GDPRRole["joint_controller"])
+            #         contract.local_custodians.set(project.local_custodians.all())
+            #         contract.save()
+            #         share.contract = contract
             return share
 
         shares = dataset_dict.get('shares', [])
@@ -236,8 +223,177 @@ def process_category(self, storage_location_dict):
         return StorageLocationCategory.not_specified
 
     def process_acl_info(self, storage_location_dict):
-        if 'storage_acl_info' in storage_location_dict:
+        if 'accesses' in storage_location_dict:
+            return "\n".join(storage_location_dict['accesses'])
+        else:
+            return None
+
+    def process_datadeclarations(self, dataset_dict, dataset):
+
+        datadec_dicts = dataset_dict.get('data_declarations', [])
 
-            return storage_location_dict['storage_acl_info']
+        for ddec_dict in datadec_dicts:
+            self.process_datadeclaration(ddec_dict, dataset)
+
+    def process_datadeclaration(self, datadec_dict, dataset):
+        try:
+            title = datadec_dict['title']
+        except KeyError:
+            raise DatasetImportError(data='Data declaration title missing')
+
+        try:
+            datadec = DataDeclaration.objects.get(title=title.strip(), dataset=dataset)
+        except DataDeclaration.DoesNotExist:
+            datadec = None
+
+        if datadec:
+            self.logger.warning("Data declaration with title '{}' already found. It will be updated.".format(title))
+        else:
+            datadec = DataDeclaration.objects.create(title=title, dataset=dataset)
+
+        datadec.has_special_subjects = datadec_dict.get('has_special_subjects', False)
+        datadec.data_types_notes = datadec_dict.get('data_type_notes', None)
+        datadec.deidentification_method = self.process_deidentification_method(datadec_dict)
+        datadec.subjects_category = self.process_subjects_category(datadec_dict)
+        datadec.special_subjects_description = datadec_dict.get('special_subject_notes', None)
+        datadec.other_external_id = datadec_dict.get('other_external_id', None)
+        datadec.share_category = self.process_access_category(datadec_dict)
+        datadec.consent_status = self.process_constent_status(datadec_dict)
+        datadec.comments = datadec_dict.get('source_notes', None)
+
+        if 'data_types' in datadec_dict:
+            datadec.data_types_received.set(self.process_datatypes(datadec_dict))
+
+        # if 'contract_obj' not in kwargs:
+        #     if 'source_collaboration' in datadec_dict:
+        #         datadec.contract = self.process_source_contract(dataset, datadec_dict)
+        # else:
+        #     datadec.contract = kwargs.pop('contract_obj')
+        # if datadec.contract:
+        #     datadec.partner = datadec.contract.partners.first()
+        self.process_use_restrictions(datadec, datadec_dict)
+        datadec.dataset = dataset
+        datadec.save()
+
+
+    def process_datatypes(self, datadec_dict):
+        datatypes = []
+        for datatype_str in datadec_dict.get('data_types', []):
+            datatype_str = datatype_str.strip()
+            # TODO Data types is a controlled vocabulaRY we should not create new when importing
+            datatype, _ = DataType.objects.get_or_create(name=datatype_str)
+            datatypes.append(datatype)
+        return datatypes
+
+    def process_deidentification_method(self, datadec_dict):
+        deidentification_method_str = datadec_dict.get('de_identification', '')
+        try:
+            return DeidentificationMethod[deidentification_method_str]
+        except KeyError:
+            return DeidentificationMethod.pseudonymization
+
+    def process_subjects_category(self, datadec_dict):
+        if 'subject_categories' in datadec_dict:
+            sub_category_str = datadec_dict.get('subject_categories', '').strip()
+            try:
+                return SubjectCategory[sub_category_str]
+            except KeyError:
+                return SubjectCategory.unknown
         else:
+            return SubjectCategory.unknown
+
+    # def process_source_contract(self, dataset, datadec_dict):
+    #
+    #     contract_dict = datadec_dict['source_collaboration']
+    #
+    #     try:
+    #         partner_elu = contract_dict['collab_inst']
+    #         if partner_elu is None:
+    #             raise DatasetImportError(f'Partner accession number is NULL!')
+    #         partner = Partner.objects.get(elu_accession=partner_elu.strip())
+    #     except KeyError:
+    #         raise DatasetImportError(f'Contract partner accession number is missing')
+    #     except Partner.DoesNotExist:
+    #         raise DatasetImportError(f'Cannot find institution partner with the elu: {partner_elu}')
+    #
+    #     if 'collab_project' not in contract_dict:
+    #         logger.debug(
+    #             ' * Contract project missing! Skipping contract setting for datadeclaration : "{}"...'.format(
+    #                 datadec_dict.get('title', 'N/A')))
+    #         return None
+    #     else:
+    #         # create contract project if it does not exist
+    #         try:
+    #             project = Project.objects.get(acronym=contract_dict['collab_project'].strip())
+    #         except Project.DoesNotExist:
+    #             project = Project.objects.create(
+    #                 acronym=contract_dict['collab_project'].strip()
+    #             )
+    #             project.local_custodians.set(dataset.local_custodians.all())
+    #             project.save()
+    #     try:
+    #         contract = Contract.objects.get(
+    #             partners_roles__partner=partner,
+    #             project=project)
+    #     except Contract.DoesNotExist:
+    #         if 'collab_role' in contract_dict:
+    #             role_str = contract_dict['collab_role']
+    #             role = GDPRRole[role_str]
+    #         else:
+    #             role = GDPRRole["joint_controller"]
+    #
+    #         contract = Contract.objects.create(
+    #             project=project,
+    #         )
+    #         contract.company_roles.add(role)
+    #         contract.add_partner_with_role(partner=partner, role=role)
+    #         contract.local_custodians.set(project.local_custodians.all())
+    #
+    #     if 'collab_pi' in contract_dict:
+    #         contact_type_pi, _ = ContactType.objects.get_or_create(name="Principal_Investigator")
+    #
+    #         contract_pi_str = contract_dict['collab_pi']
+    #         contract_split = contract_pi_str.split()
+    #
+    #         first_name = contract_split[0]
+    #         last_name = " ".join(contract_split[1:])
+    #         contact, _ = Contact.objects.get_or_create(
+    #             first_name=first_name,
+    #             last_name=last_name,
+    #             type=contact_type_pi
+    #         )
+    #         contact.partner = partner
+    #         contact.save()
+    #         partner_role = PartnerRole.objects.filter(contract=contract, partner=partner).first()
+    #         partner_role.contacts.add(contact)
+    #         partner_role.save()
+    #
+    #     contract.save()
+    #     return contract
+
+    def process_use_restrictions(self, data_dec, datadec_dict):
+        use_restrictions = []
+        for user_restriction_dict in datadec_dict['use_restrictions']:
+            ga4gh_code = user_restriction_dict['ga4gh_code']
+            notes = user_restriction_dict['note']
+
+            use_restriction = UseRestriction.objects.create(data_declaration=data_dec, restriction_class=ga4gh_code, notes=notes)
+            use_restrictions.append(use_restriction)
+        return use_restrictions
+
+    def process_access_category(self, datadec_dict):
+        share_category_str = datadec_dict.get('access_category', '').strip()
+        try:
+            return ShareCategory[share_category_str]
+        except KeyError:
             return None
+
+    def process_constent_status(self, datadec_dict):
+        if 'consent_status' in datadec_dict:
+            consent_status_str = datadec_dict.get('consent_status', '').strip()
+            try:
+                return ConsentStatus[consent_status_str]
+            except KeyError:
+                return ConsentStatus.unknown
+        else:
+            return ConsentStatus.unknown
diff --git a/core/importer/elx_submission_importer.py b/core/importer/elx_submission_importer.py
index 5e74cf64..fbecf7f4 100644
--- a/core/importer/elx_submission_importer.py
+++ b/core/importer/elx_submission_importer.py
@@ -1,20 +1,20 @@
 import json
 import sys
 
-from django.utils.datetime_safe import datetime
+
 from core.exceptions import DatasetImportError
-from core.importer.datadecs_importer import DatadecsImporter
-from core.importer.datasets_importer import DatasetsImporter
-from core.models import Contact, Dataset, Partner, Project, ContactType, GDPRRole
-from core.models.contract import Contract
+
+from core.models import Contact, Dataset, Project, ContactType
+
 from core.utils import DaisyLogger
+from .base_importer import BaseImporter
 from .projects_importer import ProjectsImporter
 
 logger = DaisyLogger(__name__)
 
 
-class DishSubmissionImporter:
+class DishSubmissionImporter(BaseImporter):
     """
     `DishSubmissionImporter`, parse json export of the Data Submission System and create relevant
      Dataset, Collaboration, (external Project) and DataDeclaration records in DAISY
@@ -30,24 +30,16 @@ def import_json(self, json_string, stop_on_error=False, verbose=False):
         try:
             logger.info('Import started')
             submission_dict = json.loads(json_string)
-            logger.debug(' * Importing Data Declaration: "{}"...'.format(submission_dict['title']))
+            logger.debug(' * Importing Data Declaration: "{}"...'.format(submission_dict['name']))
             if self.is_elixir_submission(submission_dict):
                 project = Project.objects.filter(acronym=self.elixir_project_name).first()
 
             dataset = self.process_submission_as_dataset(submission_dict, project)
-            contract = self.process_submission_as_contract(submission_dict, project)
-            datadec_dicts = submission_dict.get('datadecs', [])
-            importer = DatadecsImporter()
-            for datadec_dict in datadec_dicts:
-                kwargs = {}
-                kwargs['dataset_obj'] = dataset
-                if contract:
-                    kwargs['contract_obj'] = contract
-                importer.process_datadec(datadec_dict, **kwargs)
+            # contract = self.process_submission_as_contract(submission_dict, project)
 
-            for study_dict in submission_dict.get('studies', []):
-                study = self.process_study(study_dict)
+            # for study_dict in submission_dict.get('studies', []):
+            #     study = self.process_study(study_dict)
 
@@ -63,72 +55,72 @@ def import_json(self, json_string, stop_on_error=False, verbose=False):
             return False
         return True
 
-    def process_submission_as_contract(self, submission_dict, project):
-        try:
-            partner_accession = submission_dict['submitting_institution']
-        except KeyError:
-            raise DatasetImportError(data='Submitting institute info missing. Aborting import!')
-
-        try:
-            partner = Partner.objects.get(elu_accession=partner_accession)
-        except Partner.DoesNotExist:
-            raise DatasetImportError(
-                data='Partner institute with accession {} not found in DB. Aborting import.'.format(partner_accession))
-
-        if self.is_elixir_submission(submission_dict):
-            try:
-                contract = Contract.objects.get(project=project, partners_roles__partner=partner)
-            except Contract.DoesNotExist:
-                contract = Contract.objects.create(
-                    project=project,
-                )
-                contract.company_roles.add(GDPRRole["joint_controller"])
-                contract.add_partner_with_role(partner, GDPRRole["joint_controller"])
-                contract.local_custodians.set(project.local_custodians.all())
-                contract.save()
-            return contract
-
-    def process_study(self, study_dict):
-        try:
-            title = study_dict['title']
-        except KeyError:
-            raise DatasetImportError(data='study without title')
-
-        description = study_dict.get('description', None)
-        ethics_approval_exists = study_dict.get('ethics_approval_exists', False)
-        ethics_notes = "The submitter confirms that an ethics approval exists for the data collection, sharing and \
-        the purposes for which the data is shared." if ethics_approval_exists else None
-
-        existing_project = Project.objects.filter(title=title).first()
-        if existing_project is not None:
-            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            logger.warning(
-                "Project with title '{}' already found. It will be imported again with timestamp {}.".format(title,
-                                                                                                             timestamp))
-            title = title + timestamp
-
-        project = Project.objects.create(title=title,
-                                         description=description,
-                                         has_cner=ethics_approval_exists,
-                                         cner_notes=ethics_notes
-                                         )
-        contacts = self.process_external_contacts(study_dict.get('contacts', []))
-
-        if contacts:
-            project.contacts.set(contacts)
-            project.save()
-
-        # study_types = self.process_studytypes(study_dict)
-        # if study_types:
-        #     project.study_types.set(study_types)
-        #     project.save()
-
-        return project
-
-    @staticmethod
-    def process_role(role_string):
-        role, _ = ContactType.objects.get_or_create(name=role_string.strip())
-        return role
+    # def process_submission_as_contract(self, submission_dict, project):
+    #     try:
+    #         partner_accession = submission_dict['submitting_institution']
+    #     except KeyError:
+    #         raise DatasetImportError(data='Submitting institute info missing. Aborting import!')
+    #
+    #     try:
+    #         partner = Partner.objects.get(elu_accession=partner_accession)
+    #     except Partner.DoesNotExist:
+    #         raise DatasetImportError(
+    #             data='Partner institute with accession {} not found in DB. Aborting import.'.format(partner_accession))
+    #
+    #     if self.is_elixir_submission(submission_dict):
+    #         try:
+    #             contract = Contract.objects.get(project=project, partners_roles__partner=partner)
+    #         except Contract.DoesNotExist:
+    #             contract = Contract.objects.create(
+    #                 project=project,
+    #             )
+    #             contract.company_roles.add(GDPRRole["joint_controller"])
+    #             contract.add_partner_with_role(partner, GDPRRole["joint_controller"])
+    #             contract.local_custodians.set(project.local_custodians.all())
+    #             contract.save()
+    #         return contract
+
+    # def process_study(self, study_dict):
+    #     try:
+    #         title = study_dict['title']
+    #     except KeyError:
+    #         raise DatasetImportError(data='study without title')
+    #
+    #     description = study_dict.get('description', None)
+    #     ethics_approval_exists = study_dict.get('ethics_approval_exists', False)
+    #     ethics_notes = "The submitter confirms that an ethics approval exists for the data collection, sharing and \
+    #     the purposes for which the data is shared." if ethics_approval_exists else None
+    #
+    #     existing_project = Project.objects.filter(title=title).first()
+    #     if existing_project is not None:
+    #         timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    #         logger.warning(
+    #             "Project with title '{}' already found. It will be imported again with timestamp {}.".format(title,
+    #                                                                                                          timestamp))
+    #         title = title + timestamp
+    #
+    #     project = Project.objects.create(title=title,
+    #                                      description=description,
+    #                                      has_cner=ethics_approval_exists,
+    #                                      cner_notes=ethics_notes
+    #                                      )
+    #     contacts = self.process_external_contacts(study_dict.get('contacts', []))
+    #
+    #     if contacts:
+    #         project.contacts.set(contacts)
+    #         project.save()
+    #
+    #     # study_types = self.process_studytypes(study_dict)
+    #     # if study_types:
+    #     #     project.study_types.set(study_types)
+    #     #     project.save()
+    #
+    #     return project
+
+    # @staticmethod
+    # def process_role(role_string):
+    #     role, _ = ContactType.objects.get_or_create(name=role_string.strip())
+    #     return role
 
     # def process_studytypes(self, study_dict):
     #     studytypes = []
@@ -138,22 +130,22 @@ def process_role(role_string):
     #         studytypes.append(studytype)
     #     return studytypes
 
-    def process_external_contacts(self, contact_dicts):
-        contacts = []
-        for contact_dict in contact_dicts:
-            if 'role' in contact_dict:
-                role = self.process_role(contact_dict.get('role'))
-
-            partner = ProjectsImporter.process_partner(contact_dict.get('institution'))
-            contact, _ = Contact.objects.get_or_create(first_name=contact_dict.get('first_name').strip(),
-                                                       last_name=contact_dict.get('last_name').strip(),
-                                                       email=contact_dict.get('email').strip(),
-                                                       type=role)
-            contact.partners.set([partner])
-            contact.save()
-            contacts.append(contact)
-
-        return contacts
+    # def process_external_contacts(self, contact_dicts):
+    #     contacts = []
+    #     for contact_dict in contact_dicts:
+    #         if 'role' in contact_dict:
+    #             role = self.process_role(contact_dict.get('role'))
+    #
+    #         partner = ProjectsImporter.process_partner(contact_dict.get('institution'))
+    #         contact, _ = Contact.objects.get_or_create(first_name=contact_dict.get('first_name').strip(),
+    #                                                    last_name=contact_dict.get('last_name').strip(),
+    #                                                    email=contact_dict.get('email').strip(),
+    #                                                    type=role)
+    #         contact.partners.set([partner])
+    #         contact.save()
+    #         contacts.append(contact)
+    #
+    #     return contacts
 
     def is_elixir_submission(self, submission_dict):
         return submission_dict['scope'] == 'e'
@@ -175,14 +167,17 @@ def process_submission_as_dataset(self, submission_dict, project):
             dataset.project = project
 
         created_on_str = submission_dict['created_on']
-        title = submission_dict['title']
+        title = submission_dict['name']
         scope_str = 'Elixir' if submission_dict['scope'] == 'e' else 'LCSB Collaboration'
         local_project_str = submission_dict.get('local_project', '')
         dataset.comments = "ELU Accession: {}\nTitle: {}\nCreated On: {}\nScope: {}\nSubmitted to Project: {}".format(
             elu_accession, title, created_on_str, scope_str, local_project_str)
 
-        local_custodians = DatasetsImporter.process_local_custodians(submission_dict)
+        local_custodians, local_personnel, external_contacts = self.process_contacts(submission_dict)
+
         if local_custodians:
-            dataset.local_custodians.set(local_custodians)
+            dataset.local_custodians.set(local_custodians, clear=True)
+
         dataset.save()
+
         return dataset
diff --git a/core/importer/projects_importer.py b/core/importer/projects_importer.py
index 00fb1f0b..dffe9540 100644
--- a/core/importer/projects_importer.py
+++ b/core/importer/projects_importer.py
@@ -1,23 +1,11 @@
-import logging
 import re
 from datetime import datetime
 from json import loads
 
-from core.models import Partner, Project, Publication, Contact, ContactType
-from core.models import User
+from core.importer.base_importer import BaseImporter
+from core.models import Partner, Project, Publication
 
-
-from django.conf import settings
-
-PRINCIPAL_INVESTIGATOR = 'Principal_Investigator'
-
-logger = logging.getLogger(__name__)
-
-
-from core.constants import Groups as GroupConstants
-from django.contrib.auth.models import Group
-
-class ProjectsImporter:
+class ProjectsImporter(BaseImporter):
     """
     `ProjectsImporter`, should be able to fill the database with projects' information, based on JSON file
     complying to the schema in:
@@ -36,17 +24,17 @@ class DateImportException(Exception):
 
     def import_json(self, json_string, stop_on_error=False):
         try:
-            logger.info('Import started"')
+            self.logger.info('Import started"')
             all_information = loads(json_string)
-            logger.debug('Import started"')
+            self.logger.debug('Import started"')
             for project in all_information:
-                logger.debug(' * Importing project: "{}"...'.format(project.get('acronym', "N/A")))
+                self.logger.debug(' * Importing project: "{}"...'.format(project.get('acronym', "N/A")))
                 self.process_project(project)
-                logger.debug(" ... success!")
-            logger.info('Import succeeded"')
+                self.logger.debug(" ... success!")
+            self.logger.info('Import succeeded"')
         except Exception as e:
-            logger.error('Import failed"')
-            logger.error(str(e))
+            self.logger.error('Import failed"')
+            self.logger.error(str(e))
             if stop_on_error:
                 raise e
 
@@ -56,7 +44,7 @@ def process_project(self, project_dict):
                         for publication_dict in project_dict.get('publications', [])]
 
-        title = project_dict.get('title', "N/A")
+        title = project_dict.get('name', "N/A")
         description = project_dict.get('description', None)
         has_cner = project_dict.get('has_national_ethics_approval', False)
         has_erp = project_dict.get('has_institutional_ethics_approval', False)
@@ -74,7 +62,7 @@ def process_project(self, project_dict):
                 erp_notes=erp_notes
             )
         else:
-            logger.warning("Project with acronym '{}' already found. It will be updated.".format(acronym))
+            self.logger.warning("Project with acronym '{}' already found. It will be updated.".format(acronym))
             project.title = title
             project.description = description
             project.has_cner = has_cner
@@ -89,7 +77,7 @@ def process_project(self, project_dict):
             message = "\tCouldn't import the 'start_date'. Does it follow the '%Y-%m-%d' format?\n\t"
             message = message + 'Was: "{}". '.format(project_dict.get('start_date'))
             message = message + "Continuing with empty value."
-            logger.warning(message)
+            self.logger.warning(message)
 
         try:
             if 'end_date' in project_dict and len(project_dict.get('end_date')) > 0:
@@ -98,7 +86,7 @@ def process_project(self, project_dict):
             message = "\tCouldn't import the 'end_date'. Does it follow the '%Y-%m-%d' format?\n\t"
             message = message + 'Was: "{}". '.format(project_dict.get('end_date'))
             message = message + "Continuing with empty value."
-            logger.warning(message)
+            self.logger.warning(message)
 
         project.save()
 
@@ -110,6 +98,9 @@ def process_project(self, project_dict):
         if local_custodians:
             project.local_custodians.set(local_custodians, clear=True)
 
+        if external_contacts:
+            project.contacts.set(external_contacts, clear=True)
+
         for publication in publications:
             project.publications.add(publication)
 
@@ -119,57 +110,6 @@ def process_project(self, project_dict):
             local_custodian.assign_permissions_to_dataset(project)
 
-
-    def process_contacts(self, project_dict):
-        local_custodians = []
-        local_personnel = []
-        external_contacts = []
-
-        home_organisation = Partner.objects.get(acronym=settings.COMPANY)
-
-        for contact_dict in project_dict.get('contacts', []):
-            first_name = contact_dict.get('first_name').strip()
-            last_name = contact_dict.get('last_name').strip()
-            full_name = "{} {}".format(first_name, last_name)
-            role_name = contact_dict.get('role')
-            if home_organisation.elu_accession == contact_dict.get('institution').strip():
-                user = (User.objects.filter(first_name__icontains=first_name.lower(),
-                                            last_name__icontains=last_name.lower()) | User.objects.filter(
-                    first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first()
-                if user is None:
-                    logger.warning('no user found for %s an inactive user will be created', full_name)
-
-                    usr_name = first_name.lower() + '.' + last_name.lower()
-                    user = User.objects.create(username=usr_name, password='', first_name=first_name, last_name=last_name, is_active=False,
-                                               email='inactive.user@uni.lu',
-                                               )
-                    user.staff = True
-
-                    if role_name == PRINCIPAL_INVESTIGATOR:
-                        g = Group.objects.get(name=GroupConstants.VIP.value)
-                        user.groups.add(g)
-
-                    user.save()
-                if role_name == PRINCIPAL_INVESTIGATOR:
-                    local_custodians.append(user)
-                else:
-                    local_personnel.append(user)
-
-            else:
-                contact = (Contact.objects.filter(first_name__icontains=first_name.lower(),
-                                                  last_name__icontains=last_name.lower()) | Contact.objects.filter(
-                    first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first()
-                if contact is None:
-                    contact = Contact.objects.create(first_name=first_name, last_name=last_name )
-                    contact.type = ContactType.objects.get_or_create(name=role_name)
-                    affiliation = Partner.objects.get(elu_accession=contact_dict.get('institution'))
-                    if affiliation:
-                        contact.partners.add(affiliation)
-                    contact.save()
-                external_contacts.append(contact)
-
-        return local_custodians, local_personnel, external_contacts
-
-
     @staticmethod
@@ -181,7 +121,7 @@ def process_partner(partner_string):
 
     @staticmethod
     def process_publication(publication_dict):
-        publication = Publication.objects.create(citation=publication_dict.get('citation_string'))
+        publication = Publication.objects.create(citation=publication_dict.get('citation'))
         if 'doi' in publication_dict:
             publication.doi = publication_dict.get('doi')
         publication.save()
diff --git a/core/management/commands/import_datasets.py b/core/management/commands/import_datasets.py
index c36467e6..c0bbe686 100644
--- a/core/management/commands/import_datasets.py
+++ b/core/management/commands/import_datasets.py
@@ -2,7 +2,7 @@
 
 from django.core.management import BaseCommand, CommandError
 
-from core.importer.datadecs_importer import DatadecsImporter
+
 from core.importer.datasets_importer import DatasetsImporter
 
 JSON_SUFFIX = '.json'
@@ -29,20 +29,13 @@ def handle(self, *args, **options):
         path_to_json_directory = options.get('d')
         verbose = options.get('verbose')
         exxit = options.get('exit')
-        importer = {"dataset": DatasetsImporter(), "datadec": DatadecsImporter()}
+        importer = DatasetsImporter()
 
         # We import all dataset files first
         for json_file_path in os.listdir(path_to_json_directory):
-            if json_file_path.startswith("dataset") and json_file_path.endswith(JSON_SUFFIX):
-                self.import_file(importer['dataset'], os.path.join(path_to_json_directory, json_file_path), verbose,
+            if json_file_path.endswith(JSON_SUFFIX):
+                self.import_file(importer, os.path.join(path_to_json_directory, json_file_path), verbose,
                                  exxit)
-
-        # Then we import all datadec files
-        for json_file_path in os.listdir(path_to_json_directory):
-            if json_file_path.startswith("datadec") and json_file_path.endswith(JSON_SUFFIX):
-                self.import_file(importer['datadec'], os.path.join(path_to_json_directory, json_file_path), verbose,
-                                 exxit)
-
     except Exception as e:
         self.stderr.write(
             self.style.ERROR("Something went wrong during the import! Is the path valid? Is the file valid?"))
diff --git a/core/management/commands/load_demo_data.py b/core/management/commands/load_demo_data.py
index b0de9b1a..6f2f5a94 100644
--- a/core/management/commands/load_demo_data.py
+++ b/core/management/commands/load_demo_data.py
@@ -2,7 +2,7 @@
 from django.conf import settings
 import os
 
-from core.importer.datadecs_importer import DatadecsImporter
+
 from core.importer.datasets_importer import DatasetsImporter
 from core.importer.projects_importer import ProjectsImporter
 from core.models import User
@@ -28,15 +28,6 @@ def handle(self, *args, **options):
                 importer = DatasetsImporter()
                 importer.import_json(json_file_contents)
             self.stdout.write(self.style.SUCCESS("Dataset import successful!"))
-            datadecs_json = os.path.join(DEMO_DATA_DIR, 'datadecs.json')
-            with open(datadecs_json, encoding='utf-8') as json_file:
-                json_file_contents = json_file.read()
-                importer = DatadecsImporter()
-                importer.import_json(json_file_contents)
-            self.stdout.write(self.style.SUCCESS("Data declaration import successful!"))
-
-
-
             admin_usr = User.objects.create_user(username='admin', password='', email='demo.admin@uni.lu')
             admin_usr.is_superuser =True
             admin_usr.save()
@@ -49,8 +40,6 @@ def handle(self, *args, **options):
                 user.set_password('demo')
                 user.save()
-
-
         except Exception as e:
             self.stderr.write(
                 self.style.ERROR("Something went wrong during the import! Is the path valid? Is the file valid?"))
diff --git a/core/models/project.py b/core/models/project.py
index df68a1a3..6fad164b 100644
--- a/core/models/project.py
+++ b/core/models/project.py
@@ -180,12 +180,18 @@ def to_dict(self):
                 "role": "Principal_Investigator" if lc.is_part_of(constants.Groups.VIP.name) else "Researcher",
                 "affiliations": [HomeOrganisation().name]})
 
+        pub_dicts = []
+        for pub in self.publications.all():
+            pub_dicts.append(
+                {"citation": pub.citation if pub.citation else None,
+                 "doi": pub.doi if pub.doi else None})
+
         base_dict = {
             "source": settings.SERVER_URL,
             "id_at_source": self.id.__str__(),
-            "name": self.acronym,
+            "acronym": self.acronym,
             "elu_accession": self.elu_accession if self.elu_accession else None,
-            "title": self.title if self.title else None,
+            "name": self.title if self.title else None,
             "description": self.description if self.description else None,
             "has_institutional_ethics_approval": self.has_erp,
             "has_national_ethics_approval": self.has_cner,
diff --git a/core/tests/data/ELX_LU_SUB-1.json b/core/tests/data/ELX_LU_SUB-1.json
index fbef30e3..a1ab39a6 100644
--- a/core/tests/data/ELX_LU_SUB-1.json
+++ b/core/tests/data/ELX_LU_SUB-1.json
@@ -1,12 +1,28 @@
 {
   "elu_accession": "ELX_LU_SUB-1",
-  "title": "Test Submission to be exported.",
+  "name": "Test Submission to be exported.",
   "submitting_institution": "ELU_I_5",
   "created_on": "2018-10-15",
   "scope": "e",
-  "local_custodian": [
-    "Elgin Gray",
-    "Rob Blue"
+  "contacts": [
+    {
+      "first_name": "Elgin",
+      "last_name": "Gray",
+      "role": "Principal_Investigator",
+      "institution": "ELU_I_77"
+    },
+    {
+      "first_name": "Rob",
+      "last_name": "Blue",
+      "role": "Principal_Investigator",
+      "institution": "ELU_I_77"
+    },
+    {
+      "first_name": "Embury",
+      "last_name": "Bask",
+      "role": "Researcher",
+      "institution": "ELU_I_77"
+    }
   ],
   "local_project": "Submitting to NCER PD Diagnosis project",
   "data_providers": [
@@ -41,9 +57,9 @@
       ]
     }
   ],
-  "datadecs": [
+  "data_declarations": [
     {
-      "title": "Test datadec 1",
+      "name": "Test datadec 1",
       "source_study": "Test Study ABC",
       "legal_basis_data_collection": "Consent",
       "legal_basis_data_sharing": "Consent",
@@ -67,7 +83,7 @@
       ]
     },
     {
-      "title": "Test datadec 2",
+      "name": "Test datadec 2",
       "source_study": "Test Study ABC",
       "legal_basis_data_collection": "Consent",
       "legal_basis_data_sharing": "Consent",
diff --git a/core/tests/data/datadecs.json b/core/tests/data/datadecs.json
deleted file mode 100644
index f54d0159..00000000
--- a/core/tests/data/datadecs.json
+++ /dev/null
@@ -1,136 +0,0 @@
-[
-  {
-    "dataset": "ABCD data",
-    "title": "ABCD",
-    "data_type_notes": "..\n",
-    "data_types": [
-      "Metabolomics",
-      "Clinical_data",
-      "Methylation_array"
-    ],
-    "de_identification": "anonymization",
-    "subject_categories": "controls",
-    "has_special_subjects": true,
-    "special_subject_notes": "2 year old children",
-    "source_notes": "Data is from collaborator.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Use is restricted to projects: ABCD"
-      },
-      {
-        "ga4gh_code": "PUB",
-        "note": "Acknowledgement required."
-      }
-    ]
-  },
-  {
-    "dataset": "Hypertension data",
-    "title": "Hypertension-ABC disease",
-    "data_types": [
-      "Genotype_data",
-      "Whole_genome_sequencing"
-    ],
-    "de_identification": "pseudonymization",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "source_collaboration": {
-      "collab_inst": "ELU_I_94",
-      "collab_pi": "Alberto Pico",
-      "collab_project": "Hypertension",
-      "collab_role": "joint_controller"
-    },
-    "source_notes": "Data is from collaborator.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Use is restricted to projects: Hypertension"
-      },
-      {
-        "ga4gh_code": "RS-[XX]",
-        "note": "Use is restricted to research areas: Hypertension-ABC disease"
-      }
-    ]
-  },
-  {
-    "dataset": "PD data",
-    "title": "XYZ",
-    "source_notes": "Data is from own cohort.",
-    "data_types": [
-      "Cell_Imaging",
-      "Clinical_data",
-      "Other_Phenotype_data",
-      "Samples"
-    ],
-    "de_identification": "pseudonymization",
-    "ombudsman": "Biobank, Principle Investigator",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "consent_status": "homogeneous",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Consent form restricts data use to projects XYZ"
-      },
-      {
-        "ga4gh_code": "RS-[XX]",
-        "note": "Data is consented for research onParkinson's disease"
-      },
-      {
-        "ga4gh_code": "GS-[XX]",
-        "note": "Data is consented for sharing outside institute (Within Luxembourg)"
-      }
-    ]
-  },
-  {
-    "dataset": "REPO data",
-    "title": "ZZZZ",
-    "source_collaboration": {
-      "collab_inst": "ELU_I_84"
-    },
-    "source_notes": "Data is obtained from repository.",
-    "data_types": [
-      "Whole_genome_sequencing",
-      "Other_Phenotype_data"
-    ],
-    "de_identification": "pseudonymization",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "access_category": "controlled_access",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PUB",
-        "note": "Acknowledgement required."
-      }
-    ]
-  },
-  {
-    "dataset": "PD data",
-    "title": "PD data (german cohort)",
-    "data_type_notes": "Small-scale protein, mRNA or mtDNA integrity data, cell and mitochondrial function data\n",
-    "data_types": [
-      "Other",
-      "Samples"
-    ],
-    "de_identification": "pseudonymization",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "source_collaboration": {
-      "collab_inst": "ELU_I_8",
-      "collab_pi": "Manuela Swift",
-      "collab_project": "PD_Project",
-      "collab_role": "joint_controller"
-    },
-    "source_notes": "Data is from collaborator.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PUB",
-        "note": "Acknowledgement required."
-      },
-      {
-        "ga4gh_code": "TS-[XX]",
-        "note": "Data is obtained for a limited duration. 2021-02-28"
-      }
-    ]
-  }
-]
diff --git a/core/tests/data/datasets.json b/core/tests/data/datasets.json
index 5c9b3d28..7b88c281 100644
--- a/core/tests/data/datasets.json
+++ b/core/tests/data/datasets.json
@@ -1,9 +1,6 @@
 [
   {
-    "local_custodian": [
-      "Igor Teal"
-    ],
-    "title": "ABCD data",
+    "name": "ABCD data",
     "project": "ABCD",
     "shares": [
       {
@@ -11,37 +8,116 @@
         "share_notes": "Melanie Silver"
       }
     ],
-    "storage_locations": [
+    "contacts": [
       {
-        "storage_acl_users": "Igor Teal",
-        "storage_acl_notes":"Test description......",
-        "storage_resource": "Other",
+        "first_name": "Igor",
+        "last_name": "Teal",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      }
+
+    ],
+    "storages": [
+      {
+        "accesses": [
+          "Igor Teal"
+        ],
+        "platform": "Other",
         "locations": [ "https://someserver.edu/Studies/Gambia", "https://someserver.edu/Studies/Gambia_additional"],
         "category": "master"
       }
+    ],
+    "data_declarations": [ {
+      "title": "ABCD",
+      "data_type_notes": "..\n",
+      "data_types": [
+        "Metabolomics",
+        "Clinical_data",
+        "Methylation_array"
+      ],
+      "de_identification": "anonymization",
+      "subject_categories": "controls",
+      "has_special_subjects": true,
+      "special_subject_notes": "2 year old children",
+      "source_notes": "Data is from collaborator.",
+      "use_restrictions": [
+        {
+          "ga4gh_code": "PS",
+          "note": "Use is restricted to projects: ABCD"
+        },
+        {
+          "ga4gh_code": "PUB",
+          "note": "Acknowledgement required."
+        }
+      ]
+    }
     ]
   },
   {
-    "local_custodian": [
-      "Joanne Swift"
-    ],
-    "title": "Hypertension data",
+
+    "name": "Hypertension data",
     "project": "Hypertension",
-    "storage_locations": [
+    "contacts": [
+      {
+        "first_name": "Joanne",
+        "last_name": "Swift",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      }
+
+    ],
+    "storages": [
       {
-        "storage_acl_users": "Elgin Gray, Paul Mauve",
-        "storage_resource": "application",
+        "accesses": [
+          "Elgin Gray", "Paul Mauve"
+        ],
+        "platform": "application",
         "locations": ["RedCap sevrver at http://address"],
         "category": "master"
       }
+    ],
+    "data_declarations": [
+      {
+        "title": "Hypertension-ABC disease",
+        "data_types": [
+          "Genotype_data",
+          "Whole_genome_sequencing"
+        ],
+        "de_identification": "pseudonymization",
+        "subject_categories": "cases_and_controls",
+        "has_special_subjects": false,
+        "source_collaboration": {
+          "collab_inst": "ELU_I_94",
+          "collab_pi": "Alberto Pico",
+          "collab_project": "Hypertension",
+          "collab_role": "joint_controller"
+        },
+        "source_notes": "Data is from collaborator.",
+        "use_restrictions": [
+          {
+            "ga4gh_code": "PS",
+            "note": "Use is restricted to projects: Hypertension"
+          },
+          {
+            "ga4gh_code": "RS-[XX]",
+            "note": "Use is restricted to research areas: Hypertension-ABC disease"
+          }
+        ]
+      }
     ]
   },
   {
-    "local_custodian": [
-      "Rob Blue"
-    ],
-    "title": "MDPDP data",
+    "name": "MDPDP data",
     "project": "MDPDP",
+    "contacts": [
+      {
+        "first_name": "Rob",
+        "last_name": "Blue",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      }
+
+    ],
     "shares": [
       {
         "share_inst": "ELU_I_79",
@@ -52,48 +128,89 @@
         "share_notes": "Max Ginger"
       }
     ],
-    "storage_locations": [
+    "storages": [
       {
-        "storage_acl_users": "Paul Antony",
-        "storage_resource": "Other",
+        "accesses": [
+          "Paul Antony"
+        ],
+        "platform": "Other",
         "locations": [ "\\\\someserver.edu\\LCSB_HCS\\OperaQEHS", "\\\\someserver.edu\\users\\PA\\PrecisionMedicine","\\\\someserver.edu\\Images\\groups"],
         "category": "master"
       },
       {
-        "storage_resource": "sample-storage",
+        "platform": "sample-storage",
         "locations": ["Hospital ABCDFE"],
         "category": "master"
       }
     ]
   },
   {
-    "title": "REPO data",
-    "local_custodian": [
-      "Paul Mauve",
-      "Rene Sahoo"
-    ],
-    "storage_acl_info": "Paul Mauve, John Doe",
-    "storage_locations": [
+    "name": "REPO data",
+    "storages": [
       {
-        "storage_resource": "lcsb_group_server",
+        "accesses": [
+          "Paul Mauve", "John Doe"
+        ],
+        "platform": "lcsb_group_server",
         "locations": [
           "some_server_directory:/work/projects/wgs/cohort"
         ],
         "category": "master"
       }
+    ],
+    "contacts": [
+      {
+        "first_name": "Paul",
+        "last_name": "Mauve",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      }, {
+        "first_name": "Rene",
+        "last_name": "Sahoo",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      }
+
+    ],
+    "data_declarations": [
+      {
+        "title": "ZZZZ",
+        "source_collaboration": {
+          "collab_inst": "ELU_I_84"
+        },
+        "source_notes": "Data is obtained from repository.",
+        "data_types": [
+          "Whole_genome_sequencing",
+          "Other_Phenotype_data"
+        ],
+        "de_identification": "pseudonymization",
+        "subject_categories": "cases_and_controls",
+        "has_special_subjects": false,
+        "access_category": "controlled_access",
+        "use_restrictions": [
+          {
+            "ga4gh_code": "PUB",
+            "note": "Acknowledgement required."
+          }
+        ]
+      }
     ]
   },
   {
-    "title": "PD data",
-    "local_custodian": [
-      "Ali Gator"
-    ],
+    "name": "PD data",
     "project": "PD_Project",
     "shares": [],
-    "storage_acl_info": "All group members have access",
-    "storage_locations": [
+    "contacts": [
+      {
+        "first_name": "Ali",
+        "last_name": "Gator",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      }
+    ],
+    "storages": [
       {
-        "storage_resource": "lcsb_group_server",
+        "platform": "lcsb_group_server",
         "locations": [
           "\\\\some_server\\GROUP\\Projects\\SUB_FOLDER",
           "\\\\some_server\\GROUP\\General"
@@ -101,14 +218,14 @@
         "category": "master"
       },
       {
-        "storage_resource": "Other",
+        "platform": "Other",
         "locations": [
           "this server is backed-up by IT"
         ],
         "category": "backup"
      },
       {
-        "storage_resource": "atlas_personal",
+        "platform": "atlas_personal",
         "locations": [
           "\\\\some_server\\users\\first.user",
           "\\\\some_server\\users\\second.user ",
@@ -122,7 +239,7 @@
         "category": "copy"
       },
       {
-        "storage_resource": "lcsb_laptop",
+        "platform": "lcsb_laptop",
         "locations": [
           "first.user",
           "second.user",
@@ -136,7 +253,7 @@
         "category": "copy"
       },
       {
-        "storage_resource": "External Storage (e.g. Hard disk, DVD)",
+        "platform": "External Storage (e.g. Hard disk, DVD)",
         "locations": [
           "first.user",
           "second.user",
@@ -150,7 +267,7 @@
         "category": "copy"
       },
       {
-        "storage_resource": "Owncloud",
+        "platform": "Owncloud",
         "locations": [
           "first.user",
           "second.user",
@@ -164,12 +281,69 @@
         "category": "copy"
       },
       {
-        "storage_resource": "sample-storage",
+        "platform": "sample-storage",
         "locations": [
           "Hospital/Institute Floor X Cabinet Y"
         ],
         "category": "master"
       }
+    ],
+    "data_declarations": [{
+      "title": "XYZ",
+      "source_notes": "Data is from own cohort.",
+      "data_types": [
+        "Cell_Imaging",
+        "Clinical_data",
+        "Other_Phenotype_data",
+        "Samples"
+      ],
+      "de_identification": "pseudonymization",
+      "ombudsman": "Biobank, Principle Investigator",
+      "subject_categories": "cases_and_controls",
+      "has_special_subjects": false,
+      "consent_status": "homogeneous",
+      "use_restrictions": [
+        {
+          "ga4gh_code": "PS",
+          "note": "Consent form restricts data use to projects XYZ"
+        },
+        {
+          "ga4gh_code": "RS-[XX]",
+          "note": "Data is consented for research onParkinson's disease"
+        },
+        {
+          "ga4gh_code": "GS-[XX]",
+          "note": "Data is consented for sharing outside institute (Within Luxembourg)"
+        }
+      ]
+    }, {
+      "title": "PD data (german cohort)",
+      "data_type_notes": "Small-scale protein, mRNA or mtDNA integrity data, cell and mitochondrial function data\n",
+      "data_types": [
+        "Other",
+        "Samples"
+      ],
+      "de_identification": "pseudonymization",
+      "subject_categories": "cases_and_controls",
+      "has_special_subjects": false,
+      "source_collaboration": {
+        "collab_inst": "ELU_I_8",
+        "collab_pi": "Manuela Swift",
+        "collab_project": "PD_Project",
+        "collab_role": "joint_controller"
+      },
+      "source_notes": "Data is from collaborator.",
+      "use_restrictions": [
+        {
+          "ga4gh_code": "PUB",
+          "note": "Acknowledgement required."
+        },
+        {
+          "ga4gh_code": "TS-[XX]",
+          "note": "Data is obtained for a limited duration. 2021-02-28"
+        }
+      ]
+    }
     ]
   }
 ]
\ No newline at end of file
diff --git a/core/tests/data/projects.json b/core/tests/data/projects.json
index 1f1cfa0d..0f5edaa9 100644
--- a/core/tests/data/projects.json
+++ b/core/tests/data/projects.json
@@ -1,7 +1,7 @@
 [
   {
     "acronym": "In vitro disease modeling",
-    "title": "Parkinson's disease in vitro disease modeling - Focus on genes ........., .",
+    "name": "Parkinson's disease in vitro disease modeling - Focus on genes ........., .",
     "description": "Generation of disease specific iPSCs; ...............",
     "start_date": "2014-01-01",
     "end_date": "",
@@ -31,16 +31,16 @@
     "institutional_ethics_approval_notes": "test notes 123",
     "publications": [
       {
-        "citation_string": "Paper 1 citation string...."
+        "citation": "Paper 1 citation string...."
       },
       {
-        "citation_string": " Paper 2 citation string..."
+        "citation": " Paper 2 citation string..."
       }
     ]
   },
   {
     "acronym": "CCCC deficiency",
-    "title": "CCCC deficiency",
+    "name": "CCCC deficiency",
     "description": "Analysis of CCCC function in human disease",
     "start_date": "2016-11-01",
     "contacts": [
@@ -75,7 +75,7 @@
     "national_ethics_approval_notes": "We analyse samples ........",
     "publications": [
       {
-        "citation_string": "CCCC deficiency: a novel method in ........ (in preparation)"
+        "citation": "CCCC deficiency: a novel method in ........ (in preparation)"
       }
     ]
   }
diff --git a/core/tests/importer/test_bioportal_client.py b/core/tests/importer/test_bioportal_client.py
deleted file mode 100644
index f48eddf0..00000000
--- a/core/tests/importer/test_bioportal_client.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import urllib.request, urllib.error, urllib.parse
-import json
-import pytest
-
-REST_URL = "http://data.bioontology.org"
-API_KEY = ""
-
-
-def get_json(url):
-    opener = urllib.request.build_opener()
-    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
-    return json.loads(opener.open(url).read())
-
-@pytest.mark.skip(reason="we currently use local ontologies. However in the future we may switch to bioportal search.")
-@pytest.mark.webtest
-def test_term_search():
-    terms = []
-    terms.append("lewy")
-
-
-    # Do a search for every term
-    search_results = []
-    for term in terms:
-        search_results.append(get_json(REST_URL + "/search?q=" + term)["collection"])
-
-    # Print the results
-    for result in search_results:
-        print(result)
\ No newline at end of file
diff --git a/core/tests/importer/test_datadecs_importer.py b/core/tests/importer/test_datadecs_importer.py
deleted file mode 100644
index 798931f4..00000000
--- a/core/tests/importer/test_datadecs_importer.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-
-import pytest
-
-from core.importer.datadecs_importer import DatadecsImporter
-from core.importer.datasets_importer import DatasetsImporter
-from core.models import Dataset, DataDeclaration
-from test import factories
-
-
-@pytest.mark.xfail
-@pytest.mark.django_db
-def test_dummy(celery_session_worker, storage_resources, can_defer_constraint_checks):
-    pass
-
-
-@pytest.mark.django_db
-def test_import_datadecs(celery_session_worker, contact_types, partners, gdpr_roles, storage_resources, can_defer_constraint_checks):
-
-    VIP = factories.VIPGroup()
-
-    factories.UserFactory.create(first_name='Igor', last_name='Teal', groups=[VIP])
-    factories.UserFactory.create(first_name='Joanne', last_name='Swift', groups=[VIP])
-    factories.UserFactory.create(first_name='Elgin', last_name='Gray', groups=[VIP])
-    factories.UserFactory.create(first_name='Paul', last_name='Mauve', groups=[VIP])
-    factories.UserFactory.create(first_name='Rene', last_name='Sahoo', groups=[VIP])
-    factories.UserFactory.create(first_name='Rob', last_name='Blue', groups=[VIP])
-
-    dataset_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/datasets.json")
-    with open(dataset_file, "r") as f:
-        importer = DatasetsImporter()
-        importer.import_json(f.read(), True)
-
-    datadec_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/datadecs.json")
-    with open(datadec_file, "r") as f:
-        importer = DatadecsImporter()
-        importer.import_json(f.read(), True)
-
-    dsets = Dataset.objects.all()
-    assert 5 == dsets.count()
-
-    ddecs = DataDeclaration.objects.all()
-    HyperData = ddecs[1]
-    assert 'Hypertension-ABC disease' == HyperData.title
-    contract = HyperData.contract
-    first_partner_role = contract.partners_roles.first()
-    assert first_partner_role.contacts.count() > 0
-    assert "Alberto" == first_partner_role.contacts.first().first_name
-    assert "Pico" == first_partner_role.contacts.first().last_name
-    assert "Hypertension" == contract.project.acronym
-    assert "ELU_I_94" == first_partner_role.partner.elu_accession
diff --git a/core/tests/importer/test_datasets_importer.py b/core/tests/importer/test_datasets_importer.py
index f154adea..0958bf6e 100644
--- a/core/tests/importer/test_datasets_importer.py
+++ 
b/core/tests/importer/test_datasets_importer.py @@ -3,10 +3,16 @@ import pytest from core.importer.datasets_importer import DatasetsImporter -from core.models import Dataset, Project +from core.models import Dataset, Project, DataDeclaration from test import factories +@pytest.mark.xfail +@pytest.mark.django_db +def test_dummy(celery_session_worker, storage_resources, can_defer_constraint_checks): + pass + + @pytest.mark.django_db def test_import_datasets(celery_session_worker, storage_resources, partners, gdpr_roles, can_defer_constraint_checks): VIP = factories.VIPGroup() @@ -44,3 +50,6 @@ def test_import_datasets(celery_session_worker, storage_resources, partners, gdp d4 = Dataset.objects.filter(title='PD data').first() assert ["Ali Gator"] == [employee.full_name for employee in d4.local_custodians.all()] assert 7 == d4.data_locations.all().count() + + ddecs = DataDeclaration.objects.all() + assert 5 == ddecs.count() diff --git a/core/tests/importer/test_export.py b/core/tests/importer/test_export.py index 8c24b034..debc0342 100644 --- a/core/tests/importer/test_export.py +++ b/core/tests/importer/test_export.py @@ -26,7 +26,7 @@ def test_export_projects(celery_session_worker, contact_types, partners, gdpr_ro project_dicts = dict['items'] assert 2 == len(project_dicts) - assert "Test_PRJ" == project_dicts[0]['name'] + assert "Title of test project." == project_dicts[0]['name'] assert 2 == len(project_dicts[0]['contacts']) #TODO add check of more fields and schema validation diff --git a/core/tests/importer/test_import_elx_submission.py b/core/tests/importer/test_import_elx_submission.py index 62d343fd..0aad6e6f 100644 --- a/core/tests/importer/test_import_elx_submission.py +++ b/core/tests/importer/test_import_elx_submission.py @@ -25,10 +25,12 @@ def test_import_submission(celery_session_worker, partners, gdpr_roles, can_defe importer = DishSubmissionImporter(elixir_project.title) importer.import_json(file_with_dataset.read(), True, True) assert 1 == Dataset.objects.all().count() - assert 2 == Project.objects.all().count() + # assert 2 == Project.objects.all().count() dataset = Dataset.objects.first() assert 'ELX_LU_SUB-1' == dataset.title - assert 2 == dataset.data_declarations.all().count() + # assert 2 == dataset.data_declarations.all().count() + # TODO finalise Submission importer once elixir-dcp i.e. DISH goes into production. + # Mapping from DISH to DAISY not yet complete... 
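+    # Until that mapping lands, the assertions below only sanity-check the imported dataset's title, parent project and local custodians.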
assert 'ELIXIR' == dataset.project.title assert 2 == dataset.local_custodians.all().count() assert ["Elgin Gray", "Rob Blue"] == [custodian.full_name for custodian in dataset.local_custodians.all()] diff --git a/data/demo/datadecs.json b/data/demo/datadecs.json deleted file mode 100644 index e2bde1b5..00000000 --- a/data/demo/datadecs.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "dataset": "LCSB Human Data Survey v-1", - "title": "Survey Responses", - "data_type_notes": "XLS survey files filled in by PIs", - "data_types": [ - "Other"], - "has_special_subjects": false, - "source_collaboration": { - "collab_inst": "ELU_I_77", - "collab_pi": "John Doe", - "collab_project": "DAISY-Dev", - "collab_role": "controller" - }, - "source_notes": "Data is collected from all LCSB PIs working with Human Data.", - "use_restrictions": [ - { - "ga4gh_code": "PS", - "note": "Use is restricted to projects: DAISY-Dev" - } - ] - }, - { - "dataset": "DAISY LCSB Deployment", - "title": "Survey Responses Imported", - "data_type_notes": "XLS survey files filled in by PIs are imported in DAISY", - "data_types": [ - "Other"], - "has_special_subjects": false, - "source_collaboration": { - "collab_inst": "ELU_I_77", - "collab_pi": "John Doe", - "collab_project": "DAISY-Dev", - "collab_role": "controller" - }, - "source_notes": "Data is collected from all LCSB PIs working with Human Data.", - "use_restrictions": [ - { - "ga4gh_code": "PS", - "note": "Use is restricted to projects: DAISY-Dev" - } - ] - } -] \ No newline at end of file diff --git a/data/demo/datasets.json b/data/demo/datasets.json index 7fc91f2d..d810603d 100644 --- a/data/demo/datasets.json +++ b/data/demo/datasets.json @@ -1,61 +1,54 @@ [ { - "title": "LCSB Human Data Survey v-1", - "local_custodian": [ - "Alice White", - "Jane Doe" - ], - "project": "DAISY-Dev", - "shares": [ - { - "share_inst": "ELU_I_96", - "share_notes": "Joe Bloggs" - } - ], - "storage_locations": [ + "project": "EPIC-DREM", + "name": "EPIC-DREM Sequencing data", + "description": null, + "data_declarations": [ { - "storage_resource": "atlas_project", - "locations": [ - "smb://atlas.uni.lux/Data_Protection/Survey/XLS" + "title": "ChIP-Seq, RNA-seq, ...", + "data_types": [ + "RNASeq", + "Samples", + "ChIP-seq" ], - "category": "master", - "storage_acl_info": "Alice White and Jane Doe has access to folder." + "access_category": null, + "subjects_category": "controls", + "de_identification": "pseudonymization", + "consent_status": "homogeneous", + "has_special_subjects": null, + "special_subjects_description": "", + "embargo_date": null, + "storage_end_date": "2021-04-05", + "storage_duration_criteria": "The project PI will assess whether data should be retained for further research at the storage end date." } - ] - }, - { - "title": "DAISY LCSB Deployment", - "local_custodian": [ - "Alice White", - "Jane Doe" ], - "project": "DAISY-Dev", - "storage_locations": [ + "storages": [ { - "storage_resource": "application", + "platform": "application", "locations": [ - "https://daisy.lcsb.uni.lu" + "https://webdav-r3lab.uni.lu/" ], "category": "master", - "storage_acl_info": "All LCSB staff with an active LDAP account can access." + "accesses": [ + "The data is available on the sequencing platform's webdav server." 
+ ] } - ] - }, - { - "title": "DAISY Demo Deployment", - "local_custodian": [ - "Alice White", - "Jane Doe" ], - "project": "DAISY-Dev", - "storage_locations": [ + "transfers": [], + "contacts": [ { - "storage_resource": "application", - "locations": [ - "https://daisy-demo.lcsb.uni.lu" - ], - "category": "master", - "storage_acl_info": "This is a demo deployment for external parties. IP Restricted access." + "first_name": "Alice", + "last_name": "White", + "email": "alice.white@uni.lu", + "role": "Principal_Investigator", + "institution": "ELU_I_77" + }, + { + "first_name": "John", + "last_name": "Black", + "email": "john.black@uni.lu", + "role": "Principal_Investigator", + "institution": "ELU_I_77" } ] } diff --git a/data/demo/projects.json b/data/demo/projects.json index 330b408c..c56031d2 100644 --- a/data/demo/projects.json +++ b/data/demo/projects.json @@ -1,40 +1,77 @@ + [ { - "acronym": "DAISY-Dev", - "title": "Data Information System DAISY Development.", - "description": "Development of software to record GDPR-relevant metadata of human data and biosamples used in research.", - "start_date": "2018-03-01", - "end_date": "2019-09-01", + "acronym": "EPIC-DREM", + "name": "Understanding the gene regulatory interactions underlying cell differentiation and identity", + "description": "\"Temporal data on gene expression and context-specific open chromatin states can improve identification of key transcription factors (TFs) and the gene regulatory networks (GRNs) controlling cellular differentiation. However, their integration remains challenging. Here, we delineate a general approach for data-driven and unbiased identification of key TFs and dynamic GRNs, called EPIC-DREM. We generated time-series transcriptomic and epigenomic profiles during differentiation of mouse multipotent bone marrow stromal cell line (ST2) toward adipocytes and osteoblasts. Using our novel approach we constructed time-resolved GRNs for both lineages and identifed the shared TFs involved in both differentiation processes. To take an alternative approach to prioritize the identified shared regulators, we mapped dynamic super-enhancers in both lineages and associated them to target genes with correlated expression profiles. The combination of the two approaches identified aryl hydrocarbon receptor (AHR) and Glis family zinc finger 1 (GLIS1) as mesenchymal key TFs controlled by dynamic cell type-specific super-enhancers that become repressed in both lineages. AHR and GLIS1 control differentiation-induced genes and their overexpression can inhibit the lineage commitment of the multipotent bone marrow-derived ST2 cells..\" \r\n\r\n...taken from D G\u00e9rard, et.al. 
Nucleic Acids Research, Volume 47, Issue 3, 20 February 2019, Pages 1141\u20131163, https://doi.org/10.1093/nar/gky1240", + "has_institutional_ethics_approval": true, + "has_national_ethics_approval": true, + "institutional_ethics_approval_notes": "The umbrella project LUX-Epigen has an institutional and a national ethics approval.", + "national_ethics_approval_notes": "The umbrella project LUX-Epigen has an institutional and a national ethics approval.", + "start_date": "2020-03-02", + "end_date": "2021-04-30", "contacts": [ { "first_name": "Alice", "last_name": "White", + "email": "alice.white@uni.lu", "role": "Principal_Investigator", "institution": "ELU_I_77" }, { "first_name": "John", - "last_name": "Doe", + "last_name": "Black", + "email": "john.black@uni.lu", "role": "Principal_Investigator", "institution": "ELU_I_77" }, { - "first_name": "Jane", - "last_name": "Doe", + "first_name": "Roy", + "last_name": "Blue", + "email": "roy.blue@uni.lu", "role": "Researcher", "institution": "ELU_I_77" } ], - "has_institutional_ethics_approval": false, - "has_national_ethics_approval": false, - "national_ethics_approval_notes": "Not needed as it is a software development project. No data collection.", "publications": [ { - "citation_string": "Provenance-enabled stewardship of human data in the GDPR era. Pinar Alper, Regina Becker, Venkata Satagopam, Christophe Trefois, Valentin Groues, Jacek Lebioda, Yohan Jarosz. 
ProvenanceWeek 2018 IPAW Proceedings, London, July 9-10 2018." + "citation": "Gérard D, Schmidt F, Ginolhac A, Schmitz M, Halder R, Ebert P, Schulz MH,\nSauter T, Sinkkonen L. Temporal enhancer profiling of parallel lineages\nidentifies AHR and GLIS1 as regulators of mesenchymal multipotency. Nucleic Acids\nRes. 2019 Feb 20;47(3):1141-1163. PubMed PMID:\n30544251; PubMed Central PMCID: PMC6380961.", + "doi": "doi: 10.1093/nar/gky1240" + } + ] + }, + { + "acronym": "LUX-Epigen", + "name": "LUX-Epigen Research Programme - Phase1", + "description": "This is an imaginary research project created to demonstrate features of DAISY. LUX-Epigen is created as an umbrella project, which holds an ethics approval and contract among the participant institutes. There can be several PhD and Postdoc sub-projects cover under the umbrella of Lux-Epigen.", + "has_institutional_ethics_approval": true, + "has_national_ethics_approval": true, + "institutional_ethics_approval_notes": "See attached Uni-LU ERP approval. This is a dummy document for the imaginary LUX-Epigen programme.", + "national_ethics_approval_notes": "See attached CNER approval. This is a dummy document for the imaginary LUX-Epigen programme.", + "start_date": "2017-04-03", + "end_date": "2022-04-25", + "contacts": [ + { + "first_name": "Alice", + "last_name": "White", + "email": "alice.white@uni.lu", + "role": "Principal_Investigator", + "institution": "ELU_I_77" }, { - "citation_string": "ELIXIR-Luxembourg: providing sustainability to clinical and translational medicine data for research. Venkata Satagopam, Pinar Alper, Regina Becker, Dietlind Gerloff, Wei Gu, Roland Krause, Jacek Lebioda, Noua Toukourou, Christophe Trefois, Reinhard Schneider. Poster presentations at Intelligent Systems for Molecular Biology 2018, ISMB, Chicago, July 6-10 2018." + "first_name": "John", + "last_name": "Black", + "email": "john.black@uni.lu", + "role": "Principal_Investigator", + "institution": "ELU_I_77" + }, + { + "first_name": "Igor", + "last_name": "Green", + "email": "igor.green@chl.lu", + "role": "Principal_Investigator", + "institution": "ELU_I_9" } ] } -] \ No newline at end of file +] diff --git a/elixir_daisy/settings.py b/elixir_daisy/settings.py index 7dbbf950..8ad50ef0 100644 --- a/elixir_daisy/settings.py +++ b/elixir_daisy/settings.py @@ -15,7 +15,7 @@ import pytz COMPANY = 'LCSB' # Used for generating some models' verbose names - +DEMO_MODE = False AUTH_USER_MODEL = 'core.User' diff --git a/web/templates/about.html b/web/templates/about.html index 73d8cd01..4b07c350 100644 --- a/web/templates/about.html +++ b/web/templates/about.html @@ -20,8 +20,8 @@

About

https://github.com/elixir-luxembourg/daisy/issues
Notes
-
- {% comment %}
+
+ {% if demo_mode %}
This is a demo deployment. It contains fictitious user accounts, projects and datasets to help you understand DAISY's information model.
Data in the demo deployment is cleaned periodically. Please refrain from recording information that
@@ -40,23 +40,18 @@

About

VIP User (Principal Investigator) - jane.doe - demo - Standard User (Researcher) - - - john.doe + john.black demo VIP User (Principal Investigator) - admin + roy.blue demo - Super User (Application Admin) + Standard User (Researcher)
- {% endcomment %} + {% endif %} diff --git a/web/views/about.py b/web/views/about.py index 2f2815b8..4c287c92 100644 --- a/web/views/about.py +++ b/web/views/about.py @@ -1,12 +1,15 @@ from django.shortcuts import render import pkg_resources from stronghold.decorators import public +from django.conf import settings @public def about(request): context = { - "app_version": pkg_resources.require("elixir-daisy")[0].version + "app_version": pkg_resources.require("elixir-daisy")[0].version, + "demo_mode":settings.DEMO_MODE + } return render( From de8bbdcb8c119a36f1a57ee4df858d2420b83475 Mon Sep 17 00:00:00 2001 From: Pinar Alper Date: Thu, 7 May 2020 01:35:23 +0200 Subject: [PATCH 2/4] removed discrepancies between elu json schema and import/export results --- core/importer/base_importer.py | 2 +- core/importer/datasets_importer.py | 33 ++++++++++++++-------------- core/tests/data/ELX_LU_SUB-1.json | 2 +- core/tests/data/datasets.json | 35 +++++++++++++++++------------- core/tests/data/projects.json | 7 ++++++ data/demo/datasets.json | 24 ++++++++++++++++---- 6 files changed, 66 insertions(+), 37 deletions(-) diff --git a/core/importer/base_importer.py b/core/importer/base_importer.py index 58903b2a..a5a19f27 100644 --- a/core/importer/base_importer.py +++ b/core/importer/base_importer.py @@ -23,7 +23,7 @@ def process_contacts(self, project_dict): for contact_dict in project_dict.get('contacts', []): first_name = contact_dict.get('first_name').strip() last_name = contact_dict.get('last_name').strip() - email = contact_dict.get('email').strip() + email = contact_dict.get('email','').strip() full_name = "{} {}".format(first_name, last_name) role_name = contact_dict.get('role') if home_organisation.elu_accession == contact_dict.get('institution').strip(): diff --git a/core/importer/datasets_importer.py b/core/importer/datasets_importer.py index 8f365837..58969ce1 100644 --- a/core/importer/datasets_importer.py +++ b/core/importer/datasets_importer.py @@ -79,7 +79,7 @@ def process_dataset(self, dataset_dict): # if 'storage_acl_notes' in storage_location_dict: # dl.access_notes = storage_location_dict['storage_acl_notes'] - shares = self.process_shares(dataset_dict, dataset) + shares = self.process_transfers(dataset_dict, dataset) if shares: dataset.shares.set(shares, bulk=False) @@ -184,13 +184,13 @@ def process_data_locations(self, dataset, dataset_dict): return data_locations - def process_shares(self, dataset_dict, dataset): + def process_transfers(self, dataset_dict, dataset): - def process_share(share_dict, dataset): + def process_transfer(share_dict, dataset): share = Share() - share.access_notes = share_dict.get('share_notes') + share.share_notes = share_dict.get('transfer_details') share.dataset = dataset - share_institution_elu = share_dict.get('share_inst') + share_institution_elu = share_dict.get('partner') share_institution = Partner.objects.get(elu_accession=share_institution_elu.strip()) share.partner = share_institution # project = dataset.project @@ -212,8 +212,8 @@ def process_share(share_dict, dataset): # share.contract = contract return share - shares = dataset_dict.get('shares', []) - return [process_share(share_object, dataset) for share_object in shares] + transfers = dataset_dict.get('transfers', []) + return [process_transfer(transfer_dict, dataset) for transfer_dict in transfers] def process_category(self, storage_location_dict): category_str = storage_location_dict.get('category', '').strip().lower() @@ -255,7 +255,7 @@ def process_datadeclaration(self, datadec_dict, 
dataset): datadec.data_types_notes = datadec_dict.get('data_type_notes', None) datadec.deidentification_method = self.process_deidentification_method(datadec_dict) datadec.subjects_category = self.process_subjects_category(datadec_dict) - datadec.special_subjects_description = datadec_dict.get('special_subject_notes', None) + datadec.special_subjects_description = datadec_dict.get('special_subjects_description', None) datadec.other_external_id = datadec_dict.get('other_external_id', None) datadec.share_category = self.process_access_category(datadec_dict) datadec.consent_status = self.process_constent_status(datadec_dict) @@ -293,8 +293,8 @@ def process_deidentification_method(self, datadec_dict): return DeidentificationMethod.pseudonymization def process_subjects_category(self, datadec_dict): - if 'subject_categories' in datadec_dict: - sub_category_str = datadec_dict.get('subject_categories', '').strip() + if 'subjects_category' in datadec_dict: + sub_category_str = datadec_dict.get('subjects_category', '').strip() try: return SubjectCategory[sub_category_str] except KeyError: @@ -382,12 +382,13 @@ def process_use_restrictions(self, data_dec, datadec_dict): return use_restrictions def process_access_category(self, datadec_dict): - share_category_str = datadec_dict.get('access_category', '').strip() - try: - return ShareCategory[share_category_str] - except KeyError: - return None - + share_category_str = datadec_dict.get('access_category','') + if share_category_str: + try: + return ShareCategory[share_category_str] + except KeyError: + return None + return None def process_constent_status(self, datadec_dict): if 'consent_status' in datadec_dict: consent_status_str = datadec_dict.get('consent_status', '').strip() diff --git a/core/tests/data/ELX_LU_SUB-1.json b/core/tests/data/ELX_LU_SUB-1.json index a1ab39a6..43f3e8fd 100644 --- a/core/tests/data/ELX_LU_SUB-1.json +++ b/core/tests/data/ELX_LU_SUB-1.json @@ -70,7 +70,7 @@ "data_size_category": "m", "metadata_exists": true, "has_special_subjects": true, - "special_subject_notes": "Subjects minors. mothers and babies", + "special_subjects_description": "Subjects minors. 
mothers and babies", "consent_status": "homogeneous", "consent_notes": "Consent is consistent among all subjects", "de_identification": "pseudonymized", diff --git a/core/tests/data/datasets.json b/core/tests/data/datasets.json index 7b88c281..436fa05f 100644 --- a/core/tests/data/datasets.json +++ b/core/tests/data/datasets.json @@ -2,16 +2,17 @@ { "name": "ABCD data", "project": "ABCD", - "shares": [ + "transfers": [ { - "share_inst": "ELU_I_44", - "share_notes": "Melanie Silver" + "partner": "ELU_I_44", + "transfer_details": "Melanie Silver" } ], "contacts": [ { "first_name": "Igor", "last_name": "Teal", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" } @@ -36,9 +37,9 @@ "Methylation_array" ], "de_identification": "anonymization", - "subject_categories": "controls", + "subjects_category": "controls", "has_special_subjects": true, - "special_subject_notes": "2 year old children", + "special_subjects_description": "2 year old children", "source_notes": "Data is from collaborator.", "use_restrictions": [ { @@ -61,6 +62,7 @@ { "first_name": "Joanne", "last_name": "Swift", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" } @@ -84,7 +86,7 @@ "Whole_genome_sequencing" ], "de_identification": "pseudonymization", - "subject_categories": "cases_and_controls", + "subjects_category": "cases_and_controls", "has_special_subjects": false, "source_collaboration": { "collab_inst": "ELU_I_94", @@ -113,19 +115,20 @@ { "first_name": "Rob", "last_name": "Blue", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" } ], - "shares": [ + "transfers": [ { - "share_inst": "ELU_I_79", - "share_notes": "Feng Xiao" + "partner": "ELU_I_79", + "transfer_details": "Feng Xiao" }, { - "share_inst": "ELU_I_80", - "share_notes": "Max Ginger" + "partner": "ELU_I_80", + "transfer_details": "Max Ginger" } ], "storages": [ @@ -162,6 +165,7 @@ { "first_name": "Paul", "last_name": "Mauve", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" }, { @@ -184,7 +188,7 @@ "Other_Phenotype_data" ], "de_identification": "pseudonymization", - "subject_categories": "cases_and_controls", + "subjects_category": "cases_and_controls", "has_special_subjects": false, "access_category": "controlled_access", "use_restrictions": [ @@ -199,11 +203,12 @@ { "name": "PD data", "project": "PD_Project", - "shares": [], + "transfers": [], "contacts": [ { "first_name": "Ali", "last_name": "Gator", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" } @@ -299,7 +304,7 @@ ], "de_identification": "pseudonymization", "ombudsman": "Biobank, Principle Investigator", - "subject_categories": "cases_and_controls", + "subjects_category": "cases_and_controls", "has_special_subjects": false, "consent_status": "homogeneous", "use_restrictions": [ @@ -324,7 +329,7 @@ "Samples" ], "de_identification": "pseudonymization", - "subject_categories": "cases_and_controls", + "subjects_category": "cases_and_controls", "has_special_subjects": false, "source_collaboration": { "collab_inst": "ELU_I_8", diff --git a/core/tests/data/projects.json b/core/tests/data/projects.json index 0f5edaa9..d9f44218 100644 --- a/core/tests/data/projects.json +++ b/core/tests/data/projects.json @@ -9,18 +9,21 @@ { "first_name": "Joanne", "last_name": "Swift", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" }, { "first_name": "Rebecca", "last_name": "Kafe", + "email": "user@uni.edu", 
"role": "Principal_Investigator", "institution": "ELU_I_77" }, { "first_name": "Embury", "last_name": "Bask", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" } @@ -47,24 +50,28 @@ { "first_name": "Colman", "last_name": "Level", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" }, { "first_name": "Colman", "last_name": "Level", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" }, { "first_name": "Nic", "last_name": "Purple", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" }, { "first_name": "James", "last_name": "BK", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" } diff --git a/data/demo/datasets.json b/data/demo/datasets.json index d810603d..8e83749c 100644 --- a/data/demo/datasets.json +++ b/data/demo/datasets.json @@ -9,17 +9,33 @@ "data_types": [ "RNASeq", "Samples", - "ChIP-seq" + "Whole_genome_sequencing" ], - "access_category": null, + "data_type_notes": "Chip-seq time series data", + "access_category": "controlled_access", "subjects_category": "controls", "de_identification": "pseudonymization", "consent_status": "homogeneous", - "has_special_subjects": null, "special_subjects_description": "", "embargo_date": null, "storage_end_date": "2021-04-05", - "storage_duration_criteria": "The project PI will assess whether data should be retained for further research at the storage end date." + "storage_duration_criteria": "The project PI will assess whether data should be retained for further research at the storage end date.", + "has_special_subjects": false, + "consent_status": "homogeneous", + "use_restrictions": [ + { + "ga4gh_code": "COL-[XX]", + "note": "Data is shared as part of the LUX-Epigen programme, a collaboration with CHL, IBBL, LCSB and LSRU." + }, + { + "ga4gh_code": "DS-[XX](CC)", + "note": "Data is consented for cancer research studies only." + }, + { + "ga4gh_code": "GS-[XX]", + "note": "Data is consented for sharing outside EU/EEA region." 
+ } + ] } ], "storages": [ From dab17d7377a833cfcd016513947456ef7e22ac83 Mon Sep 17 00:00:00 2001 From: Pinar Alper Date: Thu, 7 May 2020 00:20:41 +0200 Subject: [PATCH 3/4] data importer simplified demo data changed to DM-DS workshop scenario --- core/importer/base_importer.py | 71 ++++ core/importer/datadecs_importer.py | 209 ----------- core/importer/datasets_importer.py | 332 +++++++++++++----- core/importer/elx_submission_importer.py | 201 ++++++----- core/importer/projects_importer.py | 96 +---- core/management/commands/import_datasets.py | 15 +- core/management/commands/load_demo_data.py | 13 +- core/models/project.py | 10 +- core/tests/data/ELX_LU_SUB-1.json | 30 +- core/tests/data/datadecs.json | 136 ------- core/tests/data/datasets.json | 262 +++++++++++--- core/tests/data/projects.json | 10 +- core/tests/importer/test_bioportal_client.py | 28 -- core/tests/importer/test_datadecs_importer.py | 51 --- core/tests/importer/test_datasets_importer.py | 11 +- core/tests/importer/test_export.py | 2 +- .../importer/test_import_elx_submission.py | 6 +- data/demo/datadecs.json | 44 --- data/demo/datasets.json | 83 ++--- data/demo/projects.json | 65 +++- elixir_daisy/settings.py | 2 +- web/templates/about.html | 17 +- web/views/about.py | 8 + 23 files changed, 809 insertions(+), 893 deletions(-) create mode 100644 core/importer/base_importer.py delete mode 100644 core/importer/datadecs_importer.py delete mode 100644 core/tests/data/datadecs.json delete mode 100644 core/tests/importer/test_bioportal_client.py delete mode 100644 core/tests/importer/test_datadecs_importer.py delete mode 100644 data/demo/datadecs.json diff --git a/core/importer/base_importer.py b/core/importer/base_importer.py new file mode 100644 index 00000000..58903b2a --- /dev/null +++ b/core/importer/base_importer.py @@ -0,0 +1,71 @@ + +from core.models import Partner, Contact, ContactType +from core.models import User +from core.utils import DaisyLogger +from django.conf import settings +from core.constants import Groups as GroupConstants +from django.contrib.auth.models import Group + + +PRINCIPAL_INVESTIGATOR = 'Principal_Investigator' + +class BaseImporter: + + logger = DaisyLogger(__name__) + + def process_contacts(self, project_dict): + local_custodians = [] + local_personnel = [] + external_contacts = [] + + home_organisation = Partner.objects.get(acronym=settings.COMPANY) + + for contact_dict in project_dict.get('contacts', []): + first_name = contact_dict.get('first_name').strip() + last_name = contact_dict.get('last_name').strip() + email = contact_dict.get('email').strip() + full_name = "{} {}".format(first_name, last_name) + role_name = contact_dict.get('role') + if home_organisation.elu_accession == contact_dict.get('institution').strip(): + user = (User.objects.filter(first_name__icontains=first_name.lower(), + last_name__icontains=last_name.lower()) | User.objects.filter( + first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first() + if user is None: + self.logger.warning('no user found for %s an inactive user will be created', full_name) + + usr_name = first_name.lower() + '.' 
+ last_name.lower() + user = User.objects.create(username=usr_name, password='', first_name=first_name, last_name=last_name, is_active=False, + email=email, + ) + user.staff = True + + if role_name == PRINCIPAL_INVESTIGATOR: + g = Group.objects.get(name=GroupConstants.VIP.value) + user.groups.add(g) + + user.save() + if role_name == PRINCIPAL_INVESTIGATOR: + local_custodians.append(user) + else: + local_personnel.append(user) + + else: + contact = (Contact.objects.filter(first_name__icontains=first_name.lower(), + last_name__icontains=last_name.lower()) | Contact.objects.filter( + first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first() + if contact is None: + contact_type_pi, _ = ContactType.objects.get_or_create(name=role_name) + contact, _ = Contact.objects.get_or_create( + first_name=first_name, + last_name=last_name, + email=email, + type=contact_type_pi + ) + affiliation = Partner.objects.get(elu_accession=contact_dict.get('institution')) + if affiliation: + contact.partners.add(affiliation) + contact.save() + external_contacts.append(contact) + + return local_custodians, local_personnel, external_contacts + diff --git a/core/importer/datadecs_importer.py b/core/importer/datadecs_importer.py deleted file mode 100644 index b3a55f83..00000000 --- a/core/importer/datadecs_importer.py +++ /dev/null @@ -1,209 +0,0 @@ -import json -import sys - -from core.exceptions import DatasetImportError -from core.models import Dataset, DataType, Partner, Project, Contract, ContactType, Contact, PartnerRole, \ - GDPRRole, UseRestriction -from core.models.data_declaration import SubjectCategory, DeidentificationMethod, DataDeclaration, ShareCategory, \ - ConsentStatus -from core.utils import DaisyLogger - -logger = DaisyLogger(__name__) - - -class DatadecsImporter: - - def import_json(self, json_string, stop_on_error=False, verbose=False): - logger.info('Import started for file') - result = True - all_dicts = json.loads(json_string) - for datadec_dict in all_dicts: - logger.debug(' * Importing data declaration : "{}"...'.format(datadec_dict.get('title', 'N/A'))) - try: - self.process_datadec(datadec_dict) - except Exception as e: - logger.error('Import failed') - logger.error(str(e)) - if verbose: - import traceback - ex = traceback.format_exception(*sys.exc_info()) - logger.error('\n'.join([e for e in ex])) - if stop_on_error: - raise e - result = False - logger.debug(" ... complete!") - logger.info('Import result for file: {}'.format('success' if result else 'fail')) - return result - - def process_datadec(self, datadec_dict, **kwargs): - try: - title = datadec_dict['title'] - except KeyError: - raise DatasetImportError(data='Data declaration title missing') - - if 'dataset_obj' not in kwargs: - try: - dataset_title = datadec_dict['dataset'] - dataset = Dataset.objects.get(title=dataset_title.strip()) - except KeyError: - raise DatasetImportError(data='Parent dataset info missing') - except Dataset.DoesNotExist: - raise DatasetImportError(data='Parent dataset not found in DB') - else: - dataset = kwargs.pop('dataset_obj') - try: - datadec = DataDeclaration.objects.get(title=title.strip(), dataset=dataset) - except DataDeclaration.DoesNotExist: - datadec = None - - if datadec: - logger.warning("Data declaration with title '{}' already found. 
It will be updated.".format(title)) - else: - datadec = DataDeclaration.objects.create(title=title, dataset=dataset) - - datadec.has_special_subjects = datadec_dict.get('has_special_subjects', False) - datadec.data_types_notes = datadec_dict.get('data_type_notes', None) - datadec.deidentification_method = self.process_deidentification_method(datadec_dict) - datadec.subjects_category = self.process_subjects_category(datadec_dict) - datadec.special_subjects_description = datadec_dict.get('special_subject_notes', None) - datadec.other_external_id = datadec_dict.get('other_external_id', None) - datadec.share_category = self.process_access_category(datadec_dict) - datadec.consent_status = self.process_constent_status(datadec_dict) - datadec.comments = datadec_dict.get('source_notes', None) - - if 'data_types' in datadec_dict: - datadec.data_types_received.set(self.process_datatypes(datadec_dict)) - - if 'contract_obj' not in kwargs: - if 'source_collaboration' in datadec_dict: - datadec.contract = self.process_source_contract(dataset, datadec_dict) - else: - datadec.contract = kwargs.pop('contract_obj') - if datadec.contract: - datadec.partner = datadec.contract.partners.first() - self.process_use_restrictions(datadec, datadec_dict) - - - datadec.save() - - def process_datatypes(self, datadec_dict): - datatypes = [] - for datatype_str in datadec_dict.get('data_types', []): - datatype_str = datatype_str.strip() - # TODO Data types is a controlled vocabulaRY we should not create new when importing - datatype, _ = DataType.objects.get_or_create(name=datatype_str) - datatypes.append(datatype) - return datatypes - - def process_deidentification_method(self, datadec_dict): - deidentification_method_str = datadec_dict.get('de_identification', '') - try: - return DeidentificationMethod[deidentification_method_str] - except KeyError: - return DeidentificationMethod.pseudonymization - - def process_subjects_category(self, datadec_dict): - if 'subject_categories' in datadec_dict: - sub_category_str = datadec_dict.get('subject_categories', '').strip() - try: - return SubjectCategory[sub_category_str] - except KeyError: - return SubjectCategory.unknown - else: - return SubjectCategory.unknown - - def process_source_contract(self, dataset, datadec_dict): - - contract_dict = datadec_dict['source_collaboration'] - - try: - partner_elu = contract_dict['collab_inst'] - if partner_elu is None: - raise DatasetImportError(f'Partner accession number is NULL!') - partner = Partner.objects.get(elu_accession=partner_elu.strip()) - except KeyError: - raise DatasetImportError(f'Contract partner accession number is missing') - except Partner.DoesNotExist: - raise DatasetImportError(f'Cannot find institution partner with the elu: {repository}') - - if 'collab_project' not in contract_dict: - logger.debug( - ' * Contract project missing! 
Skipping contract setting for datadeclaration : "{}"...'.format( - datadec_dict.get('title', 'N/A'))) - return None - else: - # create contract project if it does not exist - try: - project = Project.objects.get(acronym=contract_dict['collab_project'].strip()) - except Project.DoesNotExist: - project = Project.objects.create( - acronym=contract_dict['collab_project'].strip() - ) - project.local_custodians.set(dataset.local_custodians.all()) - project.save() - try: - contract = Contract.objects.get( - partners_roles__partner=partner, - project=project) - except Contract.DoesNotExist: - if 'collab_role' in contract_dict: - role_str = contract_dict['collab_role'] - role = GDPRRole[role_str] - else: - role = GDPRRole["joint_controller"] - - contract = Contract.objects.create( - project=project, - ) - contract.company_roles.add(role) - contract.add_partner_with_role(partner=partner, role=role) - contract.local_custodians.set(project.local_custodians.all()) - - if 'collab_pi' in contract_dict: - contact_type_pi, _ = ContactType.objects.get_or_create(name="Principal_Investigator") - - contract_pi_str = contract_dict['collab_pi'] - contract_split = contract_pi_str.split() - - first_name = contract_split[0] - last_name = " ".join(contract_split[1:]) - contact, _ = Contact.objects.get_or_create( - first_name=first_name, - last_name=last_name, - type=contact_type_pi - ) - contact.partner = partner - contact.save() - partner_role = PartnerRole.objects.filter(contract=contract, partner=partner).first() - partner_role.contacts.add(contact) - partner_role.save() - - contract.save() - return contract - - def process_use_restrictions(self, data_dec, datadec_dict): - use_restrictions = [] - for user_restriction_dict in datadec_dict['use_restrictions']: - ga4gh_code = user_restriction_dict['ga4gh_code'] - notes = user_restriction_dict['note'] - - use_restriction = UseRestriction.objects.create(data_declaration=data_dec, restriction_class=ga4gh_code, notes=notes) - use_restrictions.append(use_restriction) - return use_restrictions - - def process_access_category(self, datadec_dict): - share_category_str = datadec_dict.get('access_category', '').strip() - try: - return ShareCategory[share_category_str] - except KeyError: - return None - - def process_constent_status(self, datadec_dict): - if 'consent_status' in datadec_dict: - consent_status_str = datadec_dict.get('consent_status', '').strip() - try: - return ConsentStatus[consent_status_str] - except KeyError: - return ConsentStatus.unknown - else: - return ConsentStatus.unknown diff --git a/core/importer/datasets_importer.py b/core/importer/datasets_importer.py index 987b4821..8f365837 100644 --- a/core/importer/datasets_importer.py +++ b/core/importer/datasets_importer.py @@ -2,18 +2,15 @@ import sys from core.exceptions import DatasetImportError -from core.models import Dataset, Project, StorageResource, User, Contract, Partner, GDPRRole +from core.importer.base_importer import BaseImporter +from core.models import Dataset, DataDeclaration, Project, StorageResource, Partner, \ + UseRestriction, DataType from core.models.access import Access +from core.models.data_declaration import ShareCategory, ConsentStatus, DeidentificationMethod, SubjectCategory from core.models.share import Share from core.models.storage_location import StorageLocationCategory, DataLocation -from core.utils import DaisyLogger -from core.constants import Groups as GroupConstants -from django.contrib.auth.models import Group -logger = DaisyLogger(__name__) - - -class DatasetsImporter: 
+class DatasetsImporter(BaseImporter): """ `DatasetsImporter`, parse json representation of a set of datasets and store them in the database """ @@ -22,30 +19,30 @@ class DateImportException(Exception): pass def import_json(self, json_string, stop_on_error=False, verbose=False): - logger.info('Import started for file') + self.logger.info('Import started for file') result = True dataset_list = json.loads(json_string) for dataset in dataset_list: - logger.debug(' * Importing dataset: "{}"...'.format(dataset.get('title', 'N/A'))) + self.logger.debug(' * Importing dataset: "{}"...'.format(dataset.get('name', 'N/A'))) try: self.process_dataset(dataset) except Exception as e: - logger.error('Import failed') - logger.error(str(e)) + self.logger.error('Import failed') + self.logger.error(str(e)) if verbose: import traceback ex = traceback.format_exception(*sys.exc_info()) - logger.error('\n'.join([e for e in ex])) + self.logger.error('\n'.join([e for e in ex])) if stop_on_error: raise e result = False - logger.info('... completed') - logger.info('Import result for file: {}'.format('success' if result else 'fail')) + self.logger.info('... completed') + self.logger.info('Import result for file: {}'.format('success' if result else 'fail')) return result def process_dataset(self, dataset_dict): try: - title = dataset_dict['title'] + title = dataset_dict['name'] except KeyError: raise DatasetImportError(data='dataset without title') @@ -57,7 +54,7 @@ def process_dataset(self, dataset_dict): dataset = None if dataset: - logger.warning("Dataset with title '{}' already found. It will be updated.".format(title)) + self.logger.warning("Dataset with title '{}' already found. It will be updated.".format(title)) else: dataset = Dataset.objects.create(title=title) @@ -66,9 +63,11 @@ def process_dataset(self, dataset_dict): dataset.sensitivity = dataset_dict.get('sensitivity', None) - local_custodians = self.process_local_custodians(dataset_dict) + local_custodians, local_personnel, external_contacts = self.process_contacts(dataset_dict) + if local_custodians: - dataset.local_custodians.set(local_custodians) + dataset.local_custodians.set(local_custodians, clear=True) + data_locations = self.process_data_locations(dataset, dataset_dict) if data_locations: @@ -88,34 +87,36 @@ def process_dataset(self, dataset_dict): for local_custodian in local_custodians: local_custodian.assign_permissions_to_dataset(dataset) - @staticmethod - def process_local_custodians(dataset_dict): - result = [] - - local_custodians = dataset_dict.get('local_custodian', []) - - for local_custodian in local_custodians: - custodian_str_strip = local_custodian.strip() - user = (User.objects.filter(full_name__icontains=custodian_str_strip.lower()) | User.objects.filter( - full_name__icontains=custodian_str_strip.upper())).first() - if user is None: - names = custodian_str_strip.split(maxsplit=1) - - if len(names) == 2: - logger.warning('no user found for %s and inactive user will be created', custodian_str_strip) - usr_name = names[0].strip().lower() + '.' 
+ names[1].strip().lower() - user = User.objects.create(username=usr_name, password='', first_name=names[0], last_name=names[1],is_active=False, - email='inactive.user@uni.lu', - ) - user.staff = True - g = Group.objects.get(name=GroupConstants.VIP.value) - user.groups.add(g) - user.save() - result.append(user) - - else: - result.append(user) - return result + self.process_datadeclarations(dataset_dict, dataset) + + # @staticmethod + # def process_local_custodians(dataset_dict): + # result = [] + # + # local_custodians = dataset_dict.get('local_custodian', []) + # + # for local_custodian in local_custodians: + # custodian_str_strip = local_custodian.strip() + # user = (User.objects.filter(full_name__icontains=custodian_str_strip.lower()) | User.objects.filter( + # full_name__icontains=custodian_str_strip.upper())).first() + # if user is None: + # names = custodian_str_strip.split(maxsplit=1) + # + # if len(names) == 2: + # logger.warning('no user found for %s and inactive user will be created', custodian_str_strip) + # usr_name = names[0].strip().lower() + '.' + names[1].strip().lower() + # user = User.objects.create(username=usr_name, password='', first_name=names[0], last_name=names[1],is_active=False, + # email='inactive.user@uni.lu', + # ) + # user.staff = True + # g = Group.objects.get(name=GroupConstants.VIP.value) + # user.groups.add(g) + # user.save() + # result.append(user) + # + # else: + # result.append(user) + # return result def process_project(self, project_name): try: @@ -147,10 +148,10 @@ def process_data_locations(self, dataset, dataset_dict): 'sample-storage': 'sample-storage', 'other': 'other' } - if 'storage_locations' in dataset_dict: + if 'storages' in dataset_dict: - for storage_location_dict in dataset_dict['storage_locations']: - backend_name = storage_location_dict['storage_resource'].lower().strip() + for storage_location_dict in dataset_dict['storages']: + backend_name = storage_location_dict['platform'].lower().strip() backend_name = backend_mapping.get(backend_name, backend_name) if not backend_name: raise DatasetImportError(data=f'Not a proper backend name: "{backend_name}".') @@ -159,8 +160,7 @@ def process_data_locations(self, dataset, dataset_dict): except StorageResource.DoesNotExist: raise DatasetImportError(data=f'Cannot find StorageResource with slug: "{backend_name}".') category = self.process_category(storage_location_dict) - acl_policy_description = self.process_acl_info(storage_location_dict) - #DLCLazz = backend.get_location_class() + location_delimeted = '\n'.join(storage_location_dict['locations']) @@ -172,6 +172,7 @@ def process_data_locations(self, dataset, dataset_dict): ) master_locations = DataLocation.objects.filter(category=StorageLocationCategory.master, dataset=dataset) + acl_policy_description = self.process_acl_info(storage_location_dict) if acl_policy_description: acc = Access.objects.create( dataset=dataset, @@ -182,47 +183,33 @@ def process_data_locations(self, dataset, dataset_dict): data_locations.append(dl) return data_locations - def process_user_acl(self, storage_location_dict): - storage_acl_info = storage_location_dict.get("storage_acl_users", "") - storage_acl_info_list = storage_acl_info.split(',') - users_with_access = [] - for storage_acl_info_str in storage_acl_info_list: - # try to identify user - storage_acl_info_str = storage_acl_info_str.strip() - user = (User.objects.filter(full_name__icontains=storage_acl_info_str.lower()) | User.objects.filter( - full_name__icontains=storage_acl_info_str.upper())).first() - if 
user is None: - logger.warning('no user found for %s', storage_acl_info_str) - else: - users_with_access.append(user) - return users_with_access def process_shares(self, dataset_dict, dataset): - def process_share(share_object, dataset): + def process_share(share_dict, dataset): share = Share() - share.access_notes = share_object.get('share_notes') + share.access_notes = share_dict.get('share_notes') share.dataset = dataset - share_institution_elu = share_object.get('share_inst') + share_institution_elu = share_dict.get('share_inst') share_institution = Partner.objects.get(elu_accession=share_institution_elu.strip()) share.partner = share_institution - project = dataset.project - if share_institution and project: - contracts = project.contracts.all() - for contract in contracts: - for partner in contract.partners: - if share_institution_elu.strip() == partner.elu_accession: - share.contract = contract - break - if not share.contract: - contract = Contract.objects.create( - project=project, - ) - contract.company_roles.add(GDPRRole["joint_controller"]) - contract.add_partner_with_role(share_institution, GDPRRole["joint_controller"]) - contract.local_custodians.set(project.local_custodians.all()) - contract.save() - share.contract = contract + # project = dataset.project + # if share_institution and project: + # contracts = project.contracts.all() + # for contract in contracts: + # for partner in contract.partners: + # if share_institution_elu.strip() == partner.elu_accession: + # share.contract = contract + # break + # if not share.contract: + # contract = Contract.objects.create( + # project=project, + # ) + # contract.company_roles.add(GDPRRole["joint_controller"]) + # contract.add_partner_with_role(share_institution, GDPRRole["joint_controller"]) + # contract.local_custodians.set(project.local_custodians.all()) + # contract.save() + # share.contract = contract return share shares = dataset_dict.get('shares', []) @@ -236,8 +223,177 @@ def process_category(self, storage_location_dict): return StorageLocationCategory.not_specified def process_acl_info(self, storage_location_dict): - if 'storage_acl_info' in storage_location_dict: + if 'accesses' in storage_location_dict: + return "\n".join(storage_location_dict['accesses']) + else: + return None + + def process_datadeclarations(self, dataset_dict, dataset): + + datadec_dicts = dataset_dict.get('data_declarations', []) - return storage_location_dict['storage_acl_info'] + for ddec_dict in datadec_dicts: + self.process_datadeclaration(ddec_dict, dataset) + + def process_datadeclaration(self, datadec_dict, dataset): + try: + title = datadec_dict['title'] + except KeyError: + raise DatasetImportError(data='Data declaration title missing') + + try: + datadec = DataDeclaration.objects.get(title=title.strip(), dataset=dataset) + except DataDeclaration.DoesNotExist: + datadec = None + + if datadec: + self.logger.warning("Data declaration with title '{}' already found. 
It will be updated.".format(title)) + else: + datadec = DataDeclaration.objects.create(title=title, dataset=dataset) + + datadec.has_special_subjects = datadec_dict.get('has_special_subjects', False) + datadec.data_types_notes = datadec_dict.get('data_type_notes', None) + datadec.deidentification_method = self.process_deidentification_method(datadec_dict) + datadec.subjects_category = self.process_subjects_category(datadec_dict) + datadec.special_subjects_description = datadec_dict.get('special_subject_notes', None) + datadec.other_external_id = datadec_dict.get('other_external_id', None) + datadec.share_category = self.process_access_category(datadec_dict) + datadec.consent_status = self.process_constent_status(datadec_dict) + datadec.comments = datadec_dict.get('source_notes', None) + + if 'data_types' in datadec_dict: + datadec.data_types_received.set(self.process_datatypes(datadec_dict)) + + # if 'contract_obj' not in kwargs: + # if 'source_collaboration' in datadec_dict: + # datadec.contract = self.process_source_contract(dataset, datadec_dict) + # else: + # datadec.contract = kwargs.pop('contract_obj') + # if datadec.contract: + # datadec.partner = datadec.contract.partners.first() + self.process_use_restrictions(datadec, datadec_dict) + datadec.dataset = dataset + datadec.save() + + + def process_datatypes(self, datadec_dict): + datatypes = [] + for datatype_str in datadec_dict.get('data_types', []): + datatype_str = datatype_str.strip() + # TODO Data types is a controlled vocabulaRY we should not create new when importing + datatype, _ = DataType.objects.get_or_create(name=datatype_str) + datatypes.append(datatype) + return datatypes + + def process_deidentification_method(self, datadec_dict): + deidentification_method_str = datadec_dict.get('de_identification', '') + try: + return DeidentificationMethod[deidentification_method_str] + except KeyError: + return DeidentificationMethod.pseudonymization + + def process_subjects_category(self, datadec_dict): + if 'subject_categories' in datadec_dict: + sub_category_str = datadec_dict.get('subject_categories', '').strip() + try: + return SubjectCategory[sub_category_str] + except KeyError: + return SubjectCategory.unknown else: + return SubjectCategory.unknown + + # def process_source_contract(self, dataset, datadec_dict): + # + # contract_dict = datadec_dict['source_collaboration'] + # + # try: + # partner_elu = contract_dict['collab_inst'] + # if partner_elu is None: + # raise DatasetImportError(f'Partner accession number is NULL!') + # partner = Partner.objects.get(elu_accession=partner_elu.strip()) + # except KeyError: + # raise DatasetImportError(f'Contract partner accession number is missing') + # except Partner.DoesNotExist: + # raise DatasetImportError(f'Cannot find institution partner with the elu: {partner_elu}') + # + # if 'collab_project' not in contract_dict: + # logger.debug( + # ' * Contract project missing! 
Skipping contract setting for datadeclaration : "{}"...'.format( + # datadec_dict.get('title', 'N/A'))) + # return None + # else: + # # create contract project if it does not exist + # try: + # project = Project.objects.get(acronym=contract_dict['collab_project'].strip()) + # except Project.DoesNotExist: + # project = Project.objects.create( + # acronym=contract_dict['collab_project'].strip() + # ) + # project.local_custodians.set(dataset.local_custodians.all()) + # project.save() + # try: + # contract = Contract.objects.get( + # partners_roles__partner=partner, + # project=project) + # except Contract.DoesNotExist: + # if 'collab_role' in contract_dict: + # role_str = contract_dict['collab_role'] + # role = GDPRRole[role_str] + # else: + # role = GDPRRole["joint_controller"] + # + # contract = Contract.objects.create( + # project=project, + # ) + # contract.company_roles.add(role) + # contract.add_partner_with_role(partner=partner, role=role) + # contract.local_custodians.set(project.local_custodians.all()) + # + # if 'collab_pi' in contract_dict: + # contact_type_pi, _ = ContactType.objects.get_or_create(name="Principal_Investigator") + # + # contract_pi_str = contract_dict['collab_pi'] + # contract_split = contract_pi_str.split() + # + # first_name = contract_split[0] + # last_name = " ".join(contract_split[1:]) + # contact, _ = Contact.objects.get_or_create( + # first_name=first_name, + # last_name=last_name, + # type=contact_type_pi + # ) + # contact.partner = partner + # contact.save() + # partner_role = PartnerRole.objects.filter(contract=contract, partner=partner).first() + # partner_role.contacts.add(contact) + # partner_role.save() + # + # contract.save() + # return contract + + def process_use_restrictions(self, data_dec, datadec_dict): + use_restrictions = [] + for user_restriction_dict in datadec_dict['use_restrictions']: + ga4gh_code = user_restriction_dict['ga4gh_code'] + notes = user_restriction_dict['note'] + + use_restriction = UseRestriction.objects.create(data_declaration=data_dec, restriction_class=ga4gh_code, notes=notes) + use_restrictions.append(use_restriction) + return use_restrictions + + def process_access_category(self, datadec_dict): + share_category_str = datadec_dict.get('access_category', '').strip() + try: + return ShareCategory[share_category_str] + except KeyError: return None + + def process_constent_status(self, datadec_dict): + if 'consent_status' in datadec_dict: + consent_status_str = datadec_dict.get('consent_status', '').strip() + try: + return ConsentStatus[consent_status_str] + except KeyError: + return ConsentStatus.unknown + else: + return ConsentStatus.unknown diff --git a/core/importer/elx_submission_importer.py b/core/importer/elx_submission_importer.py index 5e74cf64..fbecf7f4 100644 --- a/core/importer/elx_submission_importer.py +++ b/core/importer/elx_submission_importer.py @@ -1,20 +1,20 @@ import json import sys -from django.utils.datetime_safe import datetime + from core.exceptions import DatasetImportError -from core.importer.datadecs_importer import DatadecsImporter -from core.importer.datasets_importer import DatasetsImporter -from core.models import Contact, Dataset, Partner, Project, ContactType, GDPRRole -from core.models.contract import Contract + +from core.models import Contact, Dataset, Project, ContactType + from core.utils import DaisyLogger +from .base_importer import BaseImporter from .projects_importer import ProjectsImporter logger = DaisyLogger(__name__) -class DishSubmissionImporter: +class 
diff --git a/core/importer/elx_submission_importer.py b/core/importer/elx_submission_importer.py
index 5e74cf64..fbecf7f4 100644
--- a/core/importer/elx_submission_importer.py
+++ b/core/importer/elx_submission_importer.py
@@ -1,20 +1,20 @@
 import json
 import sys
 
-from django.utils.datetime_safe import datetime
+
 from core.exceptions import DatasetImportError
-from core.importer.datadecs_importer import DatadecsImporter
-from core.importer.datasets_importer import DatasetsImporter
-from core.models import Contact, Dataset, Partner, Project, ContactType, GDPRRole
-from core.models.contract import Contract
+
+from core.models import Contact, Dataset, Project, ContactType
+
 from core.utils import DaisyLogger
+from .base_importer import BaseImporter
 from .projects_importer import ProjectsImporter
 
 logger = DaisyLogger(__name__)
 
 
-class DishSubmissionImporter:
+class DishSubmissionImporter(BaseImporter):
     """
     `DishSubmissionImporter`, parse json export of the Data Submission System
     and create relevant Dataset, Collaboration, (external Project) and DataDeclaration records in DAISY
@@ -30,24 +30,16 @@ def import_json(self, json_string, stop_on_error=False, verbose=False):
         try:
             logger.info('Import started')
             submission_dict = json.loads(json_string)
-            logger.debug(' * Importing Data Declaration: "{}"...'.format(submission_dict['title']))
+            logger.debug(' * Importing Data Declaration: "{}"...'.format(submission_dict['name']))
             if self.is_elixir_submission(submission_dict):
                 project = Project.objects.filter(acronym=self.elixir_project_name).first()
             dataset = self.process_submission_as_dataset(submission_dict, project)
-            contract = self.process_submission_as_contract(submission_dict, project)
-            datadec_dicts = submission_dict.get('datadecs', [])
-            importer = DatadecsImporter()
-            for datadec_dict in datadec_dicts:
-                kwargs = {}
-                kwargs['dataset_obj'] = dataset
-                if contract:
-                    kwargs['contract_obj'] = contract
-                importer.process_datadec(datadec_dict, **kwargs)
+            # contract = self.process_submission_as_contract(submission_dict, project)
 
-            for study_dict in submission_dict.get('studies', []):
-                study = self.process_study(study_dict)
+            # for study_dict in submission_dict.get('studies', []):
+            #     study = self.process_study(study_dict)
@@ -63,72 +55,72 @@ def import_json(self, json_string, stop_on_error=False, verbose=False):
             return False
         return True
 
-    def process_submission_as_contract(self, submission_dict, project):
-        try:
-            partner_accession = submission_dict['submitting_institution']
-        except KeyError:
-            raise DatasetImportError(data='Submitting institute info missing. Aborting import!')
-
-        try:
-            partner = Partner.objects.get(elu_accession=partner_accession)
-        except Partner.DoesNotExist:
-            raise DatasetImportError(
-                data='Partner institute with accession {} not found in DB. Aborting import.'.format(partner_accession))
-
-        if self.is_elixir_submission(submission_dict):
-            try:
-                contract = Contract.objects.get(project=project, partners_roles__partner=partner)
-            except Contract.DoesNotExist:
-                contract = Contract.objects.create(
-                    project=project,
-                )
-                contract.company_roles.add(GDPRRole["joint_controller"])
-                contract.add_partner_with_role(partner, GDPRRole["joint_controller"])
-                contract.local_custodians.set(project.local_custodians.all())
-                contract.save()
-            return contract
-
-    def process_study(self, study_dict):
-        try:
-            title = study_dict['title']
-        except KeyError:
-            raise DatasetImportError(data='study without title')
-
-        description = study_dict.get('description', None)
-        ethics_approval_exists = study_dict.get('ethics_approval_exists', False)
-        ethics_notes = "The submitter confirms that an ethics approval exists for the data collection, sharing and \
-        the purposes for which the data is shared." if ethics_approval_exists else None
-
-        existing_project = Project.objects.filter(title=title).first()
-        if existing_project is not None:
-            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            logger.warning(
-                "Project with title '{}' already found. It will be imported again with timestamp {}.".format(title,
-                                                                                                             timestamp))
-            title = title + timestamp
-
-        project = Project.objects.create(title=title,
-                                         description=description,
-                                         has_cner=ethics_approval_exists,
-                                         cner_notes=ethics_notes
-                                         )
-        contacts = self.process_external_contacts(study_dict.get('contacts', []))
-
-        if contacts:
-            project.contacts.set(contacts)
-            project.save()
-
-        # study_types = self.process_studytypes(study_dict)
-        # if study_types:
-        #     project.study_types.set(study_types)
-        #     project.save()
-
-        return project
-
-    @staticmethod
-    def process_role(role_string):
-        role, _ = ContactType.objects.get_or_create(name=role_string.strip())
-        return role
+    # def process_submission_as_contract(self, submission_dict, project):
+    #     try:
+    #         partner_accession = submission_dict['submitting_institution']
+    #     except KeyError:
+    #         raise DatasetImportError(data='Submitting institute info missing. Aborting import!')
+    #
+    #     try:
+    #         partner = Partner.objects.get(elu_accession=partner_accession)
+    #     except Partner.DoesNotExist:
+    #         raise DatasetImportError(
+    #             data='Partner institute with accession {} not found in DB. Aborting import.'.format(partner_accession))
+    #
+    #     if self.is_elixir_submission(submission_dict):
+    #         try:
+    #             contract = Contract.objects.get(project=project, partners_roles__partner=partner)
+    #         except Contract.DoesNotExist:
+    #             contract = Contract.objects.create(
+    #                 project=project,
+    #             )
+    #             contract.company_roles.add(GDPRRole["joint_controller"])
+    #             contract.add_partner_with_role(partner, GDPRRole["joint_controller"])
+    #             contract.local_custodians.set(project.local_custodians.all())
+    #             contract.save()
+    #         return contract

+    # def process_study(self, study_dict):
+    #     try:
+    #         title = study_dict['title']
+    #     except KeyError:
+    #         raise DatasetImportError(data='study without title')
+    #
+    #     description = study_dict.get('description', None)
+    #     ethics_approval_exists = study_dict.get('ethics_approval_exists', False)
+    #     ethics_notes = "The submitter confirms that an ethics approval exists for the data collection, sharing and \
+    #     the purposes for which the data is shared." if ethics_approval_exists else None
+    #
+    #     existing_project = Project.objects.filter(title=title).first()
+    #     if existing_project is not None:
+    #         timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    #         logger.warning(
+    #             "Project with title '{}' already found. It will be imported again with timestamp {}.".format(title,
+    #                                                                                                          timestamp))
+    #         title = title + timestamp
+    #
+    #     project = Project.objects.create(title=title,
+    #                                      description=description,
+    #                                      has_cner=ethics_approval_exists,
+    #                                      cner_notes=ethics_notes
+    #                                      )
+    #     contacts = self.process_external_contacts(study_dict.get('contacts', []))
+    #
+    #     if contacts:
+    #         project.contacts.set(contacts)
+    #         project.save()
+    #
+    #     # study_types = self.process_studytypes(study_dict)
+    #     # if study_types:
+    #     #     project.study_types.set(study_types)
+    #     #     project.save()
+    #
+    #     return project

+    # @staticmethod
+    # def process_role(role_string):
+    #     role, _ = ContactType.objects.get_or_create(name=role_string.strip())
+    #     return role
 
     # def process_studytypes(self, study_dict):
     #     studytypes = []
@@ -138,22 +130,22 @@ def process_role(role_string):
     #         studytypes.append(studytype)
     #     return studytypes
 
-    def process_external_contacts(self, contact_dicts):
-        contacts = []
-        for contact_dict in contact_dicts:
-            if 'role' in contact_dict:
-                role = self.process_role(contact_dict.get('role'))
-
-            partner = ProjectsImporter.process_partner(contact_dict.get('institution'))
-            contact, _ = Contact.objects.get_or_create(first_name=contact_dict.get('first_name').strip(),
-                                                       last_name=contact_dict.get('last_name').strip(),
-                                                       email=contact_dict.get('email').strip(),
-                                                       type=role)
-            contact.partners.set([partner])
-            contact.save()
-            contacts.append(contact)
-
-        return contacts
+    # def process_external_contacts(self, contact_dicts):
+    #     contacts = []
+    #     for contact_dict in contact_dicts:
+    #         if 'role' in contact_dict:
+    #             role = self.process_role(contact_dict.get('role'))
+    #
+    #         partner = ProjectsImporter.process_partner(contact_dict.get('institution'))
+    #         contact, _ = Contact.objects.get_or_create(first_name=contact_dict.get('first_name').strip(),
+    #                                                    last_name=contact_dict.get('last_name').strip(),
+    #                                                    email=contact_dict.get('email').strip(),
+    #                                                    type=role)
+    #         contact.partners.set([partner])
+    #         contact.save()
+    #         contacts.append(contact)
+    #
+    #     return contacts
 
     def is_elixir_submission(self, submission_dict):
         return submission_dict['scope'] == 'e'
@@ -175,14 +167,17 @@ def process_submission_as_dataset(self, submission_dict, project):
         dataset.project = project
 
         created_on_str = submission_dict['created_on']
-        title = submission_dict['title']
+        title = submission_dict['name']
         scope_str = 'Elixir' if submission_dict['scope'] == 'e' else 'LCSB Collaboration'
         local_project_str = submission_dict.get('local_project', '')
         dataset.comments = "ELU Accession: {}\nTitle: {}\nCreated On: {}\nScope: {}\nSubmitted to Project: {}".format(
             elu_accession, title, created_on_str, scope_str, local_project_str)
-        local_custodians = DatasetsImporter.process_local_custodians(submission_dict)
+        local_custodians, local_personnel, external_contacts = self.process_contacts(submission_dict)
+
         if local_custodians:
-            dataset.local_custodians.set(local_custodians)
+            dataset.local_custodians.set(local_custodians, clear=True)
+
+        dataset.save()
+
         return dataset
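
Reviewer note (illustration only, not part of the diff): process_submission_as_dataset
now delegates to the shared BaseImporter.process_contacts, which returns the triple
(local_custodians, local_personnel, external_contacts). The contact entries it consumes
follow the shape used throughout the reworked JSON fixtures; the values here are
invented:

    contact = {
        "first_name": "Jane",
        "last_name": "Roe",
        "email": "jane.roe@uni.lu",
        # "Principal_Investigator" entries end up in local_custodians;
        # other roles become personnel or external contacts
        "role": "Principal_Investigator",
        # partner accession, compared with the home organisation's elu_accession
        "institution": "ELU_I_77",
    }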
diff --git a/core/importer/projects_importer.py b/core/importer/projects_importer.py
index 00fb1f0b..dffe9540 100644
--- a/core/importer/projects_importer.py
+++ b/core/importer/projects_importer.py
@@ -1,23 +1,11 @@
-import logging
 import re
 from datetime import datetime
 from json import loads
 
-from core.models import Partner, Project, Publication, Contact, ContactType
-from core.models import User
+from core.importer.base_importer import BaseImporter
+from core.models import Partner, Project, Publication
 
-
-from django.conf import settings
-
-PRINCIPAL_INVESTIGATOR = 'Principal_Investigator'
-
-logger = logging.getLogger(__name__)
-
-
-from core.constants import Groups as GroupConstants
-from django.contrib.auth.models import Group
-
-class ProjectsImporter:
+class ProjectsImporter(BaseImporter):
     """
     `ProjectsImporter`, should be able to fill the database with projects' information, based on JSON file
     complying to the schema in:
@@ -36,17 +24,17 @@ class DateImportException(Exception):
 
     def import_json(self, json_string, stop_on_error=False):
         try:
-            logger.info('Import started"')
+            self.logger.info('Import started')
             all_information = loads(json_string)
-            logger.debug('Import started"')
+            self.logger.debug('Import started')
             for project in all_information:
-                logger.debug(' * Importing project: "{}"...'.format(project.get('acronym', "N/A")))
+                self.logger.debug(' * Importing project: "{}"...'.format(project.get('acronym', "N/A")))
                 self.process_project(project)
-                logger.debug(" ... success!")
-            logger.info('Import succeeded"')
+                self.logger.debug(" ... success!")
+            self.logger.info('Import succeeded')
         except Exception as e:
-            logger.error('Import failed"')
-            logger.error(str(e))
+            self.logger.error('Import failed')
+            self.logger.error(str(e))
             if stop_on_error:
                 raise e
 
@@ -56,7 +44,7 @@ def process_project(self, project_dict):
                         for publication_dict in project_dict.get('publications', [])]
 
 
-        title = project_dict.get('title', "N/A")
+        title = project_dict.get('name', "N/A")
         description = project_dict.get('description', None)
         has_cner = project_dict.get('has_national_ethics_approval', False)
         has_erp = project_dict.get('has_institutional_ethics_approval', False)
@@ -74,7 +62,7 @@ def process_project(self, project_dict):
                 erp_notes=erp_notes
             )
         else:
-            logger.warning("Project with acronym '{}' already found. It will be updated.".format(acronym))
+            self.logger.warning("Project with acronym '{}' already found. It will be updated.".format(acronym))
             project.title = title
             project.description = description
             project.has_cner = has_cner
@@ -89,7 +77,7 @@ def process_project(self, project_dict):
             message = "\tCouldn't import the 'start_date'. Does it follow the '%Y-%m-%d' format?\n\t"
             message = message + 'Was: "{}". '.format(project_dict.get('start_date'))
             message = message + "Continuing with empty value."
-            logger.warning(message)
+            self.logger.warning(message)
 
         try:
             if 'end_date' in project_dict and len(project_dict.get('end_date')) > 0:
@@ -98,7 +86,7 @@ def process_project(self, project_dict):
             message = "\tCouldn't import the 'end_date'. Does it follow the '%Y-%m-%d' format?\n\t"
             message = message + 'Was: "{}". '.format(project_dict.get('end_date'))
             message = message + "Continuing with empty value."
-            logger.warning(message)
+            self.logger.warning(message)
 
         project.save()
 
@@ -110,6 +98,9 @@ def process_project(self, project_dict):
         if local_custodians:
             project.local_custodians.set(local_custodians, clear=True)
 
+        if external_contacts:
+            project.contacts.set(external_contacts, clear=True)
+
         for publication in publications:
             project.publications.add(publication)
 
@@ -119,57 +110,6 @@ def process_project(self, project_dict):
             local_custodian.assign_permissions_to_dataset(project)
 
 
-    def process_contacts(self, project_dict):
-        local_custodians = []
-        local_personnel = []
-        external_contacts = []
-
-        home_organisation = Partner.objects.get(acronym=settings.COMPANY)
-
-        for contact_dict in project_dict.get('contacts', []):
-            first_name = contact_dict.get('first_name').strip()
-            last_name = contact_dict.get('last_name').strip()
-            full_name = "{} {}".format(first_name, last_name)
-            role_name = contact_dict.get('role')
-            if home_organisation.elu_accession == contact_dict.get('institution').strip():
-                user = (User.objects.filter(first_name__icontains=first_name.lower(),
-                                            last_name__icontains=last_name.lower()) | User.objects.filter(
-                    first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first()
-                if user is None:
-                    logger.warning('no user found for %s an inactive user will be created', full_name)
-
-                    usr_name = first_name.lower() + '.' + last_name.lower()
-                    user = User.objects.create(username=usr_name, password='', first_name=first_name, last_name=last_name, is_active=False,
-                                               email='inactive.user@uni.lu',
-                                               )
-                    user.staff = True
-
-                    if role_name == PRINCIPAL_INVESTIGATOR:
-                        g = Group.objects.get(name=GroupConstants.VIP.value)
-                        user.groups.add(g)
-
-                    user.save()
-                if role_name == PRINCIPAL_INVESTIGATOR:
-                    local_custodians.append(user)
-                else:
-                    local_personnel.append(user)
-
-            else:
-                contact = (Contact.objects.filter(first_name__icontains=first_name.lower(),
-                                                  last_name__icontains=last_name.lower()) | Contact.objects.filter(
-                    first_name__icontains=first_name.upper(), last_name__icontains=last_name.upper())).first()
-                if contact is None:
-                    contact = Contact.objects.create(first_name=first_name, last_name=last_name )
-                    contact.type = ContactType.objects.get_or_create(name=role_name)
-                    affiliation = Partner.objects.get(elu_accession=contact_dict.get('institution'))
-                    if affiliation:
-                        contact.partners.add(affiliation)
-                    contact.save()
-                external_contacts.append(contact)
-
-        return local_custodians, local_personnel, external_contacts
-
-
     @staticmethod
@@ -181,7 +121,7 @@ def process_partner(partner_string):
 
     @staticmethod
     def process_publication(publication_dict):
-        publication = Publication.objects.create(citation=publication_dict.get('citation_string'))
+        publication = Publication.objects.create(citation=publication_dict.get('citation'))
         if 'doi' in publication_dict:
             publication.doi = publication_dict.get('doi')
         publication.save()
diff --git a/core/management/commands/import_datasets.py b/core/management/commands/import_datasets.py
index c36467e6..c0bbe686 100644
--- a/core/management/commands/import_datasets.py
+++ b/core/management/commands/import_datasets.py
@@ -2,7 +2,7 @@
 
 from django.core.management import BaseCommand, CommandError
 
-from core.importer.datadecs_importer import DatadecsImporter
+
 from core.importer.datasets_importer import DatasetsImporter
 
 JSON_SUFFIX = '.json'
@@ -29,20 +29,13 @@ def handle(self, *args, **options):
             path_to_json_directory = options.get('d')
             verbose = options.get('verbose')
             exxit = options.get('exit')
-            importer = {"dataset": DatasetsImporter(), "datadec": DatadecsImporter()}
+            importer = DatasetsImporter()
 
             # We import all dataset files first
             for json_file_path in os.listdir(path_to_json_directory):
-                if json_file_path.startswith("dataset") and json_file_path.endswith(JSON_SUFFIX):
-                    self.import_file(importer['dataset'], os.path.join(path_to_json_directory, json_file_path), verbose,
+                if json_file_path.endswith(JSON_SUFFIX):
+                    self.import_file(importer, os.path.join(path_to_json_directory, json_file_path), verbose,
                                      exxit)
-
-            # Then we import all datadec files
-            for json_file_path in os.listdir(path_to_json_directory):
-                if json_file_path.startswith("datadec") and json_file_path.endswith(JSON_SUFFIX):
-                    self.import_file(importer['datadec'], os.path.join(path_to_json_directory, json_file_path), verbose,
-                                     exxit)
-
         except Exception as e:
             self.stderr.write(
                 self.style.ERROR("Something went wrong during the import! Is the path valid? Is the file valid?"))
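
Reviewer note (illustration only, not part of the diff): since the "datadec" pass is
gone, the command now makes a single sweep over every *.json file in the given
directory. An example invocation, assuming the existing -d and --verbose option names,
which are not shown in this hunk and may differ:

    python manage.py import_datasets -d data/demo --verbose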
diff --git a/core/management/commands/load_demo_data.py b/core/management/commands/load_demo_data.py
index b0de9b1a..6f2f5a94 100644
--- a/core/management/commands/load_demo_data.py
+++ b/core/management/commands/load_demo_data.py
@@ -2,7 +2,7 @@
 from django.conf import settings
 import os
 
-from core.importer.datadecs_importer import DatadecsImporter
+
 from core.importer.datasets_importer import DatasetsImporter
 from core.importer.projects_importer import ProjectsImporter
 from core.models import User
@@ -28,15 +28,6 @@ def handle(self, *args, **options):
                 importer = DatasetsImporter()
                 importer.import_json(json_file_contents)
                 self.stdout.write(self.style.SUCCESS("Dataset import successful!"))
-            datadecs_json = os.path.join(DEMO_DATA_DIR, 'datadecs.json')
-            with open(datadecs_json, encoding='utf-8') as json_file:
-                json_file_contents = json_file.read()
-                importer = DatadecsImporter()
-                importer.import_json(json_file_contents)
-                self.stdout.write(self.style.SUCCESS("Data declaration import successful!"))
-
-
-
             admin_usr = User.objects.create_user(username='admin', password='', email='demo.admin@uni.lu')
             admin_usr.is_superuser =True
             admin_usr.save()
@@ -49,8 +40,6 @@ def handle(self, *args, **options):
                 user.set_password('demo')
                 user.save()
 
-
-
         except Exception as e:
             self.stderr.write(
                 self.style.ERROR("Something went wrong during the import! Is the path valid? Is the file valid?"))
diff --git a/core/models/project.py b/core/models/project.py
index df68a1a3..6fad164b 100644
--- a/core/models/project.py
+++ b/core/models/project.py
@@ -180,12 +180,18 @@ def to_dict(self):
                  "role": "Principal_Investigator" if lc.is_part_of(constants.Groups.VIP.name) else "Researcher",
                  "affiliations": [HomeOrganisation().name]})
 
+        pub_dicts = []
+        for pub in self.publications.all():
+            pub_dicts.append(
+                {"citation": pub.citation if pub.citation else None,
+                 "doi": pub.doi if pub.doi else None})
+
         base_dict = {
             "source": settings.SERVER_URL,
             "id_at_source": self.id.__str__(),
-            "name": self.acronym,
+            "acronym": self.acronym,
             "elu_accession": self.elu_accession if self.elu_accession else None,
-            "title": self.title if self.title else None,
+            "name": self.title if self.title else None,
             "description": self.description if self.description else None,
             "has_institutional_ethics_approval": self.has_erp,
             "has_national_ethics_approval": self.has_cner,
diff --git a/core/tests/data/ELX_LU_SUB-1.json b/core/tests/data/ELX_LU_SUB-1.json
index fbef30e3..a1ab39a6 100644
--- a/core/tests/data/ELX_LU_SUB-1.json
+++ b/core/tests/data/ELX_LU_SUB-1.json
@@ -1,12 +1,28 @@
 {
   "elu_accession": "ELX_LU_SUB-1",
-  "title": "Test Submission to be exported.",
+  "name": "Test Submission to be exported.",
   "submitting_institution": "ELU_I_5",
   "created_on": "2018-10-15",
   "scope": "e",
-  "local_custodian": [
-    "Elgin Gray",
-    "Rob Blue"
+  "contacts": [
+    {
+      "first_name": "Elgin",
+      "last_name": "Gray",
+      "role": "Principal_Investigator",
+      "institution": "ELU_I_77"
+    },
+    {
+      "first_name": "Rob",
+      "last_name": "Blue",
+      "role": "Principal_Investigator",
+      "institution": "ELU_I_77"
+    },
+    {
+      "first_name": "Embury",
+      "last_name": "Bask",
+      "role": "Researcher",
+      "institution": "ELU_I_77"
+    }
   ],
   "local_project": "Submitting to NCER PD Diagnosis project",
   "data_providers": [
@@ -41,9 +57,9 @@
     ]
   }
 ],
-  "datadecs": [
+  "data_declarations": [
     {
-      "title": "Test datadec 1",
+      "name": "Test datadec 1",
       "source_study": "Test Study ABC",
       "legal_basis_data_collection": "Consent",
       "legal_basis_data_sharing": "Consent",
@@ -67,7 +83,7 @@
       ]
     },
     {
-      "title": "Test datadec 2",
+      "name": "Test datadec 2",
       "source_study": "Test Study ABC",
       "legal_basis_data_collection": "Consent",
       "legal_basis_data_sharing": "Consent",
diff --git a/core/tests/data/datadecs.json b/core/tests/data/datadecs.json
deleted file mode 100644
index f54d0159..00000000
--- a/core/tests/data/datadecs.json
+++ /dev/null
@@ -1,136 +0,0 @@
-[
-  {
-    "dataset": "ABCD data",
-    "title": "ABCD",
-    "data_type_notes": "..\n",
-    "data_types": [
-      "Metabolomics",
-      "Clinical_data",
-      "Methylation_array"
-    ],
-    "de_identification": "anonymization",
-    "subject_categories": "controls",
-    "has_special_subjects": true,
-    "special_subject_notes": "2 year old children",
-    "source_notes": "Data is from collaborator.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Use is restricted to projects: ABCD"
-      },
-      {
-        "ga4gh_code": "PUB",
-        "note": "Acknowledgement required."
-      }
-    ]
-  },
-  {
-    "dataset": "Hypertension data",
-    "title": "Hypertension-ABC disease",
-    "data_types": [
-      "Genotype_data",
-      "Whole_genome_sequencing"
-    ],
-    "de_identification": "pseudonymization",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "source_collaboration": {
-      "collab_inst": "ELU_I_94",
-      "collab_pi": "Alberto Pico",
-      "collab_project": "Hypertension",
-      "collab_role": "joint_controller"
-    },
-    "source_notes": "Data is from collaborator.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Use is restricted to projects: Hypertension"
-      },
-      {
-        "ga4gh_code": "RS-[XX]",
-        "note": "Use is restricted to research areas: Hypertension-ABC disease"
-      }
-    ]
-  },
-  {
-    "dataset": "PD data",
-    "title": "XYZ",
-    "source_notes": "Data is from own cohort.",
-    "data_types": [
-      "Cell_Imaging",
-      "Clinical_data",
-      "Other_Phenotype_data",
-      "Samples"
-    ],
-    "de_identification": "pseudonymization",
-    "ombudsman": "Biobank, Principle Investigator",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "consent_status": "homogeneous",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Consent form restricts data use to projects XYZ"
-      },
-      {
-        "ga4gh_code": "RS-[XX]",
-        "note": "Data is consented for research onParkinson's disease"
-      },
-      {
-        "ga4gh_code": "GS-[XX]",
-        "note": "Data is consented for sharing outside institute (Within Luxembourg)"
-      }
-    ]
-  },
-  {
-    "dataset": "REPO data",
-    "title": "ZZZZ",
-    "source_collaboration": {
-      "collab_inst": "ELU_I_84"
-    },
-    "source_notes": "Data is obtained from repository.",
-    "data_types": [
-      "Whole_genome_sequencing",
-      "Other_Phenotype_data"
-    ],
-    "de_identification": "pseudonymization",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "access_category": "controlled_access",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PUB",
-        "note": "Acknowledgement required."
-      }
-    ]
-  },
-  {
-    "dataset": "PD data",
-    "title": "PD data (german cohort)",
-    "data_type_notes": "Small-scale protein, mRNA or mtDNA integrity data, cell and mitochondrial function data\n",
-    "data_types": [
-      "Other",
-      "Samples"
-    ],
-    "de_identification": "pseudonymization",
-    "subject_categories": "cases_and_controls",
-    "has_special_subjects": false,
-    "source_collaboration": {
-      "collab_inst": "ELU_I_8",
-      "collab_pi": "Manuela Swift",
-      "collab_project": "PD_Project",
-      "collab_role": "joint_controller"
-    },
-    "source_notes": "Data is from collaborator.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PUB",
-        "note": "Acknowledgement required."
-      },
-      {
-        "ga4gh_code": "TS-[XX]",
-        "note": "Data is obtained for a limited duration. 2021-02-28"
-      }
-    ]
-  }
-]
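
Reviewer note (illustration only, not part of the diff): with datadecs.json removed, a
data declaration no longer references its parent dataset by title; it is nested inside
the dataset record itself. A minimal sketch of the new shape, reduced from the fixture
that follows (the ellipses stand for the fields shown there):

    dataset = {
        "name": "ABCD data",
        "project": "ABCD",
        "contacts": [ ... ],
        "storages": [
            {"platform": "Other", "locations": [ ... ], "category": "master"},
        ],
        "data_declarations": [
            {"title": "ABCD", "data_types": [ ... ], "use_restrictions": [ ... ]},
        ],
    }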
- "title": "REPO data", - "local_custodian": [ - "Paul Mauve", - "Rene Sahoo" - ], - "storage_acl_info": "Paul Mauve, John Doe", - "storage_locations": [ + "name": "REPO data", + "storages": [ { - "storage_resource": "lcsb_group_server", + "accesses": [ + "Paul Mauve", "John Doe" + ], + "platform": "lcsb_group_server", "locations": [ "some_server_directory:/work/projects/wgs/cohort" ], "category": "master" } + ], + "contacts": [ + { + "first_name": "Paul", + "last_name": "Mauve", + "role": "Principal_Investigator", + "institution": "ELU_I_77" + }, { + "first_name": "Rene", + "last_name": "Sahoo", + "role": "Principal_Investigator", + "institution": "ELU_I_77" + } + + ], + "data_declarations": [ + { + "title": "ZZZZ", + "source_collaboration": { + "collab_inst": "ELU_I_84" + }, + "source_notes": "Data is obtained from repository.", + "data_types": [ + "Whole_genome_sequencing", + "Other_Phenotype_data" + ], + "de_identification": "pseudonymization", + "subject_categories": "cases_and_controls", + "has_special_subjects": false, + "access_category": "controlled_access", + "use_restrictions": [ + { + "ga4gh_code": "PUB", + "note": "Acknowledgement required." + } + ] + } ] }, { - "title": "PD data", - "local_custodian": [ - "Ali Gator" - ], + "name": "PD data", "project": "PD_Project", "shares": [], - "storage_acl_info": "All group members have access", - "storage_locations": [ + "contacts": [ + { + "first_name": "Ali", + "last_name": "Gator", + "role": "Principal_Investigator", + "institution": "ELU_I_77" + } + ], + "storages": [ { - "storage_resource": "lcsb_group_server", + "platform": "lcsb_group_server", "locations": [ "\\\\some_server\\GROUP\\Projects\\SUB_FOLDER", "\\\\some_server\\GROUP\\General" @@ -101,14 +218,14 @@ "category": "master" }, { - "storage_resource": "Other", + "platform": "Other", "locations": [ "this server is backed-up by IT" ], "category": "backup" }, { - "storage_resource": "atlas_personal", + "platform": "atlas_personal", "locations": [ "\\\\some_server\\users\\first.user", "\\\\some_server\\users\\second.user ", @@ -122,7 +239,7 @@ "category": "copy" }, { - "storage_resource": "lcsb_laptop", + "platform": "lcsb_laptop", "locations": [ "first.user", "second.user", @@ -136,7 +253,7 @@ "category": "copy" }, { - "storage_resource": "External Storage (e.g. Hard disk, DVD)", + "platform": "External Storage (e.g. 
Hard disk, DVD)", "locations": [ "first.user", "second.user", @@ -150,7 +267,7 @@ "category": "copy" }, { - "storage_resource": "Owncloud", + "platform": "Owncloud", "locations": [ "first.user", "second.user", @@ -164,12 +281,69 @@ "category": "copy" }, { - "storage_resource": "sample-storage", + "platform": "sample-storage", "locations": [ "Hospital/Institute Floor X Cabinet Y" ], "category": "master" } + ], + "data_declarations": [{ + "title": "XYZ", + "source_notes": "Data is from own cohort.", + "data_types": [ + "Cell_Imaging", + "Clinical_data", + "Other_Phenotype_data", + "Samples" + ], + "de_identification": "pseudonymization", + "ombudsman": "Biobank, Principle Investigator", + "subject_categories": "cases_and_controls", + "has_special_subjects": false, + "consent_status": "homogeneous", + "use_restrictions": [ + { + "ga4gh_code": "PS", + "note": "Consent form restricts data use to projects XYZ" + }, + { + "ga4gh_code": "RS-[XX]", + "note": "Data is consented for research onParkinson's disease" + }, + { + "ga4gh_code": "GS-[XX]", + "note": "Data is consented for sharing outside institute (Within Luxembourg)" + } + ] + }, { + "title": "PD data (german cohort)", + "data_type_notes": "Small-scale protein, mRNA or mtDNA integrity data, cell and mitochondrial function data\n", + "data_types": [ + "Other", + "Samples" + ], + "de_identification": "pseudonymization", + "subject_categories": "cases_and_controls", + "has_special_subjects": false, + "source_collaboration": { + "collab_inst": "ELU_I_8", + "collab_pi": "Manuela Swift", + "collab_project": "PD_Project", + "collab_role": "joint_controller" + }, + "source_notes": "Data is from collaborator.", + "use_restrictions": [ + { + "ga4gh_code": "PUB", + "note": "Acknowledgement required." + }, + { + "ga4gh_code": "TS-[XX]", + "note": "Data is obtained for a limited duration. 2021-02-28" + } + ] + } ] } ] \ No newline at end of file diff --git a/core/tests/data/projects.json b/core/tests/data/projects.json index 1f1cfa0d..0f5edaa9 100644 --- a/core/tests/data/projects.json +++ b/core/tests/data/projects.json @@ -1,7 +1,7 @@ [ { "acronym": "In vitro disease modeling", - "title": "Parkinson's disease in vitro disease modeling - Focus on genes ........., .", + "name": "Parkinson's disease in vitro disease modeling - Focus on genes ........., .", "description": "Generation of disease specific iPSCs; ...............", "start_date": "2014-01-01", "end_date": "", @@ -31,16 +31,16 @@ "institutional_ethics_approval_notes": "test notes 123", "publications": [ { - "citation_string": "Paper 1 citation string...." + "citation": "Paper 1 citation string...." }, { - "citation_string": " Paper 2 citation string..." + "citation": " Paper 2 citation string..." } ] }, { "acronym": "CCCC deficiency", - "title": "CCCC deficiency", + "name": "CCCC deficiency", "description": "Analysis of CCCC function in human disease", "start_date": "2016-11-01", "contacts": [ @@ -75,7 +75,7 @@ "national_ethics_approval_notes": "We analyse samples ........", "publications": [ { - "citation_string": "CCCC deficiency: a novel method in ........ (in preparation)" + "citation": "CCCC deficiency: a novel method in ........ 
diff --git a/core/tests/data/projects.json b/core/tests/data/projects.json
index 1f1cfa0d..0f5edaa9 100644
--- a/core/tests/data/projects.json
+++ b/core/tests/data/projects.json
@@ -1,7 +1,7 @@
 [
   {
     "acronym": "In vitro disease modeling",
-    "title": "Parkinson's disease in vitro disease modeling - Focus on genes ........., .",
+    "name": "Parkinson's disease in vitro disease modeling - Focus on genes ........., .",
     "description": "Generation of disease specific iPSCs; ...............",
     "start_date": "2014-01-01",
     "end_date": "",
@@ -31,16 +31,16 @@
     "institutional_ethics_approval_notes": "test notes 123",
     "publications": [
       {
-        "citation_string": "Paper 1 citation string...."
+        "citation": "Paper 1 citation string...."
       },
       {
-        "citation_string": " Paper 2 citation string..."
+        "citation": " Paper 2 citation string..."
       }
     ]
   },
   {
     "acronym": "CCCC deficiency",
-    "title": "CCCC deficiency",
+    "name": "CCCC deficiency",
     "description": "Analysis of CCCC function in human disease",
     "start_date": "2016-11-01",
     "contacts": [
@@ -75,7 +75,7 @@
     "national_ethics_approval_notes": "We analyse samples ........",
     "publications": [
       {
-        "citation_string": "CCCC deficiency: a novel method in ........ (in preparation)"
+        "citation": "CCCC deficiency: a novel method in ........ (in preparation)"
       }
     ]
   }
diff --git a/core/tests/importer/test_bioportal_client.py b/core/tests/importer/test_bioportal_client.py
deleted file mode 100644
index f48eddf0..00000000
--- a/core/tests/importer/test_bioportal_client.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import urllib.request, urllib.error, urllib.parse
-import json
-import pytest
-
-REST_URL = "http://data.bioontology.org"
-API_KEY = ""
-
-
-def get_json(url):
-    opener = urllib.request.build_opener()
-    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
-    return json.loads(opener.open(url).read())
-
-@pytest.mark.skip(reason="we currently use local ontologies. However in the future we may switch to bioportal search.")
-@pytest.mark.webtest
-def test_term_search():
-    terms = []
-    terms.append("lewy")
-
-
-    # Do a search for every term
-    search_results = []
-    for term in terms:
-        search_results.append(get_json(REST_URL + "/search?q=" + term)["collection"])
-
-    # Print the results
-    for result in search_results:
-        print(result)
\ No newline at end of file
diff --git a/core/tests/importer/test_datadecs_importer.py b/core/tests/importer/test_datadecs_importer.py
deleted file mode 100644
index 798931f4..00000000
--- a/core/tests/importer/test_datadecs_importer.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-
-import pytest
-
-from core.importer.datadecs_importer import DatadecsImporter
-from core.importer.datasets_importer import DatasetsImporter
-from core.models import Dataset, DataDeclaration
-from test import factories
-
-
-@pytest.mark.xfail
-@pytest.mark.django_db
-def test_dummy(celery_session_worker, storage_resources, can_defer_constraint_checks):
-    pass
-
-
-@pytest.mark.django_db
-def test_import_datadecs(celery_session_worker, contact_types, partners, gdpr_roles, storage_resources, can_defer_constraint_checks):
-
-    VIP = factories.VIPGroup()
-
-    factories.UserFactory.create(first_name='Igor', last_name='Teal', groups=[VIP])
-    factories.UserFactory.create(first_name='Joanne', last_name='Swift', groups=[VIP])
-    factories.UserFactory.create(first_name='Elgin', last_name='Gray', groups=[VIP])
-    factories.UserFactory.create(first_name='Paul', last_name='Mauve', groups=[VIP])
-    factories.UserFactory.create(first_name='Rene', last_name='Sahoo', groups=[VIP])
-    factories.UserFactory.create(first_name='Rob', last_name='Blue', groups=[VIP])
-
-    dataset_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/datasets.json")
-    with open(dataset_file, "r") as f:
-        importer = DatasetsImporter()
-        importer.import_json(f.read(), True)
-
-    datadec_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/datadecs.json")
-    with open(datadec_file, "r") as f:
-        importer = DatadecsImporter()
-        importer.import_json(f.read(), True)
-
-    dsets = Dataset.objects.all()
-    assert 5 == dsets.count()
-
-    ddecs = DataDeclaration.objects.all()
-    HyperData = ddecs[1]
-    assert 'Hypertension-ABC disease' == HyperData.title
-    contract = HyperData.contract
-    first_partner_role = contract.partners_roles.first()
-    assert first_partner_role.contacts.count() > 0
-    assert "Alberto" == first_partner_role.contacts.first().first_name
-    assert "Pico" == first_partner_role.contacts.first().last_name
-    assert "Hypertension" == contract.project.acronym
-    assert "ELU_I_94" == first_partner_role.partner.elu_accession
diff --git a/core/tests/importer/test_datasets_importer.py b/core/tests/importer/test_datasets_importer.py
index f154adea..0958bf6e 100644
--- a/core/tests/importer/test_datasets_importer.py
+++ b/core/tests/importer/test_datasets_importer.py
@@ -3,10 +3,16 @@
 import pytest
 
 from core.importer.datasets_importer import DatasetsImporter
-from core.models import Dataset, Project
+from core.models import Dataset, Project, DataDeclaration
 from test import factories
 
 
+@pytest.mark.xfail
+@pytest.mark.django_db
+def test_dummy(celery_session_worker, storage_resources, can_defer_constraint_checks):
+    pass
+
+
 @pytest.mark.django_db
 def test_import_datasets(celery_session_worker, storage_resources, partners, gdpr_roles, can_defer_constraint_checks):
     VIP = factories.VIPGroup()
@@ -44,3 +50,6 @@ def test_import_datasets(celery_session_worker, storage_resources, partners, gdp
     d4 = Dataset.objects.filter(title='PD data').first()
     assert ["Ali Gator"] == [employee.full_name for employee in d4.local_custodians.all()]
     assert 7 == d4.data_locations.all().count()
+
+    ddecs = DataDeclaration.objects.all()
+    assert 5 == ddecs.count()
diff --git a/core/tests/importer/test_export.py b/core/tests/importer/test_export.py
index 8c24b034..debc0342 100644
--- a/core/tests/importer/test_export.py
+++ b/core/tests/importer/test_export.py
@@ -26,7 +26,7 @@ def test_export_projects(celery_session_worker, contact_types, partners, gdpr_ro
     project_dicts = dict['items']
     assert 2 == len(project_dicts)
 
-    assert "Test_PRJ" == project_dicts[0]['name']
+    assert "Title of test project." == project_dicts[0]['name']
     assert 2 == len(project_dicts[0]['contacts'])
 
     #TODO add check of more fields and schema validation
diff --git a/core/tests/importer/test_import_elx_submission.py b/core/tests/importer/test_import_elx_submission.py
index 62d343fd..0aad6e6f 100644
--- a/core/tests/importer/test_import_elx_submission.py
+++ b/core/tests/importer/test_import_elx_submission.py
@@ -25,10 +25,12 @@ def test_import_submission(celery_session_worker, partners, gdpr_roles, can_defe
     importer = DishSubmissionImporter(elixir_project.title)
     importer.import_json(file_with_dataset.read(), True, True)
     assert 1 == Dataset.objects.all().count()
-    assert 2 == Project.objects.all().count()
+    # assert 2 == Project.objects.all().count()
     dataset = Dataset.objects.first()
     assert 'ELX_LU_SUB-1' == dataset.title
-    assert 2 == dataset.data_declarations.all().count()
+    # assert 2 == dataset.data_declarations.all().count()
+    # TODO finalise Submission importer once elixir-dcp i.e. DISH goes into production.
+    # Mapping from DISH to DAISY not yet complete...
     assert 'ELIXIR' == dataset.project.title
     assert 2 == dataset.local_custodians.all().count()
     assert ["Elgin Gray", "Rob Blue"] == [custodian.full_name for custodian in dataset.local_custodians.all()]
diff --git a/data/demo/datadecs.json b/data/demo/datadecs.json
deleted file mode 100644
index e2bde1b5..00000000
--- a/data/demo/datadecs.json
+++ /dev/null
@@ -1,44 +0,0 @@
-[
-  {
-    "dataset": "LCSB Human Data Survey v-1",
-    "title": "Survey Responses",
-    "data_type_notes": "XLS survey files filled in by PIs",
-    "data_types": [
-      "Other"],
-    "has_special_subjects": false,
-    "source_collaboration": {
-      "collab_inst": "ELU_I_77",
-      "collab_pi": "John Doe",
-      "collab_project": "DAISY-Dev",
-      "collab_role": "controller"
-    },
-    "source_notes": "Data is collected from all LCSB PIs working with Human Data.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Use is restricted to projects: DAISY-Dev"
-      }
-    ]
-  },
-  {
-    "dataset": "DAISY LCSB Deployment",
-    "title": "Survey Responses Imported",
-    "data_type_notes": "XLS survey files filled in by PIs are imported in DAISY",
-    "data_types": [
-      "Other"],
-    "has_special_subjects": false,
-    "source_collaboration": {
-      "collab_inst": "ELU_I_77",
-      "collab_pi": "John Doe",
-      "collab_project": "DAISY-Dev",
-      "collab_role": "controller"
-    },
-    "source_notes": "Data is collected from all LCSB PIs working with Human Data.",
-    "use_restrictions": [
-      {
-        "ga4gh_code": "PS",
-        "note": "Use is restricted to projects: DAISY-Dev"
-      }
-    ]
-  }
-]
\ No newline at end of file
diff --git a/data/demo/datasets.json b/data/demo/datasets.json
index 7fc91f2d..d810603d 100644
--- a/data/demo/datasets.json
+++ b/data/demo/datasets.json
@@ -1,61 +1,54 @@
 [
   {
-    "title": "LCSB Human Data Survey v-1",
-    "local_custodian": [
-      "Alice White",
-      "Jane Doe"
-    ],
-    "project": "DAISY-Dev",
-    "shares": [
-      {
-        "share_inst": "ELU_I_96",
-        "share_notes": "Joe Bloggs"
-      }
-    ],
-    "storage_locations": [
+    "project": "EPIC-DREM",
+    "name": "EPIC-DREM Sequencing data",
+    "description": null,
+    "data_declarations": [
       {
-        "storage_resource": "atlas_project",
-        "locations": [
-          "smb://atlas.uni.lux/Data_Protection/Survey/XLS"
+        "title": "ChIP-Seq, RNA-seq, ...",
+        "data_types": [
+          "RNASeq",
+          "Samples",
+          "ChIP-seq"
         ],
-        "category": "master",
-        "storage_acl_info": "Alice White and Jane Doe has access to folder."
+        "access_category": null,
+        "subjects_category": "controls",
+        "de_identification": "pseudonymization",
+        "consent_status": "homogeneous",
+        "has_special_subjects": null,
+        "special_subjects_description": "",
+        "embargo_date": null,
+        "storage_end_date": "2021-04-05",
+        "storage_duration_criteria": "The project PI will assess whether data should be retained for further research at the storage end date."
       }
-    ]
-  },
-  {
-    "title": "DAISY LCSB Deployment",
-    "local_custodian": [
-      "Alice White",
-      "Jane Doe"
     ],
-    "project": "DAISY-Dev",
-    "storage_locations": [
+    "storages": [
       {
-        "storage_resource": "application",
+        "platform": "application",
         "locations": [
-          "https://daisy.lcsb.uni.lu"
+          "https://webdav-r3lab.uni.lu/"
         ],
         "category": "master",
-        "storage_acl_info": "All LCSB staff with an active LDAP account can access."
+        "accesses": [
+          "The data is available on the sequencing platform's webdav server."
+        ]
       }
-    ]
-  },
-  {
-    "title": "DAISY Demo Deployment",
-    "local_custodian": [
-      "Alice White",
-      "Jane Doe"
     ],
-    "project": "DAISY-Dev",
-    "storage_locations": [
+    "transfers": [],
+    "contacts": [
       {
-        "storage_resource": "application",
-        "locations": [
-          "https://daisy-demo.lcsb.uni.lu"
-        ],
-        "category": "master",
-        "storage_acl_info": "This is a demo deployment for external parties. IP Restricted access."
+        "first_name": "Alice",
+        "last_name": "White",
+        "email": "alice.white@uni.lu",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      },
+      {
+        "first_name": "John",
+        "last_name": "Black",
+        "email": "john.black@uni.lu",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
       }
     ]
   }
diff --git a/data/demo/projects.json b/data/demo/projects.json
index 330b408c..c56031d2 100644
--- a/data/demo/projects.json
+++ b/data/demo/projects.json
@@ -1,40 +1,77 @@
+
 [
   {
-    "acronym": "DAISY-Dev",
-    "title": "Data Information System DAISY Development.",
-    "description": "Development of software to record GDPR-relevant metadata of human data and biosamples used in research.",
-    "start_date": "2018-03-01",
-    "end_date": "2019-09-01",
+    "acronym": "EPIC-DREM",
+    "name": "Understanding the gene regulatory interactions underlying cell differentiation and identity",
+    "description": "\"Temporal data on gene expression and context-specific open chromatin states can improve identification of key transcription factors (TFs) and the gene regulatory networks (GRNs) controlling cellular differentiation. However, their integration remains challenging. Here, we delineate a general approach for data-driven and unbiased identification of key TFs and dynamic GRNs, called EPIC-DREM. We generated time-series transcriptomic and epigenomic profiles during differentiation of mouse multipotent bone marrow stromal cell line (ST2) toward adipocytes and osteoblasts. Using our novel approach we constructed time-resolved GRNs for both lineages and identified the shared TFs involved in both differentiation processes. To take an alternative approach to prioritize the identified shared regulators, we mapped dynamic super-enhancers in both lineages and associated them to target genes with correlated expression profiles. The combination of the two approaches identified aryl hydrocarbon receptor (AHR) and Glis family zinc finger 1 (GLIS1) as mesenchymal key TFs controlled by dynamic cell type-specific super-enhancers that become repressed in both lineages. AHR and GLIS1 control differentiation-induced genes and their overexpression can inhibit the lineage commitment of the multipotent bone marrow-derived ST2 cells..\" \r\n\r\n...taken from D G\u00e9rard, et al. Nucleic Acids Research, Volume 47, Issue 3, 20 February 2019, Pages 1141\u20131163, https://doi.org/10.1093/nar/gky1240",
+    "has_institutional_ethics_approval": true,
+    "has_national_ethics_approval": true,
+    "institutional_ethics_approval_notes": "The umbrella project LUX-Epigen has an institutional and a national ethics approval.",
+    "national_ethics_approval_notes": "The umbrella project LUX-Epigen has an institutional and a national ethics approval.",
+    "start_date": "2020-03-02",
+    "end_date": "2021-04-30",
     "contacts": [
       {
         "first_name": "Alice",
         "last_name": "White",
+        "email": "alice.white@uni.lu",
         "role": "Principal_Investigator",
         "institution": "ELU_I_77"
       },
       {
         "first_name": "John",
-        "last_name": "Doe",
+        "last_name": "Black",
+        "email": "john.black@uni.lu",
         "role": "Principal_Investigator",
         "institution": "ELU_I_77"
       },
       {
-        "first_name": "Jane",
-        "last_name": "Doe",
+        "first_name": "Roy",
+        "last_name": "Blue",
+        "email": "roy.blue@uni.lu",
         "role": "Researcher",
         "institution": "ELU_I_77"
       }
     ],
-    "has_institutional_ethics_approval": false,
-    "has_national_ethics_approval": false,
-    "national_ethics_approval_notes": "Not needed as it is a software development project. No data collection.",
     "publications": [
       {
-        "citation_string": "Provenance-enabled stewardship of human data in the GDPR era. Pinar Alper, Regina Becker, Venkata Satagopam, Christophe Trefois, Valentin Groues, Jacek Lebioda, Yohan Jarosz. ProvenanceWeek 2018 IPAW Proceedings, London, July 9-10 2018."
+        "citation": "G\u00e9rard D, Schmidt F, Ginolhac A, Schmitz M, Halder R, Ebert P, Schulz MH,\nSauter T, Sinkkonen L. Temporal enhancer profiling of parallel lineages\nidentifies AHR and GLIS1 as regulators of mesenchymal multipotency. Nucleic Acids\nRes. 2019 Feb 20;47(3):1141-1163. PubMed PMID:\n30544251; PubMed Central PMCID: PMC6380961.",
+        "doi": "doi: 10.1093/nar/gky1240"
+      }
+    ]
+  },
+  {
+    "acronym": "LUX-Epigen",
+    "name": "LUX-Epigen Research Programme - Phase1",
+    "description": "This is an imaginary research project created to demonstrate features of DAISY. LUX-Epigen is created as an umbrella project, which holds an ethics approval and contract among the participant institutes. There can be several PhD and Postdoc sub-projects cover under the umbrella of Lux-Epigen.",
+    "has_institutional_ethics_approval": true,
+    "has_national_ethics_approval": true,
+    "institutional_ethics_approval_notes": "See attached Uni-LU ERP approval. This is a dummy document for the imaginary LUX-Epigen programme.",
+    "national_ethics_approval_notes": "See attached CNER approval. This is a dummy document for the imaginary LUX-Epigen programme.",
+    "start_date": "2017-04-03",
+    "end_date": "2022-04-25",
+    "contacts": [
+      {
+        "first_name": "Alice",
+        "last_name": "White",
+        "email": "alice.white@uni.lu",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
       },
       {
-        "citation_string": "ELIXIR-Luxembourg: providing sustainability to clinical and translational medicine data for research. Venkata Satagopam, Pinar Alper, Regina Becker, Dietlind Gerloff, Wei Gu, Roland Krause, Jacek Lebioda, Noua Toukourou, Christophe Trefois, Reinhard Schneider. Poster presentations at Intelligent Systems for Molecular Biology 2018, ISMB, Chicago, July 6-10 2018."
+        "first_name": "John",
+        "last_name": "Black",
+        "email": "john.black@uni.lu",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_77"
+      },
+      {
+        "first_name": "Igor",
+        "last_name": "Green",
+        "email": "igor.green@chl.lu",
+        "role": "Principal_Investigator",
+        "institution": "ELU_I_9"
       }
     ]
   }
-]
\ No newline at end of file
+]
diff --git a/elixir_daisy/settings.py b/elixir_daisy/settings.py
index 6ad9f293..74c4f698 100644
--- a/elixir_daisy/settings.py
+++ b/elixir_daisy/settings.py
@@ -15,7 +15,7 @@
 import pytz
 
 COMPANY = 'LCSB'  # Used for generating some models' verbose names
-
+DEMO_MODE = False
 
 AUTH_USER_MODEL = 'core.User'
diff --git a/web/templates/about.html b/web/templates/about.html
index 755807e8..72674212 100644
--- a/web/templates/about.html
+++ b/web/templates/about.html
@@ -20,8 +20,8 @@

About

https://github.com/elixir-luxembourg/daisy/issues
Notes
-
-    {% comment %}
+
+    {% if demo_mode %}
This is a demo deployment. It contains fictitious user accounts, projects and datasets to help you understand DAISY's information model.
Data in the demo deployment is cleaned periodically. Please refrain from recording information that
@@ -40,23 +40,18 @@

About

VIP User (Principle Investigator)
- jane.doe
- demo
- Standard User (Researcher)
-
-
- john.doe
+ john.black
demo
VIP User (Principle Investigator)
- admin
+ roy.blue
demo
- Super User (Application Admin)
+ Standard User (Researcher)
- {% endcomment %}
+ {% endif %}
diff --git a/web/views/about.py b/web/views/about.py
index 4d405eb5..e07bd4c5 100644
--- a/web/views/about.py
+++ b/web/views/about.py
@@ -1,8 +1,16 @@
 from django.shortcuts import render
 from stronghold.decorators import public
+from django.conf import settings
+import pkg_resources
 
 
 @public
 def about(request):
+
+    context = {
+        "app_version": pkg_resources.require("elixir-daisy")[0].version,
+        "demo_mode": settings.DEMO_MODE,
+    }
     return render(
         request,
-        'about.html'
+        'about.html',
+        context

From e5c34ee0ea7124cfa61fad3a4b0e86c454db71e3 Mon Sep 17 00:00:00 2001
From: Pinar Alper
Date: Thu, 7 May 2020 01:35:23 +0200
Subject: [PATCH 4/4] removed discrepancies between elu json schema and import/export results

---
 core/importer/base_importer.py     |  2 +-
 core/importer/datasets_importer.py | 33 ++++++++++++++--------------
 core/tests/data/ELX_LU_SUB-1.json  |  2 +-
 core/tests/data/datasets.json      | 35 +++++++++++++++++------------
 core/tests/data/projects.json      |  7 ++++++
 data/demo/datasets.json            | 24 ++++++++++++++++----
 6 files changed, 66 insertions(+), 37 deletions(-)

diff --git a/core/importer/base_importer.py b/core/importer/base_importer.py
index 58903b2a..a5a19f27 100644
--- a/core/importer/base_importer.py
+++ b/core/importer/base_importer.py
@@ -23,7 +23,7 @@ def process_contacts(self, project_dict):
         for contact_dict in project_dict.get('contacts', []):
             first_name = contact_dict.get('first_name').strip()
             last_name = contact_dict.get('last_name').strip()
-            email = contact_dict.get('email').strip()
+            email = contact_dict.get('email','').strip()
             full_name = "{} {}".format(first_name, last_name)
             role_name = contact_dict.get('role')
             if home_organisation.elu_accession == contact_dict.get('institution').strip():
diff --git a/core/importer/datasets_importer.py b/core/importer/datasets_importer.py
index 8f365837..58969ce1 100644
--- a/core/importer/datasets_importer.py
+++ b/core/importer/datasets_importer.py
@@ -79,7 +79,7 @@ def process_dataset(self, dataset_dict):
             # if 'storage_acl_notes' in storage_location_dict:
             #     dl.access_notes = storage_location_dict['storage_acl_notes']
 
-        shares = self.process_shares(dataset_dict, dataset)
+        shares = self.process_transfers(dataset_dict, dataset)
         if shares:
             dataset.shares.set(shares, bulk=False)
 
@@ -184,13 +184,13 @@ def process_data_locations(self, dataset, dataset_dict):
         return data_locations
 
-    def process_shares(self, dataset_dict, dataset):
+    def process_transfers(self, dataset_dict, dataset):
 
-        def process_share(share_dict, dataset):
+        def process_transfer(share_dict, dataset):
             share = Share()
-            share.access_notes = share_dict.get('share_notes')
+            share.share_notes = share_dict.get('transfer_details')
             share.dataset = dataset
-            share_institution_elu = share_dict.get('share_inst')
+            share_institution_elu = share_dict.get('partner')
             share_institution = Partner.objects.get(elu_accession=share_institution_elu.strip())
             share.partner = share_institution
             # project = dataset.project
@@ -212,8 +212,8 @@ def process_share(share_dict, dataset):
             #     share.contract = contract
             return share
 
-        shares = dataset_dict.get('shares', [])
-        return [process_share(share_object, dataset) for share_object in shares]
+        transfers = dataset_dict.get('transfers', [])
+        return [process_transfer(transfer_dict, dataset) for transfer_dict in transfers]
 
     def process_category(self, storage_location_dict):
         category_str = storage_location_dict.get('category', '').strip().lower()
@@ -255,7 +255,7 @@ def process_datadeclaration(self, datadec_dict, dataset):
         datadec.data_types_notes = datadec_dict.get('data_type_notes', None)
         datadec.deidentification_method = self.process_deidentification_method(datadec_dict)
         datadec.subjects_category = self.process_subjects_category(datadec_dict)
-        datadec.special_subjects_description = datadec_dict.get('special_subject_notes', None)
+        datadec.special_subjects_description = datadec_dict.get('special_subjects_description', None)
         datadec.other_external_id = datadec_dict.get('other_external_id', None)
         datadec.share_category = self.process_access_category(datadec_dict)
         datadec.consent_status = self.process_consent_status(datadec_dict)
@@ -293,8 +293,8 @@ def process_deidentification_method(self, datadec_dict):
             return DeidentificationMethod.pseudonymization
 
     def process_subjects_category(self, datadec_dict):
-        if 'subject_categories' in datadec_dict:
-            sub_category_str = datadec_dict.get('subject_categories', '').strip()
+        if 'subjects_category' in datadec_dict:
+            sub_category_str = datadec_dict.get('subjects_category', '').strip()
             try:
                 return SubjectCategory[sub_category_str]
             except KeyError:
@@ -382,12 +382,13 @@ def process_use_restrictions(self, data_dec, datadec_dict):
         return use_restrictions
 
     def process_access_category(self, datadec_dict):
-        share_category_str = datadec_dict.get('access_category', '').strip()
-        try:
-            return ShareCategory[share_category_str]
-        except KeyError:
-            return None
-
+        share_category_str = datadec_dict.get('access_category','')
+        if share_category_str:
+            try:
+                return ShareCategory[share_category_str]
+            except KeyError:
+                return None
+        return None
     def process_consent_status(self, datadec_dict):
         if 'consent_status' in datadec_dict:
             consent_status_str = datadec_dict.get('consent_status', '').strip()
diff --git a/core/tests/data/ELX_LU_SUB-1.json b/core/tests/data/ELX_LU_SUB-1.json
index a1ab39a6..43f3e8fd 100644
--- a/core/tests/data/ELX_LU_SUB-1.json
+++ b/core/tests/data/ELX_LU_SUB-1.json
@@ -70,7 +70,7 @@
       "data_size_category": "m",
       "metadata_exists": true,
       "has_special_subjects": true,
-      "special_subject_notes": "Subjects minors. mothers and babies",
+      "special_subjects_description": "Subjects minors. mothers and babies",
       "consent_status": "homogeneous",
       "consent_notes": "Consent is consistent among all subjects",
       "de_identification": "pseudonymized",
diff --git a/core/tests/data/datasets.json b/core/tests/data/datasets.json
index 7b88c281..436fa05f 100644
--- a/core/tests/data/datasets.json
+++ b/core/tests/data/datasets.json
@@ -2,16 +2,17 @@
   {
     "name": "ABCD data",
     "project": "ABCD",
-    "shares": [
+    "transfers": [
       {
-        "share_inst": "ELU_I_44",
-        "share_notes": "Melanie Silver"
+        "partner": "ELU_I_44",
+        "transfer_details": "Melanie Silver"
       }
     ],
     "contacts": [
       {
         "first_name": "Igor",
         "last_name": "Teal",
+        "email": "user@uni.edu",
         "role": "Principal_Investigator",
         "institution": "ELU_I_77"
       }
@@ -36,9 +37,9 @@
           "Methylation_array"
         ],
         "de_identification": "anonymization",
-        "subject_categories": "controls",
+        "subjects_category": "controls",
         "has_special_subjects": true,
-        "special_subject_notes": "2 year old children",
+        "special_subjects_description": "2 year old children",
         "source_notes": "Data is from collaborator.",
         "use_restrictions": [
           {
@@ -61,6 +62,7 @@
       {
         "first_name": "Joanne",
         "last_name": "Swift",
+        "email": "user@uni.edu",
         "role": "Principal_Investigator",
         "institution": "ELU_I_77"
       }
@@ -84,7 +86,7 @@
           "Whole_genome_sequencing"
         ],
         "de_identification": "pseudonymization",
-        "subject_categories": "cases_and_controls",
+        "subjects_category": "cases_and_controls",
         "has_special_subjects": false,
         "source_collaboration": {
           "collab_inst": "ELU_I_94",
@@ -113,19 +115,20 @@
       {
         "first_name": "Rob",
         "last_name": "Blue",
+        "email": "user@uni.edu",
         "role": "Principal_Investigator",
         "institution": "ELU_I_77"
       }
     ],
-    "shares": [
+    "transfers": [
       {
-        "share_inst": "ELU_I_79",
-        "share_notes": "Feng Xiao"
+        "partner": "ELU_I_79",
+        "transfer_details": "Feng Xiao"
       },
       {
-        "share_inst": "ELU_I_80",
-        "share_notes": "Max Ginger"
+        "partner": "ELU_I_80",
+        "transfer_details": "Max Ginger"
       }
     ],
     "storages": [
@@ -162,6 +165,7 @@
       {
         "first_name": "Paul",
         "last_name": "Mauve",
+        "email": "user@uni.edu",
         "role": "Principal_Investigator",
         "institution": "ELU_I_77"
       }, {
@@ -184,7 +188,7 @@
           "Other_Phenotype_data"
         ],
         "de_identification": "pseudonymization",
-        "subject_categories": "cases_and_controls",
+        "subjects_category": "cases_and_controls",
         "has_special_subjects": false,
         "access_category": "controlled_access",
         "use_restrictions": [
@@ -199,11 +203,12 @@
   {
     "name": "PD data",
     "project": "PD_Project",
-    "shares": [],
+    "transfers": [],
     "contacts": [
       {
         "first_name": "Ali",
         "last_name": "Gator",
+        "email": "user@uni.edu",
         "role": "Principal_Investigator",
         "institution": "ELU_I_77"
       }
@@ -299,7 +304,7 @@
         ],
         "de_identification": "pseudonymization",
         "ombudsman": "Biobank, Principal Investigator",
-        "subject_categories": "cases_and_controls",
+        "subjects_category": "cases_and_controls",
         "has_special_subjects": false,
         "consent_status": "homogeneous",
         "use_restrictions": [
@@ -324,7 +329,7 @@
           "Samples"
         ],
         "de_identification": "pseudonymization",
-        "subject_categories": "cases_and_controls",
+        "subjects_category": "cases_and_controls",
         "has_special_subjects": false,
         "source_collaboration": {
           "collab_inst": "ELU_I_8",
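
Reviewer note (illustration only, not part of the diff): the shares to transfers rename
also renames the entry keys; process_transfer above resolves the partner accession with
Partner.objects.get(elu_accession=...). A minimal entry, with values taken from the
fixture:

    transfer = {
        "partner": "ELU_I_44",                 # partner accession, resolved to a Partner
        "transfer_details": "Melanie Silver",  # stored on Share.share_notes
    }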
"role": "Principal_Investigator", "institution": "ELU_I_77" }, { "first_name": "Embury", "last_name": "Bask", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" } @@ -47,24 +50,28 @@ { "first_name": "Colman", "last_name": "Level", + "email": "user@uni.edu", "role": "Principal_Investigator", "institution": "ELU_I_77" }, { "first_name": "Colman", "last_name": "Level", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" }, { "first_name": "Nic", "last_name": "Purple", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" }, { "first_name": "James", "last_name": "BK", + "email": "user@uni.edu", "role": "Researcher", "institution": "ELU_I_77" } diff --git a/data/demo/datasets.json b/data/demo/datasets.json index d810603d..8e83749c 100644 --- a/data/demo/datasets.json +++ b/data/demo/datasets.json @@ -9,17 +9,33 @@ "data_types": [ "RNASeq", "Samples", - "ChIP-seq" + "Whole_genome_sequencing" ], - "access_category": null, + "data_type_notes": "Chip-seq time series data", + "access_category": "controlled_access", "subjects_category": "controls", "de_identification": "pseudonymization", "consent_status": "homogeneous", - "has_special_subjects": null, "special_subjects_description": "", "embargo_date": null, "storage_end_date": "2021-04-05", - "storage_duration_criteria": "The project PI will assess whether data should be retained for further research at the storage end date." + "storage_duration_criteria": "The project PI will assess whether data should be retained for further research at the storage end date.", + "has_special_subjects": false, + "consent_status": "homogeneous", + "use_restrictions": [ + { + "ga4gh_code": "COL-[XX]", + "note": "Data is shared as part of the LUX-Epigen programme, a collaboration with CHL, IBBL, LCSB and LSRU." + }, + { + "ga4gh_code": "DS-[XX](CC)", + "note": "Data is consented for cancer research studies only." + }, + { + "ga4gh_code": "GS-[XX]", + "note": "Data is consented for sharing outside EU/EEA region." + } + ] } ], "storages": [