diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 53e614a16..59958f45f 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -55,10 +55,12 @@ jobs: JEKYLL_BUILD_BRANCH: ${{ github.ref_name }} JEKYLL_ENV: ${{ steps.name.outputs.jekyll_env }} JEKYLL_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + JEKYLL_BASE_PATH: ${{ steps.pages.outputs.base_path }} - name: Upload artifact uses: actions/upload-pages-artifact@v1 + deploy: runs-on: ubuntu-latest needs: build diff --git a/.github/workflows/tool_list_conversion.yml b/.github/workflows/tool_list_conversion.yml index ea1fc7705..13821ac02 100644 --- a/.github/workflows/tool_list_conversion.yml +++ b/.github/workflows/tool_list_conversion.yml @@ -22,10 +22,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pyyaml requests python-frontmatter + pip install ruamel.yaml requests python-frontmatter - name: Run tool table 2 yaml run: | - python var/conversions.py + python var/tools_validator.py registry-lookup: if: | @@ -49,11 +49,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pyyaml requests python-frontmatter + pip install ruamel.yaml requests python-frontmatter - - name: Run tool table 2 yaml + - name: Run registry lookup for the tools file run: | - python var/conversions.py --reg --username ${{ secrets.FAIRSHARING_USERNAME }} --password "${{ secrets.FAIRSHARING_PASSWORD }}" + python var/tools_validator.py --reg --username ${{ secrets.FAIRSHARING_USERNAME }} --password "${{ secrets.FAIRSHARING_PASSWORD }}" - name: Fetch country names with corresponding iso run: | diff --git a/Gemfile b/Gemfile index 20c36cc74..dbd5e325a 100644 --- a/Gemfile +++ b/Gemfile @@ -1,8 +1,17 @@ source "https://rubygems.org" -# GitHub pages dependencies -gem 'github-pages', group: :jekyll_plugins +gem "elixir-toolkit-theme-plugins", "~> 0.1.7" +gem "webrick", "~> 1.7" +gem "jekyll", "~> 4.3.1" +gem 
"jemoji", "~> 0.13.0" +gem "kramdown-parser-gfm", "~> 1.1" -# Webrick needed for Ruby v3.0+ compatibility -gem "webrick" +group :jekyll_plugins do + gem "jekyll-redirect-from", "~> 0.16.0" + gem "jekyll-sitemap", "~> 1.4" + gem "jekyll-github-metadata", "~> 2.15" + gem "jekyll-relative-links", "~> 0.6" + gem "jekyll-seo-tag", "~> 2.8" + gem "jekyll-remote-theme" +end diff --git a/_config.yml b/_config.yml index 31472c704..2b9d0f8c7 100644 --- a/_config.yml +++ b/_config.yml @@ -4,7 +4,7 @@ title: RDMkit description: "Best practices and guidelines you can use for FAIR management of your research data." # Metadata description of the website -remote_theme: ELIXIR-Belgium/elixir-toolkit-theme@1.26.0 +remote_theme: ELIXIR-Belgium/elixir-toolkit-theme@2.0.0 sass: style: compressed @@ -136,6 +136,8 @@ defaults: sidebar: about plugins: + - elixir-toolkit-theme-plugins - jemoji - jekyll-redirect-from - jekyll-sitemap + - jekyll-github-metadata diff --git a/_data/main_tool_and_resource_list.csv b/_data/main_tool_and_resource_list.csv deleted file mode 100644 index 7b853e996..000000000 --- a/_data/main_tool_and_resource_list.csv +++ /dev/null @@ -1,350 +0,0 @@ -name,url,description,registry,related_pages -3D-Beacons,https://3d-beacons.org,Network providing unified programmatic access to experimentally determined and predicted structure models,,struct_bioinfo -4DN-BINA-OME-QUAREP (NBO-Q) Microscopy Metadata Specifications,https://www.nature.com/articles/s41592-021-01327-9,"Rigorous record-keeping and quality control are required to ensure the quality, reproducibility and value of imaging data. The 4DN Initiative and BINA have published light Microscopy Metadata Specifications that extend the OME Data Model, scale with experimental intent and complexity, and make it possible for scientists to create comprehensive records of imaging experiments. 
-The Microscopy Metadata Specifications have been adopted by QUAREP-LiMi and are being revised in QUAREP-LiMi in collaboration with instrument manufacturers",fairsharing:4747,"data_publication, ome, bioimaging_data" -Access to Biological Collection Data Schema (ABCD),https://www.tdwg.org/standards/abcd/,A standard schema for primary biodiversity data,biotools:NA,micro_biotech -Ada Discovery Analytics (Ada),https://ada-discovery.github.io/,"Ada is a performant and highly configurable system for secured integration, visualization, and collaborative analysis of heterogeneous data sets, primarily targeting clinical and experimental sources.",,"data_analysis, transmed" -Addgene,https://www.addgene.org/browse/,A searchable repository with a focus on plasmids,,micro_biotech -AgroPortal,http://agroportal.lirmm.fr/,Browser for ontologies for agricultural science based on NBCO BioPortal.,biotools:AgroPortal,"plant_pheno_assembly, plants, metadata" -Amazon Web Services,https://aws.amazon.com/,Amazon Web Services,,"storage, data_analysis, transfer" -Amnesia,https://amnesia.openaire.eu/,Amnesia is a GDPR compliant high accuracy data anonymization tool,,sensitive -AOP4EUpest,http://www.biomedicale.parisdescartes.fr/aop4EUpest/home.php,AOP4EUpest web server is devoted to the identification of pesticides involved in an Adverse Outcome Pathway via text mining approaches.,biotools:aop4eupest,toxicology_data -APID Interactomes,http://apid.dep.usal.es/,APID (Agile Protein Interactomes DataServer) is a server that provides a comprehensive collection of protein interactomes for more than 400 organisms based in the integration of known experimentally validated protein-protein physical interactions (PPIs),biotools:apid,idp -Argos,https://argos.openaire.eu/splash/,"Plan and follow your data. 
Bring your Data Management Plans closer to where data are generated, analysed and stored.",,"dmp, researcher, data_manager" -ArrayExpress,https://www.ebi.ac.uk/arrayexpress/,A repository of array based genomics data,,"micro_biotech, data_publication" -Arvados,https://arvados.org,"With Arvados, bioinformaticians run and scale compute-intensive workflows, developers create biomedical applications, and IT administrators manage large compute and storage resources.",,"it_support, policy_officer, researcher, data_analysis" -ATCC,https://www.lgcstandards-atcc.org/en.aspx#,"Biological materials resource including cell-lines, strains and genomics tools",,micro_biotech -Atlas,https://github.com/OHDSI/Atlas/wiki,"Free, publicly available web-based, open-source software application developed by the OHDSI community to support the design and execution of observational analyses to generate real world evidence from patient level observational data.",fairsharing:NA,"data_manager, researcher, transmed" -b2share,https://b2share.eudat.eu/,Store and publish your research data. 
Can be used to bridge between domains,,"storage, data_publication, bioimaging_data" -BacDive,https://bacdive.dsmz.de,A searchable database for bacteria specific information,,micro_biotech -Bacillus Genetic Stock Center (BGSC),http://www.bgsc.org/,A repository specific to Bacillus strains,biotools:NA,micro_biotech -BASE,https://www.base-search.net/,A search engine for academic web resources,,existing_data -BBMRI-ERIC's ELSI Knowledge Base,https://www.bbmri-eric.eu/elsi/knowledge-base/,The ELSI Knowledge Base is an open-access resource platform that aims at providing practical know-how for responsible research.,,"data_protection, sensitive, policy_officer, data_manager, human_data" -Beacon,https://beacon-project.io/,The Beacon protocol defines an open standard for genomics data discovery.,biotools:ga4gh_beacon,"researcher, data_manager, it_support, human_data" -Benchling,https://www.benchling.com,R&D Platform for Life Sciences,,micro_biotech -BIAFLOWS,https://biaflows.neubias.org/,BIAFLOWS is an open-soure web framework to reproducibly deploy and benchmark bioimage analysis workflows,biotools:biaflows,data_analysis -BigNASim,https://mmb.irbbarcelona.org/BigNASim/,Repository for Nucleic Acids MD simulations,biotools:bignasim,"biomol_sim, data_publication" -BIII,https://biii.eu/,"The BioImage Informatics Index is a registry of software tools, image databases for benchmarking, and training materials for bioimage analysis",biotools:BISE,"it_support, data_analysis" -BindingDB,https://www.bindingdb.org/,"Public, web-accessible database of measured binding affinities","fairsharing:3b36hk, biotools:bindingdb",biomol_sim -Bio-Formats,https://www.openmicroscopy.org/bio-formats/,"Bio-Formats is a software tool for reading and writing image data using standardized, open formats",biotools:bio-formats,"ome, bioimaging_data" -Bioactive Conformational Ensemble,https://mmb.irbbarcelona.org/BCE/,Platform designed to efficiently generate bioactive conformers and speed up the drug 
discovery process.,biotools:bce,biomol_sim -Bioconda,https://bioconda.github.io/,Bioconda is a bioinformatics channel for the Conda package manager,biotools:bioconda,"it_support, data_analysis" -Biodiversity Information Standards (TDWG),https://www.tdwg.org,"Biodiversity Information Standards (TDWG), historically the Taxonomic Databases Working Group, work to develop biodiversity information standards",,micro_biotech -BioExcel COVID-19,https://bioexcel-cv19.bsc.es/#/,Platform designed to provide web-access to atomistic-MD trajectories for macromolecules involved in the COVID-19 disease.,,biomol_sim -BioImageArchive,https://www.ebi.ac.uk/bioimage-archive/,The BioImage Archive stores and distributes biological images that are useful to life-science researchers.,fairsharing:x38D2k,"data_publication, bioimaging_data" -BioModels,https://www.ebi.ac.uk/biomodels/,A repository of mathematical models for application in biological sciences,fairsharing:paz6mh,"micro_biotech, data_publication" -BIONDA,http://bionda.mpc.ruhr-uni-bochum.de/start.php,"BIONDA is a free and open-access biomarker database, which employs various text mining methods to extract structured information on biomarkers from abstracts of scientific publications",biotools:bionda,"storage, researcher, human_data, proteomics" -BioSamples,https://www.ebi.ac.uk/biosamples/,BioSamples stores and supplies descriptions and metadata about biological samples used in research and development by academia and industry.,fairsharing:ewjdq6,"plants, plant_geno_assembly, plant_pheno_assembly" -Bioschemas,https://bioschemas.org,"Bioschemas aims to improve the Findability on the Web of life sciences resources such as datasets, software, and training materials",,"machine_actionability, it_support" -BioStudies,https://www.ebi.ac.uk/biostudies/,A database hosting datasets from biological studies. 
Useful for storing or accessing data that is not compliant for mainstream repositories.,,"micro_biotech, plants, data_publication" -BisQue,https://bioimage.ucsb.edu/bisque,Resource for management and analysis of 5D biological images,,"data_organisation, data_manager, data_analysis, bioimaging_data" -Bitbucket,https://bitbucket.org/,"Git based code hosting and collaboration tool, built for teams.",,"data_organisation, data_manager, it_support" -BMRB,https://bmrb.io/,Biological Magnetic Resonance Data Bank,,"idp, researcher" -BoostDM,https://www.intogen.org/boostdm/search,BoostDM is a method to score all possible point mutations (single base substitutions) in cancer genes for their potential to be involved in tumorigenesis.,biotools:boostdm,"data_analysis, human_data" -Box,https://www.box.com,Cloud storage and file sharing service,,"storage, it_support, transfer" -BrAPI,https://www.brapi.org,"Specification for a standard API for plant data: plant material, plant phenotyping data",,"it_support, plants, plant_pheno_assembly" -BRENDA,https://www.brenda-enzymes.org/,"Database of enzyme and enzyme-ligand information, across all taxonomic groups, manually extracted from primary literature and extended by text mining procedures",fairsharing:etp533,micro_biotech -Bulk Rename Utility,https://www.bulkrenameutility.co.uk/,File renaming software for Windows,,"data_organisation, data_manager, researcher" -CalibraCurve,https://github.com/mpc-bioinformatics/CalibraCurve,A highly useful and flexible tool for calibration of targeted MS?based measurements. CalibraCurve enables an automated batch-mode determination of dynamic linear ranges and quantification limits for both targeted proteomics and similar assays. 
The software uses a variety of measures to assess the accuracy of the calibration and provides intuitive visualizations.,biotools:calibracurve,"data_analysis, proteomics" -CAMEO,https://cameo3d.org,Continuous evaluation of the accuracy and reliability of protein structure prediction methods in a fully automated manner,"biotools:cameo, fairsharing:dq34p2",struct_bioinfo -Cancer Genome Interpreter,https://www.cancergenomeinterpreter.org/home,Cancer Genome Interpreter (CGI) is designed to support the identification of tumor alterations that drive the disease and detect those that may be therapeutically actionable.,biotools:cgi,"data_analysis, human_data" -CAPRI,https://www.ebi.ac.uk/pdbe/complex-pred/capri/,Critical assessment of structure prediction methods for protein-protein interactions,,struct_bioinfo -CAS Registry,https://www.cas.org/cas-data/cas-registry,"The CAS Registry (Chemical Abstracts Service Registry) includes more than 188 million unique chemicals. CAS Registry Numbers are broadly used as a unique identifier for chemical substances. The Registry is maintained by CAS, a subdivision of the American Chemical Society.",,toxicology_data -CASP,https://predictioncenter.org,Biennial critical assessment of techniques for protein structure prediction,,struct_bioinfo -Castor,https://www.castoredc.com,"Castor is an EDC system for researchers and institutions. With Castor, you can create and customize your own database in no time. Without any prior technical knowledge, you can build a study in just a few clicks using our intuitive Form Builder. 
Simply define your data points and start collecting high quality data, all you need is a web browser.",fairsharing:NA,"identifiers, it_support, data_manager" -CATH,http://www.cathdb.info/,A hierarchical domain classification of protein structures in the Protein Data Bank.,fairsharing:xgcyyn, -CEDAR,https://metadatacenter.org,"CEDAR is making data submission smarter and faster, so that scientific researchers and analysts can create and use better metadata.",,"metadata, machine_actionability, researcher, data_manager" -CellRepo,https://cellrepo.herokuapp.com/,A version management tool for modifying strains,,micro_biotech -Cellular Microscopy Phenotype Ontology (CMPO),https://www.ebi.ac.uk/cmpo/,An ontology for expressing cellular (or multi-cellular) terms with applications in microscopy,,micro_biotech -CERNBox,https://cernbox.web.cern.ch/cernbox/,"CERNBox cloud data storage, sharing and synchronization",,storage -ChEBI,https://www.ebi.ac.uk/chebi/,Dictionary of molecular entities focused on 'small' chemical compounds,fairsharing:62qk8w,micro_biotech -ChEMBL,https://www.ebi.ac.uk/chembl/,"Database of bioactive drug-like small molecules, it contains 2-D structures, calculated properties and abstracted bioactivities.","biotools:chembl, fairsharing:m3jtpg","data_analysis, researcher, toxicology_data" -ChIPSummitDB,http://summit.med.unideb.hu/summitdb/,ChIPSummitDB is a database of transcription factor binding sites and the distances of the binding sites relative to the peak summits.,biotools:chipsummitdb,human_data -Choose a license,https://choosealicense.com,Choose an open source license,,"licensing, researcher, data_manager, policy_officer" -ClinicalTrials.gov,https://clinicaltrials.gov/,ClinicalTrials.gov is a resource depending on the National Library of medicine which makes available private and public-funded clinical trials.,,toxicology_data -Common Data Elements (CDE) in a box ,https://github.com/ejp-rd-vp/cde-in-box,"A collection of software applications which 
enables creation, storing and publishing of ""Common Data Elements"" according to the CDE semantic model.",,rare_disease -Common Workflow Language (CWL),https://www.commonwl.org,An open standard for describing workflows that are build from command line tools,fairsharing:8y5ayx,"it_support, researcher, data_analysis" -Comptox,https://comptox.epa.gov/dashboard,The CompTox Chemicals Dashboard provides toxicological information for over 800.000 chemical compounds. It is a part of a suite of databases and web applications developed by the US Environmental Protection Agency's Chemical Safety for Sustainability Research Program. These databases and apps support EPA's computational toxicology research efforts to develop innovative methods to change how chemicals are currently evaluated for potential health risks.,biotools:comptox_chemistry_dashboard,toxicology_data -COmputational Modeling in BIology NEtwork (COMBINE),http://co.mbine.org,An initiative to bring together various formats and standard for computational models in biology,,micro_biotech -Conda,https://docs.conda.io/en/latest/,Open source package management system,,"it_support, data_analysis" -Consent Clauses for Genomic Research,https://drive.google.com/file/d/1O5Ti7g7QJqS3h0ABm-LyTe02Gtq8wlKM/view?usp=sharing,A resource for researchers when drafting consent forms so they can use language matching cutting-edge GA4GH international standards,,human_data -Cookiecutter,https://github.com/cookiecutter/cookiecutter,"A command-line utility that creates projects from cookiecutters (project templates), e.g. 
creating a Python package project from a Python package project template.",,"data_organisation, it_support, data_manager" -COPO,https://copo-project.org/,Portal for scientists to broker more easily rich metadata alongside data to public repos.,"biotools:copo, fairsharing-coll:bsg-d001247","metadata, researcher, plants, machine_actionability, plant_pheno_assembly, plant_geno_assembly" -COVID-19 Disease Map,https://fairdomhub.org/projects/190,"An assembly of molecular interaction diagrams, established based on literature evidence",, -COVID-19 Molecular Structure and Therapeutics Hub,https://covid.bioexcel.eu/,COVID-19 Molecular Structure and Therapeutics Hub,,biomol_sim -Create a Codebook,https://ddialliance.org/training/getting-started-new-content/create-a-codebook,Examples and tools to create a codebook by the Data Documentation Initiative (DDI),,"metadata, researcher, data_manager" -Creative Commons License Chooser,https://creativecommons.org/choose/,It helps you choose the right Creative Commons license for your needs.,,"licensing, researcher, data_manager, policy_officer" -Crop Ontology,https://www.cropontology.org,"The Crop Ontology compiles concepts to curate phenotyping assays on crop plants, including anatomy, structure and phenotype.",fairsharing:wgfrmg,"researcher, data_manager, it_support, plants, plant_pheno_assembly" -Crypt4GH,https://crypt4gh.readthedocs.io/en/latest/,"A Python tool to encrypt, decrypt or re-encrypt files, according to the GA4GH encryption file format.",,human_data -CS3,https://www.cs3community.org/,Cloud Storage Services for Synchronization and Sharing (CS3),,storage -CTD,http://ctdbase.org/,A database that aims to advance understanding about how environmental exposures affect human health.,biotools:ctd,toxicology_data -cURL,https://curl.se,Command line tool and library for transferring data with URLs,,"transfer, it_support" -Cytomine-IMS,https://github.com/cytomine/Cytomine-IMS,Image Data management,,"data_manager, bioimaging_data" 
-DAISY,https://daisy-demo.elixir-luxembourg.org,Data Information System to keep sensitive data inventory and meet GDPR accountability requirement.,biotools:Data_Information_System_DAISY,"it_support, policy_officer, human_data, data_protection, transmed" -DAMAP,https://damap.org/,"It guides you step by step through a DMP and lets you export a pre-filled DMP as a Word document that you can customize and use for submission to funders. Also, DAMAP is compatible with the RDA recommendation for machine-actionable DMPs and offers an export of JSON DMPs. DAMAP is open source and to be self deployed.",,"dmp, researcher, data_manager" -Data Catalog,https://datacatalog.elixir-luxembourg.org/,"Unique collection of project-level metadata from large research initiatives in a diverse range of fields, including clinical, molecular and observational studies. Its aim is to improve the findability of these projects following FAIR data principles.",fairsharing:NA,"metadata, transmed" -Data Catalog Vocabulary (DCAT),https://www.w3.org/TR/vocab-dcat-2/,DCAT is an RDF vocabulary designed to facilitate interoperability between data catalogs published on the Web.,,"machine_actionability, it_support, rare_disease" -Data Curation Centre Metadata list,https://www.dcc.ac.uk/guidance/standards/metadata/list,List of metadata standards,,"metadata, researcher, data_manager" -Data Stewardship Wizard,https://ds-wizard.org/,Publicly available online tool for composing smart data management plans,biotools:Data_Stewardship_Wizard,"dmp, researcher, data_manager, it_support, nels, tsd, plant_pheno_assembly, plant_geno_assembly" -Data Stewardship Wizard Storage Costs Evaluator,https://storage-costs-evaluator.ds-wizard.org/,This service provides simple estimation of storage costs based on desired properties and local/actual configuration.,,costs -Data Use Ontology,https://github.com/EBISPOT/DUO,DUO allows to semantically tag datasets with restriction about their usage.,fairsharing:5dnjs2,"data_manager, 
researcher, human_data" -data.world Data License list,https://help.data.world/hc/en-us/articles/115006114287-Common-license-types-for-datasets,Overview of typical licenses used for data resources,,"licensing, biomol_sim" -DataCite,https://search.datacite.org/,A search engine for the complete collection of publicly available DataCite DOIs,fairsharing:yknezb,existing_data -DATAVERSE,https://dataverse.org/,Open source research data respository software.,fairsharing:NA,"storage, researcher, data_manager, it_support, ifb" -DAWID,https://dawid.elixir-luxembourg.org/,The Data Agreement Wizard is a tool developed by ELIXIR-Luxembourg to facilitate data sharing agreements.,,"data_protection, policy_officer, human_data" -dbGAP,https://www.ncbi.nlm.nih.gov/gap/,The database of Genotypes and Phenotypes (dbGaP) archives and distributes data from studies investigating the interaction of genotype and phenotype in Humans,fairsharing:88v2k0,"data_publication, researcher, it_support, human_data" -DisGeNET,https://www.disgenet.org/,A discovery platform containing collections of genes and variants associated to human diseases.,biotools:disgenet,"data_analysis, human_data, researcher, toxicology_data" -DisProt,https://disprot.org/,A database of intrinsically disordered proteins,biotools:disprot,"idp, researcher" -DMP Canvas Generator,https://dmp.vital-it.ch,"Questionnaire, which generates a pre-filled a DMP",,"dmp, researcher, data_manager" -DMPlanner,https://dmplanner.athenarc.gr/,"Semi-automatically generated, searchable catalogue of resources that are relevant to data management plans.",,"dmp, researcher, data_manager" -DMPRoadmap,https://github.com/DMPRoadmap/roadmap,DMP Roadmap is a Data Management Planning tool ,,"dmp, researcher, data_manager" -DMPTool,https://dmptool.org,Build your Data Management Plan,,"dmp, researcher, data_manager" -DNA Data Bank of Japan (DDBJ),https://www.ddbj.nig.ac.jp/index-e.html,A database of DNA sequences,,micro_biotech 
-Docker,https://www.docker.com/,"Docker is a software for the execution of applications in virtualized environments called containers. It is linked to DockerHub, a library for sharing container images",fairsharing-coll:bsg-d001254,"it_support, data_analysis" -DPIA Knowledge Model,https://converge.ds-wizard.org/knowledge-models/elixir.lu:dpia-research:0.1.0,A DSW knowledge model guiding users through a set of questions to collect information necessary for a research project Data Protection Impact Assessment (DPIA).,,"data_protection, policy_officer, human_data" -Dropbox,https://www.dropbox.com/?landing=dbv2,Cloud storage and file sharing service,,"storage, it_support, transfer" -Drug Matrix,https://ntp.niehs.nih.gov/data/drugmatrix/,A toxicogenomic resource that provides access to the gene expression profiles of over 600 different compounds in several cell types from rats and primary rat hepatocytes.,,toxicology_data -Dryad,https://datadryad.org/,"Open-source, community-led data curation, publishing, and preservation platform for CC0 publicly available research data",fairsharing:wkggtx,"data_publication, biomol_sim, bioimaging_data" -Dynameomics,http://www.dynameomics.org/,Database of folding / unfolding pathway of representatives from all known protein folds by MD simulation,,biomol_sim -e!DAL,https://edal.ipk-gatersleben.de/,Electronic data archive library is a framework for publishing and sharing research data,biotools:edal,"storage, it_support" -e!DAL-PGP,https://edal-pgp.ipk-gatersleben.de/,Plant Genomics and Phenomics Research Data Repository,fairsharing:rf3m4g,"plants, plant_geno_assembly, researcher, data_manager, it_support, data_publication, metadata, plant_pheno_assembly" -ECOTOX,https://cfpub.epa.gov/ecotox/,"The ECOTOXicology Knowledgebase (ECOTOX) is a comprehensive, publicly available Knowledgebase providing single chemical environmental toxicity data on aquatic life, terrestrial plants, and wildlife.",,toxicology_data 
-ECPGR,https://www.ecpgr.cgiar.org/,Hub for the identification of plant genetic resources in Europe,,"plants, researcher, data_manager" -EDKB,https://www.fda.gov/science-research/bioinformatics-tools/endocrine-disruptor-knowledge-base,"Endocrine Disruptor Knowledge Base is a platform designed to foster the development of computational predictive toxicology. This platform allows direct access to ten libraries containing the following resources: a biological activity database, QSAR training sets, in vitro and in vivo experimental data for more than 3,000 chemicals, literature citations, chemical-structure search capabilities.",,toxicology_data -ELIXIR Core Data Resources,https://elixir-europe.org/platforms/data/core-data-resources,Set of European data resources of fundamental importance to the wider life-science community and the long-term preservation of biological data,fairsharing-coll:bsg-c000039,"existing_data, covid-19" -ELIXIR Deposition Databases for Biomolecular Data,https://elixir-europe.org/platforms/data/elixir-deposition-databases,List of discipline-specific deposition databases recommended by ELIXIR.,fairsharing-coll:bsg-c000039,"data_publication, researcher, data_manager, it_support, covid-19, nels, ifb, csc" -ELIXIR-AAI,https://elixir-europe.org/services/compute/aai,The ELIXIR Authentication and Authorisation Infrastructure (AAI),,"sensitive, nels, tsd, transmed" -EMBL-EBI Ontology Lookup Service,https://www.ebi.ac.uk/ols/index,EMBL-EBI’s web portal for finding ontologies,,"metadata, data_manager, researcher" -EMBL-EBI's data submission wizard,https://www.ebi.ac.uk/submission/,EMBL-EBI's wizard for finding the right EMBL-EBI repository for your data.,,"data_publication, researcher, data_manager" -EMPIAR,https://www.ebi.ac.uk/pdbe/emdb/empiar/,"Electron Microscopy Public Image Archive is a public resource for raw, 2D electron microscopy images. 
You can browse, upload and download the raw images used to build a 3D structure",fairsharing:dff3ef,"data_publication, ome, bioimaging_data" -ENA COMPARE Data Hubs,https://github.com/nadimm-rahman/ena-datahub-setup,This tool carries out data hub set up at the European Nucleotide Archive (ENA).,,"dm_coordination, it_support, data_manager" -ENA upload tool,https://github.com/usegalaxy-eu/ena-upload-cli,The program submits experimental data and respective metadata to the European Nucleotide Archive (ENA).,,"it_support, data_manager, researcher, data_brokering" -Ensembl,https://www.ensembl.org/index.html,"Genome browser for vertebrate genomes that supports research in comparative genomics, evolution, sequence variation and transcriptional regulation.",fairsharing:fx0mw7, -Ensembl Genomes,https://ensemblgenomes.org/,"Comparative analysis, data mining and visualisation for the genomes of non-vertebrate species",fairsharing:923a0p, -Ensembl Plants,https://plants.ensembl.org/,Open-access database of full genomes of plant species.,fairsharing:j8g2cv,"plant_geno_assembly, plants" -ERPA,https://gitlab.sib.swiss/clinbio/erpa-app,Web-based tool allowing users to create and manage a register of personal data processing activities (ROPA).,,"policy_officer, human_data, data_protection" -EU General Data Protection Regulation,https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679&from=EN,"Regulation (eu) 2016/679 of the european parliament and of the council on the protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing directive 95/46/ec (general data protection regulation).",,"data_protection, policy_officer, human_data, tsd" -EUDAT licence selector wizard,https://ufal.github.io/public-license-selector/,EUDAT's wizard for finding the right licence for your data or code.,,"licensing, researcher, data_manager, policy_officer" 
-EudraVigilance,https://www.ema.europa.eu/en/human-regulatory/research-development/pharmacovigilance/eudravigilance,The European database of suspected adverse drug reaction reports is a public resource aimed to provide access to reported suspected side-effects of drugs. Side-effects are defined according to the MedDRA ontology.,,toxicology_data -EUPID,https://eupid.eu/#/concept,"EUPID provides a method for identity management, pseudonymisation and record linkage to bridge the gap between multiple contexts.",,"it_support, policy_officer, human_data" -EURISCO,https://eurisco.ipk-gatersleben.de,European Search Catalogue for Plant Genetic Resources,biotools:eurisco,"plants, researcher, data_manager, plant_pheno_assembly" -Europe PMC,https://europepmc.org/,"Europe PMC is a repository, providing access to worldwide life sciences articles, books, patents and clinical guidelines.",fairsharing:cmw6mm,researcher -European Joint Programme on Rare Diseases (EJP RD),https://www.ejprarediseases.org/,"A programme aiming to create an effective rare diseases research ecosystem for progress, innovation and for the benefit of everyone with a rare disease.",,rare_disease -European Joint Programme on Rare Diseases Metadata Model,https://github.com/ejp-rd-vp/resource-metadata-schema,"This core model is designed to represent data about a rare disease patient and biosample registries. 
The model is based on and builds on existing standards, such as the European Rare Disease Registry Infrastructure and the Common Data Elements from the rare disease community and other more generalised standards for data sharing such as the W3C DCAT vocabulary.",,rare_disease -European Joint Programme on Rare Diseases Virtual Platform (EJP RD),https://vp.ejprarediseases.org/,"The Virtual Platform is a federated ecosystem, in which resources are enhanced to be amenable to rare disease research, and made Findable, Accessible, Interoperable and Reusable: data stays at the source level but can be queyrable at distance from an EJP RD query point.",,rare_disease -European Nucleotide Archive (ENA),https://www.ebi.ac.uk/ena/browser/home,A record of sequence information scaling from raw sequcning reads to assemblies and functional annotation,fairsharing:dj8nt8,"micro_biotech, plant_geno_assembly, data_brokering" -European Rare Disease Registry Infrastructure directory of registries (ERDRI.dor),https://eu-rd-platform.jrc.ec.europa.eu/erdridor/,ERDRI.dor provides an overview of participating rare disease registries with their main characteristics and description.,,rare_disease -European Rare Disease Registry Infrastructure metadata repository (ERDRI.mdr),https://eu-rd-platform.jrc.ec.europa.eu/mdr/,"ERDRI.mdr serves to ease the integration of heterogeneous data from different rare disease registries. For this purpose, it contains a collection of metadata which specifies the used data elements of a registry including the designation of the used data elements, their definition and units of measurement.",,rare_disease -European Reference Networks (ERNs),https://health.ec.europa.eu/european-reference-networks_en,"Virtual networks involving healthcare providers across Europe. 
They aim to facilitate discussion on complex or rare diseases and conditions that require highly specialised treatment, and concentrated knowledge and resources.",,rare_disease -European Variation Archive (EVA),https://www.ebi.ac.uk/eva/,Open-access database of all types of genetic variation data from all species.,fairsharing:6824pv,plant_geno_assembly -Evidence and Conclusion Ontology (ECO),https://evidenceontology.org/,Controlled vocabulary that describes types of evidence and assertion methods,"fairsharing:wvpgwn, biotools:NA","existing_data, metadata" -FAERS,https://www.fda.gov/drugs/surveillance/questions-and-answers-fdas-adverse-event-reporting-system-faers,"The FDA Adverse Event Reporting System (FAERS) is an American resource that contains adverse event reports, medication error reports and product quality complaints submitted by healthcare professionals, consumers, and manufacturers. MedDRA ontology is used for coding adverse effects. Note that reports available in FAERS do not require a causal relationship between a product and an adverse event and further evaluations are conducted by FDA to monitor the safety of products.",biotools:faers,toxicology_data -FAIDARE,https://urgi.versailles.inra.fr/faidare/,FAIDARE is a tool allowing to search data across distinct databases that implemented BrAPI.,biotools:faidare,"researcher, data_manager, plants, ifb, plant_pheno_assembly, plant_geno_assembly" -FAIR Cookbook,https://fairplus.github.io/the-fair-cookbook/content/recipes/assessing-fairness.html#,"FAIR Cookbook is an online resource for the Life Sciences with recipes that help you to make and keep data Findable, Accessible, Interoperable and Reusable (FAIR)",,"compliance, data_manager, transmed" -FAIR Data Point (FDP),https://www.fairdatapoint.org/,A FAIR Data Point stores metadata in a standardized and sharable way.,fairsharing:298,"rare_disease, metadata, it_support" -FAIR Evaluation 
Services,https://fairsharing.github.io/FAIR-Evaluator-FrontEnd/#!/#%2F!,Resources and guidelines to assess the FAIRness of digital resources.,,"compliance, data_manager, policy_officer" -FAIR Implementation Profile,https://www.go-fair.org/how-to-go-fair/fair-implementation-profile/,The FIP is a collection of FAIR implementation choices made by a community of practice for each of the FAIR Principles.,biotools:NA,"dm_coordination, dmp, researcher, data_manager" -FAIR-Wizard,https://wwwdev.ebi.ac.uk/ait/fair-wizard/home,"The FAIR wizard utilizes FAIRification resources developed by the FAIRplus project and other platforms, suggests FAIRification materials based on the FAIRification requirements, and designs FAIRification solutions for data owners, data stewards, and other people involved in FAIRification.",,"compliance, data_manager, policy_officer" -FAIRassist.org,https://fairassist.org/#!/,Help you discover resources to measure and improve FAIRness.,,"compliance, data_manager, policy_officer" -FAIRDOM-SEEK,https://seek4science.org/,"A data Management Platform for organising, sharing and publishing research datasets, models, protocols, samples, publications and other research outcomes.",biotools:seek,"storage, it_support, nels, micro_biotech, ifb, machine_actionability, plant_pheno_assembly, plant_geno_assembly" -FAIRDOMHub,https://fairdomhub.org,"Data, model and SOPs management for projects, from preliminary data to publication, support for running SBML models, etc. 
(public SEEK instance)",fairsharing:nnvcr9,"storage, researcher, nels, metadata, micro_biotech, machine_actionability, data_manager" -FAIRshake,https://fairshake.cloud,A System to Evaluate the FAIRness of Digital Objects,,"compliance, data_manager, it_support" -FAIRsharing,https://fairsharing.org/,"A curated, informative and educational resource on data and metadata standards, inter-related to databases and data policies.",fairsharing:2abjs5,"metadata, data_publication, policy_officer, data_manager, researcher, micro_biotech, existing_data" -FigShare,https://figshare.com/,Data publishing platform,fairsharing:drtwnh,"data_publication, biomol_sim, bioimaging_data, identifiers" -FileZilla,https://filezilla-project.org,A free FTP (FTPS and SFTP) solution with graphical interface,,"transfer, it_support" -FIP Wizard,https://fip-wizard.readthedocs.io/en/latest/,FIP Wizard is a toolset to facilitate the capture of data in FAIR Convergence Matrix questionnaire prompting communities to explicitly declare their FAIR Implementation Profiles. 
These profiles can be then stored and published as nanopublications.,,"dm_coordination, dmp, researcher, data_manager" -Free-IPA,https://www.freeipa.org/,FreeIPA is an integrated Identity and Authentication solution for Linux/UNIX networked environments.,,"it_support, transmed" -Freegenes,https://stanford.freegenes.org/collections/open-genes,Repository of IP-free synthetic biological parts,,micro_biotech -GA4GH Data Security Toolkit,https://www.ga4gh.org/genomic-data-toolkit/data-security-toolkit/,Principled and practical framework for the responsible sharing of genomic and health-related data.,,"data_publication, policy_officer, data_manager, it_support, human_data, sensitive" -GA4GH Genomic Data Toolkit,https://www.ga4gh.org/genomic-data-toolkit/,Open standards for genomic data sharing.,,"data_manager, it_support, human_data" -GA4GH Regulatory and Ethics toolkit,https://www.ga4gh.org/genomic-data-toolkit/regulatory-ethics-toolkit/,Framework for Responsible Sharing of Genomic and Health-Related Data,,"data_protection, sensitive, policy_officer, data_manager, it_support, human_data" -Galaxy,https://galaxyproject.org/,"Open, web-based platform for data intensive biomedical research. Whether on the free public server or your own instance, you can perform, reproduce, and share complete analyses.",biotools:galaxy,"nels, marine_assembly, data_analysis, researcher, it_support, ifb, galaxy" -GenBank,https://www.ncbi.nlm.nih.gov/genbank/,A database of genetic sequence information. 
GenBank may also refer to the data format used for storing information around genetic sequence data.,,micro_biotech -Gene Expression Omnibus (GEO),https://www.ncbi.nlm.nih.gov/geo/,A repository of MIAME-compliant genomics data from arrays and high-throughput sequencing,,"micro_biotech, data_publication, metadata, transfer, ome, bioimaging_data, toxicology_data" -GENEID,https://genome.crg.cat/software/geneid/index.html,Geneid is an ab initio gene finding program used to predict genes along DNA sequences in a large set of organisms.,biotools:geneid,"data_analysis, researcher" -GHS Classification,https://pubchem.ncbi.nlm.nih.gov/ghs/,"GHS (Globally Harmonized System of Classification and Labelling of Chemicals) classification was developed by the United Nations in an attempt to align standards and chemical regulations in different countries. GHS includes criteria for the classification of health, physical and environmental hazards, and what information should be included on labels of hazardous chemicals and safety data sheets.",,toxicology_data -Git,https://git-scm.com/,Distributed version control system designed to handle everything from small to very large projects,,"data_organisation, data_manager, it_support" -GitHub,https://github.com,"Versioning system, used for sharing code, as well as for sharing of small data",fairsharing-coll:bsg-d001160,"data_publication, data_organisation, it_support, data_manager" -GitLab,https://gitlab.com/gitlab-org/gitlab,"GitLab is an open source end-to-end software development platform with built-in version control, issue tracking, code review, CI/CD, and more. 
Self-host GitLab on your own servers, in a container, or on a cloud provider.",,"data_organisation, data_publication, it_support, data_manager" -Globus,https://www.globus.org,High-performance data transfers between systems within and across organizations,,"transfer, it_support" -GnpIS,https://urgi.versailles.inrae.fr/gnpis/,"A multispecies integrative information system dedicated to plant and fungi pests. It allows researchers to access genetic, phenotypic and genomic data. It is used by both large international projects and the French National Research Institute for Agriculture, Food and Environment.","fairsharing:dw22y3, biotools:gnpis","plant_pheno_assembly" -Google Dataset Search,https://datasetsearch.research.google.com/,Search engine for datasets,,existing_data -Google Drive,https://www.google.com/intl/en_us/drive/,Cloud Storage for Work and Home,,"storage, transfer" -GPCRmd,http://gpcrmd.org/,Repository of GPCR protein simulations,biotools:GPCRmd,"biomol_sim, data_publication" -GRAPE 2.0,https://github.com/guigolab/grape-nf,"The GRAPE pipeline provides an extensive pipeline for RNA-Seq analyses. It allows the creation of an automated and integrated workflow to manage, analyse and visualize RNA-Seq data.",biotools:grape_2.0,data_analysis -Harvard Medical School - Electronic Lab Notebooks,https://datamanagement.hms.harvard.edu/analyze/electronic-lab-notebooks,ELN Comparison Grid by Harvard Medical School,,"metadata, identifiers, researcher, data_manager" -Haz-Map,https://haz-map.com/,Haz-Map is an occupational health database that makes available information about the adverse effects of exposures to chemical and biological agents at the workplace. 
These associations have been established using current scientific evidence.,,toxicology_data -How to License Research Data - DCC,https://www.dcc.ac.uk/guidance/how-guides/license-research-data,Guidelines about how to license research data from Digital Curation Centre,,"licensing, researcher, data_manager, policy_officer" -Human Protein Atlas,https://www.proteinatlas.org/,The Human Protein Atlas contains information for a large majority of all human protein-coding genes regarding the expression and localization of the corresponding proteins based on both RNA and protein data.,fairsharing:j0t0pe,proteomics -HumanMine,https://www.humanmine.org/,"HumanMine integrates many types of human data and provides a powerful query engine, export for results, analysis for lists of data and FAIR access via web services.",biotools:humanmine,"data_organisation, data_manager, researcher, human_data, data_analysis" -IBM Aspera,https://www.ibm.com/products/aspera,"With fast file transfer and streaming solutions built on the award-winning IBM FASP protocol, IBM Aspera software moves data of any size across any distance",,"transfer, it_support" -iCloud,https://www.icloud.com/,Data sharing,,"storage, data_analysis, transfer" -Identifiers.org,http://identifiers.org,The Identifiers.org Resolution Service provides consistent access to life science data using Compact Identifiers. 
Compact Identifiers consist of an assigned unique prefix and a local provider designated accession number (prefix:accession).,"biotools:identifiers.org, fairsharing:n14rc8","identifiers, it_support, data_manager" -iGEM Parts Registry,http://parts.igem.org/Main_Page,A collection of standard biological parts to which all entrants in the iGEM competition must submit their parts,,micro_biotech -Image Data Resource (IDR),https://idr.openmicroscopy.org,A repository of image datasets from scientific publications,fairsharing:6wf1zw,"micro_biotech, data_publication, metadata, transfer, ome, bioimaging_data" -Informed Consent Ontology,http://purl.obolibrary.org/obo/ICO.owl,The Informed Consent Ontology (ICO) is an ontology for the informed consent and informed consent process in the medical field.,fairsharing:b9znd5,"it_support, policy_officer, human_data" -International Compilation of Human Research Standards,https://www.hhs.gov/ohrp/sites/default/files/2020-international-compilation-of-human-research-standards.pdf,"The International Compilation of Human Research Standards enumerates over 1,000 laws, regulations, and guidelines (collectively referred to as standards) that govern human subject protections in 133 countries, as well as standards from a number of international and regional organizations",,human_data -International Nucleotide Sequence Database Collaboration (INSDC),http://www.insdc.org,"A collaborative database of genetic sequence datasets from DDBJ, EMBL-EBI and NCBI",,"micro_biotech, data_brokering" -International Society for the Advancement of Cytometry (ISAC),https://isac-net.org/page/Data-Standards,Data standards and formats for reporting flow cytometry data,biotools:NA,micro_biotech -International Union of Biochemistry and Molecular Biology (IUBMB),https://www.qmul.ac.uk/sbcs/iubmb/,Resource for naming standards in biochemistry and molecular biology,,micro_biotech -InterPro,https://www.ebi.ac.uk/interpro/,Functional analysis of protein sequences by 
classifying them into families and predicting the presence of domains and important sites,fairsharing:pda11d, -IntoGen,https://www.intogen.org/search,IntoGen collects and analyses somatic mutations in thousands of tumor genomes to identify cancer driver genes.,biotools:intogen,"data_analysis, human_data" -Intrinsically disordered proteins ontology (IDPO),https://disprot.org/ontology,Intrinsically disordered proteins ontology,,"idp, metadata" -IRIS,https://www.epa.gov/iris,The Integrated Risk Information System (IRIS) resource evaluates information on health that might arise after exposure to environmental contaminants.,,toxicology_data -iRODS,https://irods.org/,Integrated Rule-Oriented Data System (iRODS) is open source data management software for a cancer genome analysis workflow.,biotools:irods,"storage, it_support, transmed, bioimaging_data" -ISA-tools,https://isa-tools.org/,"Open source framework and tools helping to manage a diverse set of life science, environmental and biomedical experiments using the Investigation Study Assay (ISA) standard",fairsharing:53gp75,"it_support, data_manager, micro_biotech, machine_actionability" -ISA4J,https://doi.org/10.12688/f1000research.27188.1,Open source software library that can be used to generate a ISA-TAB export from in-house data sets. These comprises e.g. 
local database or local file system based experimental.,biotools:isa4j,"plants, machine_actionability, plant_pheno_assembly" -ISO/IEC 27001,https://en.wikipedia.org/wiki/ISO/IEC_27001,International information security standard,,"data_protection, policy_officer, human_data" -IUPAC-IUBMB Joint Commission on Biochemical Nomenclature (JCBN),https://www.qmul.ac.uk/sbcs/iupac/jcbn/,A collaborative resource from IUPAC and IUBMB for naming standards in biochemistry,,micro_biotech -JBEI-ICE,https://ice.jbei.org,A registry platform for biological parts,,micro_biotech -Jupyter,https://jupyter.org,"Jupyter notebooks allow to share code, documentation",,"it_support, data_analysis" -Keycloak,https://www.keycloak.org/,Keycloak is an open source identity and data access management solution.,,"it_support, transmed" -LimTox,http://limtox.bioinfo.cnio.es/,"The LiMTox system is a text mining approach that tries to extract associations between compounds and a particular toxicological endpoint at various levels of granularity and evidence types, all inspired by the content of toxicology reports. It integrates direct ranking of associations between compounds and hepatotoxicity through combination of heterogeneous complementary strategies from term co-mention, rules, and patterns to machine learning-based text classification. 
It also provides indirect associations to hepatotoxicity through the extraction of relations reflecting the effect of compounds at the level of metabolism and liver enzymes.",biotools:limtox,toxicology_data -Linked Open Vocabularies (LOV),https://lov.linkeddata.es/dataset/lov/,Web portal for finding ontologies,,"metadata, data_manager, researcher" -List of Prokaryotic names with Standing in Nomenclature (LPSN),https://lpsn.dsmz.de,A database of prokaryote specific biodiversity information,,micro_biotech -LUMI,https://www.lumi-supercomputer.eu/,EuroHPC world-class supercomputer,,"data_analysis, researcher, it_support, csc" -maDMP - Research Bridge,https://library.ust.hk/sc/machine-actionable-dmp/,Machine-Actionable Data Management Plan | Webinar (2016) on making a good data management plan.,,"dmp, it_support" -MarDB,https://mmp2.sfb.uit.no/mardb/,"MarDB includes all non-complete marine microbial genomes regardless of level of completeness. Each entry contains 120 metadata fields including information about sampling environment or host, organism and taxonomy, phenotype, pathogenicity, assembly and annotation.",biotools:mardb,"marine_assembly, data_analysis" -MarFun,https://mmp2.sfb.uit.no/marfun/,MarFun is a manually curated marine fungi genome database.,,"marine_assembly, data_analysis" -Marine metagenomics portal,https://mmp2.sfb.uit.no/,High-quality curated and freely accessible microbial genomics and metagenomics resources for the marine scientific community,biotools:mmp,marine_assembly -MarRef,https://mmp2.sfb.uit.no/marref/,"MarRef is a manually curated marine microbial reference genome database that equenced genomes. 
Each entry contains 120 metadata fields including information about sampling environment or host, organism and taxonomy, phenotype, pathogenicity, assembly and annotation information",biotools:marref,marine_assembly -MemProtMD,http://memprotmd.bioch.ox.ac.uk/,Database of over 5000 intrinsic membrane protein structures,,biomol_sim -Mendeley data,https://data.mendeley.com/,"Multidisciplinary, free-to-use open repository specialized for research data",fairsharing:3epmpp,"data_publication, biomol_sim" -MetabolomeXchange,http://www.metabolomexchange.org/site/,A repository of genomics data relating to the study of the metabolome,,"micro_biotech, data_publication" -Metagen-FastQC,https://github.com/alakob/Metagen-FastQC-Docker,"Cleans metagenomic reads to remove adapters, low-quality bases and host (e.g. human) contamination",,data_publication -MIADE,https://www.psidev.info/intrinsically-disordered-proteins-workgroup,Minimum Information About Disorder Experiments (MIADE) standard,,"metadata, researcher, data_manager, idp" -MIAPPE,https://www.miappe.org/,Minimum Information About a Plant Phenotyping Experiment,fairsharing:nd9ce9,"metadata, researcher, data_manager, plants, plant_geno_assembly, plant_pheno_assembly" -Microsoft Azure,https://azure.microsoft.com/en-gb/,Cloud storage and file sharing service from Microsoft,,"storage, it_support, transfer" -Microsoft OneDrive,https://www.microsoft.com/en-us/microsoft-365/onedrive/online-cloud-storage,Cloud storage and file sharing service from Microsoft,,"storage, it_support" -MIGS/MIMS,https://www.gensc.org/pages/projects/mixs-gsc-project.html,Minimum Information about a (Meta)Genome Sequence,fairsharing:va1hck,"metadata, researcher, data_manager, marine, micro_biotech" -MINT,https://mint.bio.uniroma2.it/,"MINT, the Molecular INTeraction database, focuses on experimentally verified protein-protein interactions mined from the scientific literature by expert curators",fairsharing:2bdvmk, 
-MIxS,https://genomicsstandardsconsortium.github.io/mixs/,Minimum Information about any (x) Sequence,fairsharing:9aa0zp,"metadata, researcher, data_manager, marine, plant_geno_assembly" -MobiDB,https://mobidb.org/,A database of protein disorder and mobility annotations,biotools:mobidb,"idp, researcher" -MoDEL,https://mmb.irbbarcelona.org/MoDEL/,Database of Protein Molecular Dynamics simulations representing different structural clusters of the PDB,"biotools:model, fairsharing:NA",biomol_sim -MoDEL Covid19,https://bioexcel-cv19.bsc.es/#/,Database of COVID-19 related atomistic Molecular Dynamic Trajectories,,biomol_sim -MoDEL-CNS,https://mmb.irbbarcelona.org/MoDEL-CNS/#/,Repository for Central Nervous System-related mainly membrane protein MD simulations,,"biomol_sim, data_publication" -ModelArchive,https://www.modelarchive.org/,Repository for theoretical models of macromolecular structures with DOIs for models,fairsharing:tpqndj,"biomol_sim, struct_bioinfo, data_publication" -MOLGENIS,https://molgenis.gitbooks.io/molgenis/content/,"Molgenis is a modular web application for scientific data. Molgenis provides researchers with user friendly and scalable software infrastructures to capture, exchange, and exploit the large amounts of data that is being produced by scientific organisations all around the world.",biotools:molgenis,"identifiers, it_support, data_manager" -MolMeDB,https://molmedb.upol.cz/,Database about interactions of molecules with membranes,"biotools:MolMeDB, fairsharing:cwzk3c",biomol_sim -MONARC,https://open-source-security-software.net/project/MONARC,A risk assessment tool that can be used to do Data Protection Impact Assessments,fairsharing:NA,"data_protection, policy_officer, human_data, transmed" -MRI2DICOM,https://github.com/szullino/XNAT-PIC,"a Magnetic Resonance Imaging (MRI) converter from ParaVision® (Bruker, Inc. 
Billerica, MA) file format to DICOM standard",,"researcher, data_manager, xnat-pic" -Multi-Crop Passport Descriptor (MCPD),https://www.bioversityinternational.org/e-library/publications/detail/faobioversity-multi-crop-passport-descriptors-v21-mcpd-v21/,The Multi-Crop Passport Descriptor is the metadata standard for plant genetic resources maintained ex situ by genbanks.,"biotools:NA, fairsharing:hn155r","metadata, researcher, it_support, policy_officer, plants, plant_pheno_assembly, plant_geno_assembly" -MyTARDIS,http://www.mytardis.org/,A file-system based platform handling the transfer of data,,"data_manager, transfer, bioimaging_data" -National Center for Biotechnology Information (NCBI),https://www.ncbi.nlm.nih.gov,"Online database hosting a vast amount of biotechnological information including nucleic acids, proteins, genomes and publications. Also boasts integrated tools for analysis.",,micro_biotech -NBP,https://www.cdc.gov/biomonitoring/,The National Biomonitoring Program (NBP) is a public resource that offers an assessment of nutritional status and the exposure of the U.S. 
population to environmental chemicals and toxic substances.,,toxicology_data -NCBI Taxonomy,https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/,NCBI's taxonomy browser is a database of biodiversity information,,micro_biotech -NCIMB,https://www.ncimb.com/culture-collection/,"Hosts information relating to strains, cultures and more",,micro_biotech -Nettskjema,https://nettskjema.no/,"Form and survey tool, also for sensitive data",,"sensitive, tsd" -Nextcloud,https://nextcloud.com,"As fully on-premises solution, Nextcloud Hub provides the benefits of online collaboration without the compliance and security risks",,"storage, it_support, transfer" -Nextflow,https://www.nextflow.io,Nextflow is a framework for data analysis workflow execution,,"it_support, data_analysis" -NMRlipids,http://nmrlipids.blogspot.com/,Repository for lipid MD simulations to validate force fields with NMR data,,"biomol_sim, data_publication" -NPDS,https://www.aapcc.org/national-poison-data-system,The National Poison Data System (NPDS) is a resource that provides poison exposure occurring in the US and some freely associated states.,,toxicology_data -OHDSI,https://ohdsi.org/,"Multi-stakeholder, interdisciplinary collaborative to bring out the value of health data through large-scale analytics. 
All our solutions are open-source.",biotools:ohdsi,"researcher, data_manager, data_analysis, storage, transmed, toxicology_data" -OMERO,https://www.openmicroscopy.org,"OMERO is an open-source client-server platform for managing, visualizing and analyzing microscopy images and associated metadata","biotools:omero, fairsharing:NA","metadata, data_manager, it_support, storage, ome, bioimaging_data" -OmicsDI,https://www.omicsdi.org,"Omics Discovery Index (OmicsDI) provides a knowledge discovery framework across heterogeneous omics data (genomics, proteomics, transcriptomics and metabolomics)","biotools:omicsdi, fairsharing:re1278","existing_data, proteomics" -OMOP-CDM,https://ohdsi.github.io/CommonDataModel/,OMOP is a common data model for the harmonisation of observational health data.,,transmed -OntoMaton,https://github.com/ISA-tools/OntoMaton,OntoMaton facilitates ontology search and tagging functionalities within Google Spreadsheets.,,"researcher, data_manager, it_support, metadata, identifiers" -Ontobee,http://www.ontobee.org,A web portal to search and visualise ontologies,,"metadata, data_manager, researcher" -Open Definition Conformant Licenses,https://opendefinition.org/licenses/,Licenses that are conformant with the principles laid out in the Open Definition.,,"licensing, researcher, data_manager, policy_officer" -OpenAIRE Explore,https://explore.openaire.eu/search/find,Explore Open Access research outcomes from OpenAIRE network,,existing_data -OpenEBench,https://openebench.bsc.es/,ELIXIR benchmarking platform to support community-led scientific benchmarking efforts and the technical monitoring of bioinformatics resources,biotools:openebench,"data_analysis, data_manager, it_support" -OpenRefine,https://openrefine.org/,Data curation tool for working with messy data,,data_quality -OpenScienceFramework,https://osf.io/,"free and open source project management tool that supports the entire research lifecycle: planning, execution, reporting, archiving, and 
discovery",fairsharing:g4z879,"data_publication, biomol_sim" -OpenStack,https://www.openstack.org/,OpenStack is an open source cloud computing infrastructure software project and is one of the three most active open source projects in the world,,"storage, data_analysis, transmed, ifb" -Orphadata,http://www.orphadata.org/cgi-bin/index.php,"The Orphadata platform provides the scientific community with comprehensive, high-quality datasets related to rare diseases and orphan drugs, in a reusable and computable format",fairsharing-coll:bsg-d001249, -OSF,https://osf.io,"OSF (Open Science Framework) is a free, open platform to support your research and enable collaboration.",,"storage, researcher, data_manager" -OTP,https://gitlab.com/one-touch-pipeline/otp,"One Touch Pipeline (OTP) is a data management platform for running bioinformatics pipelines in a high-throughput setting, and for organising the resulting data and metadata.",,"human_data, metadata, dmp, data_analysis" -ownCloud,https://owncloud.com,Cloud storage and file sharing service,,"storage, it_support, transfer, data_analysis" -PAA,https://bioconductor.org/packages/PAA/,PAA is an R/Bioconductor tool for protein microarray data analysis aimed at biomarker discovery.,biotools:paa,"data_analysis, researcher, human_data, proteomics" -PANGAEA,https://www.pangaea.de/,Data Publisher for Earth and Environmental Science,biotools:pangaea,"data_publication, metadata, researcher, data_manager" -PCDDB,https://pcddb.cryst.bbk.ac.uk/,The Protein Circular Dichroism Data Bank,biotools:pcddb,"idp, researcher" -PDB,https://www.wwpdb.org/,The Protein Data Bank (PDB),biotools:pdb,"researcher, idp, struct_bioinfo" -PDB-Dev,https://pdb-dev.wwpdb.org/,Prototype archiving system for structural models obtained using integrative or hybrid modeling,,"biomol_sim, struct_bioinfo" -PDBx/mmCIF format and tools,https://mmcif.wwpdb.org/,"Information about the standard PDB archive format PDBx/mmCIF, its dictionaries and related software 
tools",fairsharing:fd28en,struct_bioinfo -PDBx/mmCIF ModelCIF Extension Dictionary,https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index/,Extension of the PDBx/mmCIF dictionary for theoretical models of macromolecular structures,,struct_bioinfo -PharmGKB,https://www.pharmgkb.org/,A resource that curates knowledge about the impact of genetic variation on drug response.,biotools:pharmgkb,toxicology_data -Pharos,https://pharosproject.net/,"Pharos provides hazard, use, and exposure information on 140,872 chemicals and 180 different kinds of building products.",biotools:pharos,toxicology_data -PHIS,http://www.phis.inra.fr/,The open-source Phenotyping Hybrid Information System (PHIS) manages and collects data from plants phenotyping and high throughput phenotyping experiments on a day to day basis.,,"plant_pheno_assembly, plants, ifb" -PIA - Protein Inference Algorithms,https://github.com/mpc-bioinformatics/pia,PIA is a toolbox for mass spectrometry based protein inference and identification analysis.,biotools:pia,"data_analysis, researcher, proteomics" -pISA-tree,https://github.com/NIB-SI/pISA-tree,"A data management solution for intra-institutional organization and structured storage of life science project-associated research data, with emphasis on the generation of adequate metadata.",biotools:pisa-tree,"micro_biotech, researcher, data_manager, data_organisation, metadata, plant_pheno_assembly, plant_geno_assembly" -PLAZA,https://bioinformatics.psb.ugent.be/plaza/,"Access point for plant comparative genomics, centralizing genomic data produced by different genome sequencing initiatives.",fairsharing:wBOua0,"plants, plant_geno_assembly, researcher" -PMut,http://mmb.irbbarcelona.org/PMut,Platform for the study of the impact of pathological mutations in protein structures.,biotools:pmut,"data_analysis, human_data" -PRIDE,https://www.ebi.ac.uk/pride/,PRoteomics IDEntifications (PRIDE) Archive database,fairsharing:e1byny,proteomics 
-ProteomeXchange,http://www.proteomexchange.org/,ProteomeXchange provides globally coordinated standard data submission and dissemination pipelines,fairsharing:92dt9d,proteomics -Proteomics Standards Initiative,https://www.psidev.info/,"The HUPO Proteomics Standards Initiative defines community standards for data representation in proteomics and interactomics to facilitate data comparison, exchange and verification.",,proteomics -protocols.io,https://www.protocols.io,A secure platform for developing and sharing reproducible methods.,,micro_biotech -PROV-DM: The PROV Data Model,https://www.w3.org/TR/prov-dm/,PROV-DM is the conceptual data model that forms a basis for the W3C provenance (PROV) family of specifications.,,data_provenance -R Markdown,https://rmarkdown.rstudio.com,"R Markdown documents are fully reproducible. Use a productive notebook interface to weave together narrative text and code to produce elegantly formatted output. Use multiple languages including R, Python, and SQL.",,"data_analysis, researcher" -RD-Connect Genome Phenome Analysis Platform,https://rd-connect.eu/what-we-do/omics/gpap/,The RD-Connect GPAP is an online tool for diagnosis and gene discovery in rare disease research.,,"researcher, human_data, rare_disease" -RDA Standards,https://rd-alliance.github.io/metadata-directory/standards/,"Directory of standard metadata, divided into different research areas",,"metadata, researcher, data_manager" -re3data,https://www.re3data.org/,Registry of Research Data Repositories,,"existing_data, data_publication" -REACH registered substances,https://echa.europa.eu/information-on-chemicals/registered-substances,"Portal with public data submitted to ECHA in REACH registration dossiers by substance manufacturers, importers, or their representatives, as laid out by the REACH Regulation (see Understanding REACH regulation).",,toxicology_data -REDCap,https://projectredcap.org,"REDCap is a secure web application for building and managing online surveys and 
databases. While REDCap can be used to collect virtually any type of data in any environment, it is specifically geared to support online and offline data capture for research studies and operations.",biotools:redcap,"identifiers, it_support, data_manager, data_quality" -REDIportal,http://srv00.recas.ba.infn.it/atlas/,"Database of A-to-I (deamination of adenosines to inosines) events that enables to search RNA editing sites by genomic region, gene name and other relevant features as the tissue of origin.",biotools:rediportal,epitrans -REDItools,https://github.com/BioinfoUNIBA/REDItools,Python scripts to detect RNA editing events in RNAseq experiments,biotools:reditools,epitrans -REDItools2,https://github.com/BioinfoUNIBA/REDItools2,"REDItools2 is the optimized, parallel multi-node version of REDItools.",,epitrans -REMS,https://github.com/CSCfi/rems,"REMS (Resource Entitlement Management System), developed by CSC, is a tool that can be used to manage researchers’ access rights to datasets.",biotools:rems,"it_support, transmed" -Renamer4Mac,https://renamer.com/,File renaming software for Mac,,"data_organisation, data_manager, researcher" -Repository Finder,https://repositoryfinder.datacite.org,Repository Finder can help you find an appropriate repository to deposit your research data. 
The tool is hosted by DataCite and queries the re3data registry of research data repositories.,,"data_publication, researcher, data_manager" -Research Data Management Organiser,https://rdmorganiser.github.io/en,"Supports the systematic planning, organisation and implementation of research data management throughout the course of a project",,"dmp, researcher, data_manager, it_support" -Research Data Management Platform (RDMP),https://www.dundee.ac.uk/hic/data-team/researchdatamanagementplatform/,"Data management platform for automated loading, storage, linkage and provision of data sets",biotools:rdmp,"storage, it_support" -Research Management Plan,https://researcheracademy.elsevier.com/research-preparation/research-data-management/creating-good-research-data-management-plan,Machine actionable DMPs.,,"dmp, researcher, data_manager" -Research Object Crate (RO-Crate),https://w3id.org/ro/crate,"RO-Crate is a lightweight approach to packaging research data with their metadata, using schema.org. 
An RO-Crate is a structured archive of all the items that contributed to the research outcome, including their identifiers, provenance, relations and annotations.",fairsharing:wUoZKE,"metadata, storage, data_organisation, data_manager, researcher, micro_biotech, machine_actionability, data_provenance" -Reva,https://reva.link/,Reva connects cloud storages and application providers,,"data_analysis, transfer" -Rightfield,https://rightfield.org.uk,RightField is an open-source tool for adding ontology term selection to Excel spreadsheets,,"researcher, metadata, data_manager, micro_biotech, identifiers, machine_actionability" -Rstudio,https://rstudio.com,"Rstudio notebooks allow to share code, documentation",,"data_analysis, it_support, researcher" -Rucio,https://rucio.cern.ch/,Rucio - Scientific Data Management,,"storage, data_analysis, transfer" -RxNorm,https://www.nlm.nih.gov/research/umls/rxnorm/index.html,RxNorm is a normalized naming system for medications that is maintained by the National Library of Medicine. 
Rxnorm provides unique identifiers and allows unambiguous communication of drug-related information across the American health computer systems.,biotools:rxnorm,toxicology_data -salDB,https://mmp.sfb.uit.no/databases/saldb/,SalDB is a salmon specific database of genome sequenced prokaryotes representing the microbiota of fishes found in the taxonomic family of Salmonidae.,,marine_assembly -SASBDB,https://www.sasbdb.org/,Small Angle Scattering Biological Data Bank,,"idp, researcher" -SBOL Visual,https://sbolstandard.org/visual-glyphs/,A standard library of visual glyphs used to represent SBOL designs and interactions.,,micro_biotech -SBOLDesigner,https://sboldesigner.github.io,A CAD tool to create SBOL designs through the use of SBOL Visual glyphs.,,micro_biotech -Schema.org,https://schema.org,"Schema.org is a collaborative, community activity with a mission to create, maintain, and promote schemas for structured data on the Internet, on web pages, in email messages, and beyond.",,"machine_actionability, it_support" -ScienceMesh,https://sciencemesh.io/,ScienceMesh - frictionless scientific collaboration and access to research services,,"storage, data_analysis, transfer" -Scientific Data's Recommended Repositories,https://www.nature.com/sdata/policies/repositories,"List of respositories recommended by Scientific Data, contains both discipline-specific and general repositories.",,"data_publication, researcher, data_manager, it_support" -SeaFile,https://www.seafile.com/,SeaFile File Synchronization and Share Solution,,"storage, transfer" -Semantic data model of the set of common data elements for rare diseases registration,https://github.com/ejp-rd-vp/CDE-semantic-model,A semantic data model describing the common data elements for rare diseases registration.,,rare_disease -Semares,https://www.genevention.com/products,"All-in-one platform for life science data management, semantic data integration, data analysis and visualization",,"researcher, data_manager, metadata, 
data_analysis, it_support, storage" -Set of common data elements for rare diseases registration,https://eu-rd-platform.jrc.ec.europa.eu/set-of-common-data-elements_en,"Contains 16 data elements to be registered by each rare disease registry across Europe, which are considered to be essential for further research.",,rare_disease -ShortBOL,http://shortbol.org,A scripting language for creating Synthetic Biology Open Language (SBOL) in a more abstract way.,,micro_biotech -SIFTS,https://www.ebi.ac.uk/pdbe/docs/sifts/,"Structure integration with function, taxonomy and sequence",,"researcher, idp" -Silva,https://www.arb-silva.de/,"SILVA provides comprehensive, quality checked and regularly updated datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU) ribosomal RNA (rRNA) sequences for all three domains of life (Bacteria, Archaea and Eukarya).",fairsharing:5vtYGG, -Singularity,https://sylabs.io,Singularity is a container platform.,,"it_support, data_analysis, tsd" -SMASCH,https://smasch.pages.uni.lu,"SMASCH (Smart Scheduling) system, is a web-based tooldesigned for longitudinal clinical studies requiring recurrent follow-upvisits of the participants. SMASCH controls and simplifies the scheduling of big database of patients. 
Smasch is also used to organize the daily plannings (delegation of tasks) for the different medical professionals such as doctors, nurses and neuropsychologists.",,"data_organisation, transmed" -Snakemake,https://snakemake.github.io,Snakemake is a framework for data analysis workflow execution,biotools:snakemake,"it_support, data_analysis" -SSBD:database,https://ssbd.riken.jp/database/,Added-value database for biological dynamics images,fairsharing:we2r5a,"bioimaging_data, data_publication" -SSBD:repository,https://ssbd.riken.jp/repository/,An open data archive that stores and publishes bioimaging and biological quantitative datasets,,"bioimaging_data, data_publication" -Standards for Reporting Enzyme Data (STRENDA),https://www.beilstein-institut.de/en/projects/strenda/,Resource of standards for reporting enzyme data,,micro_biotech -STRING,https://string-db.org/,Known and predicted protein-protein interactions.,fairsharing:9b7wvk,proteomics -SynBioHub,https://synbiohub.org,A searchable design repository for biological constructs,,micro_biotech -Synthetic Biology Open Language (SBOL),https://sbolstandard.org,An open standard for the representation of in silico biological designs and their place in the Design-Build-Test-Learn cycle of synthetic biology.,,micro_biotech -Systems Biology Markup Language (SBML),http://sbml.org/Main_Page,An open format for computational models of biological processes,,micro_biotech -T3DB,http://www.t3db.ca/,"The Toxin and Toxin Target Database is a bioinformatics resource that combines exhaustive toxin data with toxin target information. Currently it presents more than 42,000 toxin-target associations extracted from other databases, government documents, books and scientific literature. 
Each toxin record includes data on chemical properties and descriptors, toxicity values and medical information.",biotools:t3db,toxicology_data -Talend,https://www.talend.com/,Talend is an open source data integration platform.,,"data_manager, researcher, transmed" -TG-GATES,https://toxico.nibiohn.go.jp/english/,"A toxicogenomics database that stores gene expression data and biochemistry, hematology, and histopathology findings derived from in vivo (rat) and in vitro (primary rat hepatocytes, primary human hepatocytes) exposure to 170 compounds at multiple dosages and time points.",biotools:open_tg-gates,toxicology_data -The Environment Ontology (EnvO),https://sites.google.com/site/environmentontology/,An ontology for expressing environmental terms,,micro_biotech -The European Genome-phenome Archive (EGA),https://ega-archive.org/,EGA is a service for permanent archiving and sharing of all types of personally identifiable genetic and phenotypic data resulting from biomedical research projects,"biotools:ega, fairsharing:mya1ff","data_publication, human_data, policy_officer, csc, tsd" -The Genomic Standards Consortium (GSC),https://www.gensc.org/pages/about.html ,"The Genomic Standards Consortium (GSC) is an open-membership working body enabling genomic data integration, discovery and comparison through international community-driven standards.","fairsharing:9aa0zp, biotools:NA","metadata, researcher, it_support, policy_officer, human_data" -The Open Biological and Biomedical Ontology (OBO) Foundry,http://obofoundry.org,Collaborative effort to develob interoperable ontologies for the biological sciences,fairsharing-coll:bsg-d001083,"metadata, data_manager, researcher" -Tox21_Toolbox,https://ntp.niehs.nih.gov/whatwestudy/tox21/toolbox/index.html,"The Toxicology in the 21st Century program, or Tox21, is a unique collaboration between several federal agencies to develop new ways to rapidly test whether substances adversely affect human health. 
The Tox21 Toolbox contains data-analysis tools for accessing and visualizing Tox21 quantitative high-throughput screening (qHTS) 10K library data, as well as integrating with other publicly available data.",,toxicology_data -ToxCast_data,https://www.epa.gov/chemical-research/exploring-toxcast-data-downloadable-data,"The Toxicology in the 21st Century program, or Tox21, is a unique collaboration between several federal agencies to develop new ways to rapidly test whether substances adversely affect human health. This portal contains diverse downloadable results of the ToxCast project.",,toxicology_data -TOXNET,https://www.nlm.nih.gov/toxnet/index.html,"The Toxicology Data Network (TOXNET) was a portal that allowed access to several relevant sources in the toxicological field. Nowadays, these sources have been integrated into other NLM resources.",,toxicology_data -tranSMART,https://github.com/transmart,"Knowledge management and high-content analysis platform enabling analysis of integrated data for the purposes of hypothesis generation, hypothesis validation, and cohort discovery in translational research.",biotools:transmart,"researcher, data_manager, data_analysis, storage, transmed" -Tryggve ELSI Checklist,https://scilifelab-data-guidelines.readthedocs.io/en/latest/docs/general/sensitive_data.html,"A list of Ethical, Legal, and Societal Implications (ELSI) to consider for research projects on human subjects",,"sensitive, policy_officer, data_manager, human_data, nels, csc, tsd, data_protection" -TU Delft data management costing tool,https://www.tudelft.nl/en/library/research-data-management/r/plan/data-management-costs,TU Delft costing tool helps to budget for data management personnel costs in proposals.,,costs -TXG-MAPr,https://txg-mapr.eu/,"A tool that contains weighted gene co-expression networks obtained from the Primary Human Hepatocytes, rat kidney, and liver TG-GATEs dataset.",,"data_analysis, researcher, toxicology_data" -UK Data Service Data Management 
costing Tool,https://ukdataservice.ac.uk/learning-hub/research-data-management/plan-to-share/costing/,UK Data Service activity-based costing tool.,,costs -UMLS,https://www.nlm.nih.gov/research/umls/index.html,The Unified Medical Language System (UMLS) is a set of tools that establishes a mapping structure among different vocabularies in the biomedical sciences field to enable interoperativity between computer systems.,,toxicology_data -UniChem,https://www.ebi.ac.uk/unichem/,"UniChem is a very simple, large-scale non-redundant database of pointers between chemical structures and EMBL-EBI chemistry resources. Primarily, this service has been designed to maintain cross references between EBI chemistry resources. These include primary chemistry resources (ChEMBL and ChEBI), and other resources where the main focus is not small molecules, but which may nevertheless contain some small molecule information (eg: Gene Expression Atlas, PDBe).",,toxicology_data -UniProt,https://www.uniprot.org/,Comprehensive resource for protein sequence and annotation data,fairsharing:s1ne3g,"metadata, researcher, idp, micro_biotech, proteomics, struct_bioinfo" -University of Cambridge - Electronic Research Notebook Products,https://www.data.cam.ac.uk/data-management-guide/electronic-research-notebooks/electronic-research-notebook-products,List of Electronic Research Notebook Products by University of Cambridge,,"metadata, identifiers, researcher, data_manager" -VisBOL,http://visbol.org,A JavaScript library for the visualisation of SBOL.,,micro_biotech -Wellcome Open Research - Data Guidelines,https://wellcomeopenresearch.org/for-authors/data-guidelines,"Wellcome Open Research requires that the source data underlying the results are made available as soon as an article is published. 
This page provides information about data you need to include, where your data can be stored, and how your data should be presented.",,"data_publication, researcher, data_manager" -WinSCP,https://winscp.net/eng/index.php,"WinSCP is a popular SFTP client and FTP client for Microsoft Windows! Copy file between a local computer and remote servers using FTP, FTPS, SCP, SFTP, WebDAV or S3 file transfer protocols.",,"transfer, it_support" -WorkflowHub,https://workflowhub.eu,"WorkflowHub is a registry for describing, sharing and publishing scientific computational workflows.",,"data_publication, data_manager, researcher" -XNAT,https://www.xnat.org/,"Open source imaging informatics platform. It facilitates common management, productivity, and quality assurance tasks for imaging and associated data.",,"researcher, data_analysis, transmed, xnat-pic, bioimaging_data" -XNAT-PIC Pipelines,https://github.com/szullino/XNAT-PIC,Analysing of single or multiple subjects within the same project in XNAT,,"researcher, data_manager, data_analysis, xnat-pic" -XNAT-PIC Uploader,https://github.com/szullino/XNAT-PIC,Import tool for multimodal DICOM image datasets to XNAT,,"researcher, data_manager, xnat-pic" -Zenodo,https://zenodo.org/,Generalist research data repository built and developed by OpenAIRE and CERN,fairsharing:wy4egf,"data_publication, biomol_sim, bioimaging_data, plant_pheno_assembly" -Zooma,https://www.ebi.ac.uk/spot/zooma/,Find possible ontology mappings for free text terms in the ZOOMA repository.,,"metadata, data_manager, researcher" -BioPortal,https://bioportal.bioontology.org,A comprehensive repository of biomedical ontologies,fairsharing:4m97ah,metadata diff --git a/_data/tool_and_resource_list.yml b/_data/tool_and_resource_list.yml index c090de17a..22eadbf93 100644 --- a/_data/tool_and_resource_list.yml +++ b/_data/tool_and_resource_list.yml @@ -1,1074 +1,772 @@ -- description: Network providing unified programmatic access to experimentally determined - and predicted 
structure models +- description: BRAVA is a tool designed to help developers test servers that comply with the BrAPI specifications. + id: brava + name: BRAVA + url: https://webapps.ipk-gatersleben.de/brapivalidator +- description: Plant 3D is a plant phenotyping toolkit for 3D point clouds. Plant 3D (P3D) automatically extracts common phenotyping features of interest from high-resolution 3D scans of plant architectures. + id: plant3d + name: Plant 3D + registry: + biotools: plant-3d + url: https://bio.tools/plant-3d +- description: LeafNet is a convenient tool that can robustly localize stomata and segment pavement cells for light-microscope images of leaves + id: leafnet + name: LeafNet + registry: + biotools: leafnet + url: https://bio.tools/leafnet +- description: Plant Computer Vision (PlantCV) is an image processing toolkit for plant phenotyping analysis. + id: plantcv + name: PlantCV + registry: + biotools: plantcv_v2 + url: https://bio.tools/plantcv_v2 +- description: Phenomenal-3D is an automatic open source library for 3D shoot architecture reconstruction and analysis for image-based plant phenotyping + id: phenomenal-3d + name: Phenomenal 3D + registry: + biotools: phenomenal-3d + url: https://bio.tools/phenomenal-3d +- description: C library that provides a simple interface to read whole-slide images (also known as virtual slides) + id: openslide + name: OpenSlide + url: https://openslide.org/ +- description: Image Reading, Metadata Conversion, and Image Writing for Microscopy Images in Pure Python + id: aicsimageio + name: AICSImageIO + url: https://github.com/AllenCellModeling/aicsimageio +- description: Bio-Formats is a software tool for reading and writing image data using standardized, open formats. 
+ id: bioformats + name: Bio-Formats + registry: + biotools: bio-formats + tess: Bio-Formats + url: https://www.openmicroscopy.org/bio-formats/ +- description: Java application to convert image file formats, including .mrxs, to an intermediate Zarr structure compatible with the OME-NGFF specification. + id: bioformats2raw + name: bioformats2raw + url: https://github.com/glencoesoftware/bioformats2raw +- description: Java application to convert a directory of tiles to an OME-TIFF pyramid. This is the second half of iSyntax/.mrxs => OME-TIFF conversion. + id: raw2ometiff + name: raw2ometiff + url: https://github.com/glencoesoftware/raw2ometiff +- description: The bfconvert command line tool can be used to convert files between supported formats. + id: bfconvert + name: bfconvert + url: https://bio-formats.readthedocs.io/en/stable/users/comlinetools/conversion.html +- description: Aggregating critical information to accelerate COVID-19 drug discovery for the molecular modeling and simulation community. + id: molssi + name: MolSSI - BioExcel COVID-19 therapeutics hub + url: https://covid.bioexcel.eu/ +- description: The open source PIA software helps to carry out data protection impact assessment + id: pia + name: Privacy Impact Assessment Tool (PIA) + registry: + biotools: pia + url: https://www.cnil.fr/en/open-source-pia-software-helps-carry-out-data-protection-impact-assessment +- description: The Japanese Genotype-phenotype Archive (JGA) is a service for permanent archiving and sharing of all types of individual-level genetic and de-identified phenotypic data resulting from biomedical research projects. 
+ id: jga + name: JGA + url: https://www.ddbj.nig.ac.jp/jga/index-e.html +- description: Network providing unified programmatic access to experimentally determined and predicted structure models + id: 3d-beacons name: 3D-Beacons registry: biotools: 3d-beacons - related_pages: - - struct_bioinfo url: https://3d-beacons.org -- description: 'Rigorous record-keeping and quality control are required to ensure - the quality, reproducibility and value of imaging data. The 4DN Initiative and - BINA have published light Microscopy Metadata Specifications that extend the OME - Data Model, scale with experimental intent and complexity, and make it possible - for scientists to create comprehensive records of imaging experiments. +- description: 'Rigorous record-keeping and quality control are required to ensure the quality, reproducibility and value of imaging data. The 4DN Initiative and BINA have published light Microscopy Metadata Specifications that extend the OME Data Model, scale with experimental intent and complexity, and make it possible for scientists to create comprehensive records of imaging experiments. 
- The Microscopy Metadata Specifications have been adopted by QUAREP-LiMi and are - being revised in QUAREP-LiMi in collaboration with instrument manufacturers' + The Microscopy Metadata Specifications have been adopted by QUAREP-LiMi and are being revised in QUAREP-LiMi in collaboration with instrument manufacturers' + id: 4dn-bina-ome-quarep name: 4DN-BINA-OME-QUAREP (NBO-Q) Microscopy Metadata Specifications registry: fairsharing: '4747' - related_pages: - - data_publication - - ome - - bioimaging_data url: https://www.nature.com/articles/s41592-021-01327-9 - description: A standard schema for primary biodiversity data + id: access-to-biological-collection-data-schema name: Access to Biological Collection Data Schema (ABCD) - related_pages: - - micro_biotech + registry: + biotools: NA url: https://www.tdwg.org/standards/abcd/ -- description: Ada is a performant and highly configurable system for secured integration, - visualization, and collaborative analysis of heterogeneous data sets, primarily - targeting clinical and experimental sources. +- description: Ada is a performant and highly configurable system for secured integration, visualization, and collaborative analysis of heterogeneous data sets, primarily targeting clinical and experimental sources. + id: ada-discovery-analytics name: Ada Discovery Analytics (Ada) - related_pages: - - data_analysis - - transmed url: https://ada-discovery.github.io/ - description: A searchable repository with a focus on plasmids + id: addgene name: Addgene registry: fairsharing: 8hcczk - related_pages: - - micro_biotech url: https://www.addgene.org/browse/ - description: Browser for ontologies for agricultural science based on NBCO BioPortal. 
+ id: agroportal name: AgroPortal registry: biotools: AgroPortal fairsharing: z4xpxx - related_pages: - - plant_pheno_assembly - - plants - - metadata url: http://agroportal.lirmm.fr/ - description: Amazon Web Services + id: amazon-web-services name: Amazon Web Services registry: tess: Amazon Web Services - related_pages: - - storage - - data_analysis - - transfer url: https://aws.amazon.com/ - description: Amnesia is a GDPR compliant high accuracy data anonymization tool + id: amnesia name: Amnesia - related_pages: - - sensitive url: https://amnesia.openaire.eu/ -- description: AOP4EUpest web server is devoted to the identification of pesticides - involved in an Adverse Outcome Pathway via text mining approaches. +- description: AOP4EUpest web server is devoted to the identification of pesticides involved in an Adverse Outcome Pathway via text mining approaches. + id: aop4eupest name: AOP4EUpest registry: biotools: aop4eupest - related_pages: - - toxicology_data url: http://www.biomedicale.parisdescartes.fr/aop4EUpest/home.php -- description: APID (Agile Protein Interactomes DataServer) is a server that provides - a comprehensive collection of protein interactomes for more than 400 organisms - based in the integration of known experimentally validated protein-protein physical - interactions (PPIs) +- description: APID (Agile Protein Interactomes DataServer) is a server that provides a comprehensive collection of protein interactomes for more than 400 organisms based in the integration of known experimentally validated protein-protein physical interactions (PPIs) + id: apid-interactomes name: APID Interactomes registry: biotools: apid - related_pages: - - idp url: http://apid.dep.usal.es/ -- description: Plan and follow your data. Bring your Data Management Plans closer - to where data are generated, analysed and stored. +- description: Plan and follow your data. Bring your Data Management Plans closer to where data are generated, analysed and stored. 
+ id: argos name: Argos - related_pages: - - dmp - - researcher - - data_manager url: https://argos.openaire.eu/splash/ - description: A repository of array based genomics data + id: arrayexpress name: ArrayExpress registry: biotools: arrayexpress fairsharing: 6k0kwd tess: ArrayExpress - related_pages: - - micro_biotech - - data_publication url: https://www.ebi.ac.uk/arrayexpress/ -- description: With Arvados, bioinformaticians run and scale compute-intensive workflows, - developers create biomedical applications, and IT administrators manage large - compute and storage resources. +- description: With Arvados, bioinformaticians run and scale compute-intensive workflows, developers create biomedical applications, and IT administrators manage large compute and storage resources. + id: arvados name: Arvados - related_pages: - - it_support - - policy_officer - - researcher - - data_analysis url: https://arvados.org -- description: Biological materials resource including cell-lines, strains and genomics - tools +- description: Biological materials resource including cell-lines, strains and genomics tools + id: atcc name: ATCC registry: biotools: atcc - related_pages: - - micro_biotech url: https://www.lgcstandards-atcc.org/en.aspx# -- description: Free, publicly available web-based, open-source software application - developed by the OHDSI community to support the design and execution of observational - analyses to generate real world evidence from patient level observational data. +- description: Free, publicly available web-based, open-source software application developed by the OHDSI community to support the design and execution of observational analyses to generate real world evidence from patient level observational data. + id: atlas name: Atlas registry: biotools: atlas + fairsharing: NA tess: Atlas - related_pages: - - data_manager - - researcher - - transmed url: https://github.com/OHDSI/Atlas/wiki -- description: Store and publish your research data. 
Can be used to bridge between - domains +- description: Store and publish your research data. Can be used to bridge between domains + id: b2share name: b2share registry: fairsharing: da9307 - related_pages: - - storage - - data_publication - - bioimaging_data url: https://b2share.eudat.eu/ - description: A searchable database for bacteria specific information + id: bacdive name: BacDive registry: biotools: bacdive tess: BacDive - related_pages: - - micro_biotech url: https://bacdive.dsmz.de - description: A repository specific to Bacillus strains + id: bacillus-genetic-stock-center name: Bacillus Genetic Stock Center (BGSC) - related_pages: - - micro_biotech + registry: + biotools: NA url: http://www.bgsc.org/ - description: A search engine for academic web resources + id: base name: BASE registry: biotools: base tess: BASE - related_pages: - - existing_data url: https://www.base-search.net/ -- description: The ELSI Knowledge Base is an open-access resource platform that aims - at providing practical know-how for responsible research. +- description: The ELSI Knowledge Base is an open-access resource platform that aims at providing practical know-how for responsible research. + id: bbmri-eric-s-elsi-knowledge-base name: BBMRI-ERIC's ELSI Knowledge Base - related_pages: - - data_protection - - sensitive - - policy_officer - - data_manager - - human_data url: https://www.bbmri-eric.eu/elsi/knowledge-base/ - description: The Beacon protocol defines an open standard for genomics data discovery. 
+ id: beacon name: Beacon registry: biotools: ga4gh_beacon fairsharing: 6fba91 tess: Beacon - related_pages: - - researcher - - data_manager - - it_support - - human_data url: https://beacon-project.io/ - description: R&D Platform for Life Sciences + id: benchling name: Benchling - related_pages: - - micro_biotech url: https://www.benchling.com -- description: BIAFLOWS is an open-soure web framework to reproducibly deploy and - benchmark bioimage analysis workflows +- description: BIAFLOWS is an open-source web framework to reproducibly deploy and benchmark bioimage analysis workflows + id: biaflows name: BIAFLOWS registry: biotools: biaflows - related_pages: - - data_analysis url: https://biaflows.neubias.org/ - description: Repository for Nucleic Acids MD simulations + id: bignasim name: BigNASim registry: biotools: bignasim - related_pages: - - biomol_sim - - data_publication url: https://mmb.irbbarcelona.org/BigNASim/ -- description: The BioImage Informatics Index is a registry of software tools, image - databases for benchmarking, and training materials for bioimage analysis +- description: The BioImage Informatics Index is a registry of software tools, image databases for benchmarking, and training materials for bioimage analysis + id: biii name: BIII registry: biotools: BISE - related_pages: - - it_support - - data_analysis url: https://biii.eu/ - description: Public, web-accessible database of measured binding affinities + id: bindingdb name: BindingDB registry: biotools: bindingdb fairsharing: 3b36hk - related_pages: - - biomol_sim url: https://www.bindingdb.org/ -- description: Bio-Formats is a software tool for reading and writing image data using - standardized, open formats +- description: Bio-Formats is a software tool for reading and writing image data using standardized, open formats + id: bio-formats name: Bio-Formats registry: biotools: bio-formats tess: Bio-Formats - related_pages: - - ome - - bioimaging_data url: 
https://www.openmicroscopy.org/bio-formats/ -- description: Platform designed to efficiently generate bioactive conformers and - speed up the drug discovery process. +- description: Platform designed to efficiently generate bioactive conformers and speed up the drug discovery process. + id: bioactive-conformational-ensemble name: Bioactive Conformational Ensemble registry: biotools: bce - related_pages: - - biomol_sim url: https://mmb.irbbarcelona.org/BCE/ - description: Bioconda is a bioinformatics channel for the Conda package manager + id: bioconda name: Bioconda registry: biotools: bioconda tess: Bioconda - related_pages: - - it_support - - data_analysis url: https://bioconda.github.io/ -- description: Biodiversity Information Standards (TDWG), historically the Taxonomic - Databases Working Group, work to develop biodiversity information standards +- description: Biodiversity Information Standards (TDWG), historically the Taxonomic Databases Working Group, work to develop biodiversity information standards + id: biodiversity-information-standards name: Biodiversity Information Standards (TDWG) - related_pages: - - micro_biotech url: https://www.tdwg.org -- description: Platform designed to provide web-access to atomistic-MD trajectories - for macromolecules involved in the COVID-19 disease. +- description: Platform designed to provide web-access to atomistic-MD trajectories for macromolecules involved in the COVID-19 disease. + id: bioexcel-covid-19 name: BioExcel COVID-19 - related_pages: - - biomol_sim url: https://bioexcel-cv19.bsc.es/#/ -- description: The BioImage Archive stores and distributes biological images that - are useful to life-science researchers. +- description: The BioImage Archive stores and distributes biological images that are useful to life-science researchers. 
+ id: bioimagearchive name: BioImageArchive registry: fairsharing: x38D2k - related_pages: - - data_publication - - bioimaging_data url: https://www.ebi.ac.uk/bioimage-archive/ - description: A repository of mathematical models for application in biological sciences + id: biomodels name: BioModels registry: biotools: biomodels fairsharing: paz6mh tess: BioModels - related_pages: - - micro_biotech - - data_publication url: https://www.ebi.ac.uk/biomodels/ -- description: BIONDA is a free and open-access biomarker database, which employs - various text mining methods to extract structured information on biomarkers from - abstracts of scientific publications +- description: BIONDA is a free and open-access biomarker database, which employs various text mining methods to extract structured information on biomarkers from abstracts of scientific publications + id: bionda name: BIONDA registry: biotools: bionda - related_pages: - - storage - - researcher - - human_data - - proteomics url: http://bionda.mpc.ruhr-uni-bochum.de/start.php -- description: BioSamples stores and supplies descriptions and metadata about biological - samples used in research and development by academia and industry. +- description: BioSamples stores and supplies descriptions and metadata about biological samples used in research and development by academia and industry. 
+ id: biosamples name: BioSamples registry: biotools: biosamples fairsharing: ewjdq6 tess: BioSamples - related_pages: - - plants - - plant_geno_assembly - - plant_pheno_assembly url: https://www.ebi.ac.uk/biosamples/ -- description: Bioschemas aims to improve the Findability on the Web of life sciences - resources such as datasets, software, and training materials +- description: Bioschemas aims to improve the Findability on the Web of life sciences resources such as datasets, software, and training materials + id: bioschemas name: Bioschemas registry: fairsharing: f3a3ca tess: Bioschemas - related_pages: - - machine_actionability - - it_support url: https://bioschemas.org -- description: A database hosting datasets from biological studies. Useful for storing - or accessing data that is not compliant for mainstream repositories. +- description: A database hosting datasets from biological studies. Useful for storing or accessing data that is not compliant for mainstream repositories. + id: biostudies name: BioStudies registry: biotools: biostudies fairsharing: mtjvme tess: BioStudies - related_pages: - - micro_biotech - - plants - - data_publication url: https://www.ebi.ac.uk/biostudies/ - description: Resource for management and analysis of 5D biological images + id: bisque name: BisQue registry: biotools: bisque - related_pages: - - data_organisation - - data_manager - - data_analysis - - bioimaging_data url: https://bioimage.ucsb.edu/bisque - description: Git based code hosting and collaboration tool, built for teams. 
+ id: bitbucket name: Bitbucket registry: fairsharing: fc3431 - related_pages: - - data_organisation - - data_manager - - it_support url: https://bitbucket.org/ - description: Biological Magnetic Resonance Data Bank + id: bmrb name: BMRB registry: biotools: bmrb - related_pages: - - idp - - researcher url: https://bmrb.io/ -- description: BoostDM is a method to score all possible point mutations (single base - substitutions) in cancer genes for their potential to be involved in tumorigenesis. +- description: BoostDM is a method to score all possible point mutations (single base substitutions) in cancer genes for their potential to be involved in tumorigenesis. + id: boostdm name: BoostDM registry: biotools: boostdm - related_pages: - - data_analysis - - human_data url: https://www.intogen.org/boostdm/search - description: Cloud storage and file sharing service + id: box name: Box registry: tess: Box - related_pages: - - storage - - it_support - - transfer url: https://www.box.com -- description: 'Specification for a standard API for plant data: plant material, plant - phenotyping data' +- description: 'Specification for a standard API for plant data: plant material, plant phenotyping data' + id: brapi name: BrAPI registry: tess: BrAPI - related_pages: - - it_support - - plants - - plant_pheno_assembly url: https://www.brapi.org -- description: Database of enzyme and enzyme-ligand information, across all taxonomic - groups, manually extracted from primary literature and extended by text mining - procedures +- description: Database of enzyme and enzyme-ligand information, across all taxonomic groups, manually extracted from primary literature and extended by text mining procedures + id: brenda name: BRENDA registry: biotools: brenda fairsharing: etp533 tess: BRENDA - related_pages: - - micro_biotech url: https://www.brenda-enzymes.org/ - description: File renaming software for Windows + id: bulk-rename-utility name: Bulk Rename Utility - related_pages: - - 
data_organisation - - data_manager - - researcher url: https://www.bulkrenameutility.co.uk/ -- description: A highly useful and flexible tool for calibration of targeted MS?based - measurements. CalibraCurve enables an automated batch-mode determination of dynamic - linear ranges and quantification limits for both targeted proteomics and similar - assays. The software uses a variety of measures to assess the accuracy of the - calibration and provides intuitive visualizations. - name: CalibraCurve - registry: - biotools: calibracurve - related_pages: - - data_analysis - - proteomics - url: https://github.com/mpc-bioinformatics/CalibraCurve -- description: Continuous evaluation of the accuracy and reliability of protein structure - prediction methods in a fully automated manner +- description: Open source tandem mass spectrometry (MS/MS) sequence database search tool. + id: comet + name: Comet + registry: + biotools: comet + url: https://uwpr.github.io/Comet/ +- description: Continuous evaluation of the accuracy and reliability of protein structure prediction methods in a fully automated manner + id: cameo name: CAMEO registry: biotools: cameo fairsharing: dq34p2 - related_pages: - - struct_bioinfo url: https://cameo3d.org -- description: Cancer Genome Interpreter (CGI) is designed to support the identification - of tumor alterations that drive the disease and detect those that may be therapeutically - actionable. +- description: Cancer Genome Interpreter (CGI) is designed to support the identification of tumor alterations that drive the disease and detect those that may be therapeutically actionable. 
+ id: cancer-genome-interpreter name: Cancer Genome Interpreter registry: biotools: cgi - related_pages: - - data_analysis - - human_data url: https://www.cancergenomeinterpreter.org/home -- description: Critical assessment of structure prediction methods for protein-protein - interactions +- description: Critical assessment of structure prediction methods for protein-protein interactions + id: capri name: CAPRI - related_pages: - - struct_bioinfo url: https://www.ebi.ac.uk/pdbe/complex-pred/capri/ -- description: The CAS Registry (Chemical Abstracts Service Registry) includes more - than 188 million unique chemicals. CAS Registry Numbers are broadly used as a - unique identifier for chemical substances. The Registry is maintained by CAS, - a subdivision of the American Chemical Society. +- description: The CAS Registry (Chemical Abstracts Service Registry) includes more than 188 million unique chemicals. CAS Registry Numbers are broadly used as a unique identifier for chemical substances. The Registry is maintained by CAS, a subdivision of the American Chemical Society. + id: cas-registry name: CAS Registry registry: fairsharing: r7Kwy7 - related_pages: - - toxicology_data url: https://www.cas.org/cas-data/cas-registry - description: Biennial critical assessment of techniques for protein structure prediction + id: casp name: CASP registry: biotools: casp tess: CASP - related_pages: - - struct_bioinfo url: https://predictioncenter.org -- description: Castor is an EDC system for researchers and institutions. With Castor, - you can create and customize your own database in no time. Without any prior technical - knowledge, you can build a study in just a few clicks using our intuitive Form - Builder. Simply define your data points and start collecting high quality data, - all you need is a web browser. +- description: Castor is an EDC system for researchers and institutions. With Castor, you can create and customize your own database in no time. 
Without any prior technical knowledge, you can build a study in just a few clicks using our intuitive Form Builder. Simply define your data points and start collecting high quality data, all you need is a web browser. + id: castor name: Castor registry: biotools: castor - related_pages: - - identifiers - - it_support - - data_manager + fairsharing: NA url: https://www.castoredc.com -- description: A hierarchical domain classification of protein structures in the Protein - Data Bank. +- description: A hierarchical domain classification of protein structures in the Protein Data Bank. + id: cath name: CATH registry: biotools: cath fairsharing: xgcyyn tess: CATH url: http://www.cathdb.info/ -- description: CEDAR is making data submission smarter and faster, so that scientific - researchers and analysts can create and use better metadata. +- description: CEDAR is making data submission smarter and faster, so that scientific researchers and analysts can create and use better metadata. + id: cedar name: CEDAR registry: biotools: cedar fairsharing: pmygc7 - related_pages: - - metadata - - machine_actionability - - researcher - - data_manager url: https://metadatacenter.org +- description: Image analysis software + id: cellprofiler + name: CellProfiler + registry: + biotools: cellprofiler + tess: CellProfiler + url: https://cellprofiler.org/ +- description: ilastik is a user-friendly tool for interactive image classification, segmentation and analysis + id: ilastik + name: ilastik + registry: + biotools: ilastik + url: https://www.ilastik.org/ - description: A version management tool for modifying strains + id: cellrepo name: CellRepo - related_pages: - - micro_biotech url: https://cellrepo.herokuapp.com/ -- description: An ontology for expressing cellular (or multi-cellular) terms with - applications in microscopy +- description: An ontology for expressing cellular (or multi-cellular) terms with applications in microscopy + id: cellular-microscopy-phenotype-ontology name: 
Cellular Microscopy Phenotype Ontology (CMPO) registry: tess: Cellular Microscopy Phenotype Ontology (CMPO) - related_pages: - - micro_biotech url: https://www.ebi.ac.uk/cmpo/ - description: CERNBox cloud data storage, sharing and synchronization + id: cernbox name: CERNBox - related_pages: - - storage url: https://cernbox.web.cern.ch/cernbox/ - description: Dictionary of molecular entities focused on 'small' chemical compounds + id: chebi name: ChEBI registry: biotools: chebi fairsharing: 62qk8w tess: ChEBI - related_pages: - - micro_biotech url: https://www.ebi.ac.uk/chebi/ -- description: Database of bioactive drug-like small molecules, it contains 2-D structures, - calculated properties and abstracted bioactivities. +- description: Database of bioactive drug-like small molecules, it contains 2-D structures, calculated properties and abstracted bioactivities. + id: chembl name: ChEMBL registry: biotools: chembl fairsharing: m3jtpg tess: ChEMBL - related_pages: - - data_analysis - - researcher - - toxicology_data url: https://www.ebi.ac.uk/chembl/ -- description: ChIPSummitDB is a database of transcription factor binding sites and - the distances of the binding sites relative to the peak summits. +- description: ChIPSummitDB is a database of transcription factor binding sites and the distances of the binding sites relative to the peak summits. + id: chipsummitdb name: ChIPSummitDB registry: biotools: chipsummitdb - related_pages: - - human_data url: http://summit.med.unideb.hu/summitdb/ - description: Choose an open source license + id: choose-a-license name: Choose a license - related_pages: - - licensing - - researcher - - data_manager - - policy_officer url: https://choosealicense.com -- description: ClinicalTrials.gov is a resource depending on the National Library - of medicine which makes available private and public-funded clinical trials. 
+- description: ClinicalTrials.gov is a resource depending on the National Library of medicine which makes available private and public-funded clinical trials. + id: clinicaltrials-gov name: ClinicalTrials.gov registry: fairsharing: mewhad - related_pages: - - toxicology_data url: https://clinicaltrials.gov/ -- description: A collection of software applications which enables creation, storing - and publishing of "Common Data Elements" according to the CDE semantic model. +- description: A collection of software applications which enables creation, storing and publishing of Common Data Elements according to the CDE semantic model. + id: common-data-elements-in-a-box name: Common Data Elements (CDE) in a box registry: biotools: cde - related_pages: - - rare_disease url: https://github.com/ejp-rd-vp/cde-in-box -- description: An open standard for describing workflows that are build from command - line tools +- description: An open standard for describing workflows that are build from command line tools + id: common-workflow-language name: Common Workflow Language (CWL) registry: fairsharing: 8y5ayx tess: Common Workflow Language (CWL) - related_pages: - - it_support - - researcher - - data_analysis url: https://www.commonwl.org -- description: The CompTox Chemicals Dashboard provides toxicological information - for over 800.000 chemical compounds. It is a part of a suite of databases and - web applications developed by the US Environmental Protection Agency's Chemical - Safety for Sustainability Research Program. These databases and apps support EPA's - computational toxicology research efforts to develop innovative methods to change - how chemicals are currently evaluated for potential health risks. +- description: The CompTox Chemicals Dashboard provides toxicological information for over 800.000 chemical compounds. 
It is a part of a suite of databases and web applications developed by the US Environmental Protection Agency's Chemical Safety for Sustainability Research Program. These databases and apps support EPA's computational toxicology research efforts to develop innovative methods to change how chemicals are currently evaluated for potential health risks. + id: comptox name: Comptox registry: biotools: comptox_chemistry_dashboard fairsharing: tfj7gt - related_pages: - - toxicology_data url: https://comptox.epa.gov/dashboard -- description: An initiative to bring together various formats and standard for computational - models in biology +- description: An initiative to bring together various formats and standard for computational models in biology + id: computational-modeling-in-biology-network name: COmputational Modeling in BIology NEtwork (COMBINE) - related_pages: - - micro_biotech url: http://co.mbine.org - description: Open source package management system + id: conda name: Conda registry: tess: Conda - related_pages: - - it_support - - data_analysis url: https://docs.conda.io/en/latest/ -- description: A resource for researchers when drafting consent forms so they can - use language matching cutting-edge GA4GH international standards +- description: A resource for researchers when drafting consent forms so they can use language matching cutting-edge GA4GH international standards + id: consent-clauses-for-genomic-research name: Consent Clauses for Genomic Research - related_pages: - - human_data url: https://drive.google.com/file/d/1O5Ti7g7QJqS3h0ABm-LyTe02Gtq8wlKM/view?usp=sharing -- description: A command-line utility that creates projects from cookiecutters (project - templates), e.g. creating a Python package project from a Python package project - template. +- description: A command-line utility that creates projects from cookiecutters (project templates), e.g. creating a Python package project from a Python package project template. 
+ id: cookiecutter name: Cookiecutter - related_pages: - - data_organisation - - it_support - - data_manager url: https://github.com/cookiecutter/cookiecutter -- description: Portal for scientists to broker more easily rich metadata alongside - data to public repos. +- description: Portal for scientists to broker more easily rich metadata alongside data to public repos. + id: copo name: COPO registry: biotools: copo fairsharing-coll: bsg-d001247 - related_pages: - - metadata - - researcher - - plants - - machine_actionability - - plant_pheno_assembly - - plant_geno_assembly url: https://copo-project.org/ -- description: An assembly of molecular interaction diagrams, established based on - literature evidence +- description: An assembly of molecular interaction diagrams, established based on literature evidence + id: covid-19-disease-map name: COVID-19 Disease Map url: https://fairdomhub.org/projects/190 -- description: COVID-19 Molecular Structure and Therapeutics Hub - name: COVID-19 Molecular Structure and Therapeutics Hub - related_pages: - - biomol_sim - url: https://covid.bioexcel.eu/ -- description: Examples and tools to create a codebook by the Data Documentation Initiative - (DDI) +- description: Examples and tools to create a codebook by the Data Documentation Initiative (DDI) + id: create-a-codebook name: Create a Codebook - related_pages: - - metadata - - researcher - - data_manager url: https://ddialliance.org/training/getting-started-new-content/create-a-codebook - description: It helps you choose the right Creative Commons license for your needs. + id: creative-commons-license-chooser name: Creative Commons License Chooser - related_pages: - - licensing - - researcher - - data_manager - - policy_officer url: https://creativecommons.org/choose/ -- description: The Crop Ontology compiles concepts to curate phenotyping assays on - crop plants, including anatomy, structure and phenotype. 
+- description: The Crop Ontology compiles concepts to curate phenotyping assays on crop plants, including anatomy, structure and phenotype. + id: crop-ontology name: Crop Ontology registry: fairsharing: wgfrmg tess: Crop Ontology - related_pages: - - researcher - - data_manager - - it_support - - plants - - plant_pheno_assembly url: https://www.cropontology.org -- description: A Python tool to encrypt, decrypt or re-encrypt files, according to - the GA4GH encryption file format. +- description: A Python tool to encrypt, decrypt or re-encrypt files, according to the GA4GH encryption file format. + id: crypt4gh name: Crypt4GH registry: tess: Crypt4GH - related_pages: - - human_data url: https://crypt4gh.readthedocs.io/en/latest/ - description: Cloud Storage Services for Synchronization and Sharing (CS3) + id: cs3 name: CS3 - related_pages: - - storage url: https://www.cs3community.org/ -- description: A database that aims to advance understanding about how environmental - exposures affect human health. +- description: A database that aims to advance understanding about how environmental exposures affect human health. + id: ctd name: CTD registry: biotools: ctd - related_pages: - - toxicology_data url: http://ctdbase.org/ - description: Command line tool and library for transferring data with URLs + id: curl name: cURL - related_pages: - - transfer - - it_support url: https://curl.se - description: Image Data management + id: cytomine-ims name: Cytomine-IMS - related_pages: - - data_manager - - bioimaging_data url: https://github.com/cytomine/Cytomine-IMS -- description: Data Information System to keep sensitive data inventory and meet GDPR - accountability requirement. +- description: Data Information System to keep sensitive data inventory and meet GDPR accountability requirement. 
+ id: daisy name: DAISY registry: biotools: Data_Information_System_DAISY tess: DAISY - related_pages: - - it_support - - policy_officer - - human_data - - data_protection - - transmed url: https://daisy-demo.elixir-luxembourg.org -- description: It guides you step by step through a DMP and lets you export a pre-filled - DMP as a Word document that you can customize and use for submission to funders. - Also, DAMAP is compatible with the RDA recommendation for machine-actionable DMPs - and offers an export of JSON DMPs. DAMAP is open source and to be self deployed. +- description: It guides you step by step through a DMP and lets you export a pre-filled DMP as a Word document that you can customize and use for submission to funders. Also, DAMAP is compatible with the RDA recommendation for machine-actionable DMPs and offers an export of JSON DMPs. DAMAP is open source and to be self deployed. + id: damap name: DAMAP - related_pages: - - dmp - - researcher - - data_manager url: https://damap.org/ -- description: Unique collection of project-level metadata from large research initiatives - in a diverse range of fields, including clinical, molecular and observational - studies. Its aim is to improve the findability of these projects following FAIR - data principles. +- description: Unique collection of project-level metadata from large research initiatives in a diverse range of fields, including clinical, molecular and observational studies. Its aim is to improve the findability of these projects following FAIR data principles. + id: data-catalog name: Data Catalog - related_pages: - - metadata - - transmed + registry: + fairsharing: NA url: https://datacatalog.elixir-luxembourg.org/ -- description: DCAT is an RDF vocabulary designed to facilitate interoperability between - data catalogs published on the Web. +- description: DCAT is an RDF vocabulary designed to facilitate interoperability between data catalogs published on the Web. 
+ id: data-catalog-vocabulary name: Data Catalog Vocabulary (DCAT) - related_pages: - - machine_actionability - - it_support - - rare_disease url: https://www.w3.org/TR/vocab-dcat-2/ - description: List of metadata standards + id: data-curation-centre-metadata-list name: Data Curation Centre Metadata list - related_pages: - - metadata - - researcher - - data_manager url: https://www.dcc.ac.uk/guidance/standards/metadata/list -- description: Publicly available online tool for composing smart data management - plans +- description: Publicly available online tool for composing smart data management plans + id: data-stewardship-wizard name: Data Stewardship Wizard registry: biotools: Data_Stewardship_Wizard tess: Data Stewardship Wizard - related_pages: - - dmp - - researcher - - data_manager - - it_support - - nels - - tsd - - plant_pheno_assembly - - plant_geno_assembly url: https://ds-wizard.org/ -- description: This service provides simple estimation of storage costs based on desired - properties and local/actual configuration. +- description: This service provides simple estimation of storage costs based on desired properties and local/actual configuration. + id: data-stewardship-wizard-storage-costs-evaluator name: Data Stewardship Wizard Storage Costs Evaluator - related_pages: - - costs url: https://storage-costs-evaluator.ds-wizard.org/ -- description: DUO allows to semantically tag datasets with restriction about their - usage. - name: Data Use Ontology +- description: DUO allows to semantically tag datasets with restriction about their usage. 
+ id: data-use-ontology + name: Data Use Ontology (DUO) registry: fairsharing: 5dnjs2 - tess: Data Use Ontology - related_pages: - - data_manager - - researcher - - human_data + tess: Data Use Ontology (DUO) url: https://github.com/EBISPOT/DUO - description: Overview of typical licenses used for data resources + id: data-world-data-license-list name: data.world Data License list - related_pages: - - licensing - - biomol_sim url: https://help.data.world/hc/en-us/articles/115006114287-Common-license-types-for-datasets -- description: A search engine for the complete collection of publicly available DataCite - DOIs +- description: A search engine for the complete collection of publicly available DataCite DOIs + id: datacite name: DataCite registry: fairsharing: yknezb - related_pages: - - existing_data url: https://search.datacite.org/ - description: Open source research data respository software. + id: dataverse name: DATAVERSE registry: + fairsharing: NA tess: DATAVERSE - related_pages: - - storage - - researcher - - data_manager - - it_support - - ifb url: https://dataverse.org/ -- description: The Data Agreement Wizard is a tool developed by ELIXIR-Luxembourg - to facilitate data sharing agreements. - name: DAWID - related_pages: - - data_protection - - policy_officer - - human_data +- description: The Data Agreement Wizard is a tool developed by ELIXIR-Luxembourg to facilitate data sharing agreements. 
+ id: dawid + name: Data Agreement Wizard (DAWID) url: https://dawid.elixir-luxembourg.org/ -- description: The database of Genotypes and Phenotypes (dbGaP) archives and distributes - data from studies investigating the interaction of genotype and phenotype in Humans +- description: The database of Genotypes and Phenotypes (dbGaP) archives and distributes data from studies investigating the interaction of genotype and phenotype in Humans + id: dbgap name: dbGAP registry: biotools: dbgap fairsharing: 88v2k0 tess: dbGAP - related_pages: - - data_publication - - researcher - - it_support - - human_data url: https://www.ncbi.nlm.nih.gov/gap/ -- description: A discovery platform containing collections of genes and variants associated - to human diseases. +- description: A discovery platform containing collections of genes and variants associated to human diseases. + id: disgenet name: DisGeNET registry: biotools: disgenet fairsharing: fssydn tess: DisGeNET - related_pages: - - data_analysis - - human_data - - researcher - - toxicology_data url: https://www.disgenet.org/ - description: A database of intrinsically disordered proteins + id: disprot name: DisProt registry: biotools: disprot fairsharing: dt9z89 tess: DisProt - related_pages: - - idp - - researcher url: https://disprot.org/ - description: Questionnaire, which generates a pre-filled a DMP + id: dmp-canvas-generator name: DMP Canvas Generator - related_pages: - - dmp - - researcher - - data_manager url: https://dmp.vital-it.ch -- description: Semi-automatically generated, searchable catalogue of resources that - are relevant to data management plans. +- description: Semi-automatically generated, searchable catalogue of resources that are relevant to data management plans. 
+ id: dmplanner name: DMPlanner - related_pages: - - dmp - - researcher - - data_manager url: https://dmplanner.athenarc.gr/ - description: DMP Roadmap is a Data Management Planning tool + id: dmproadmap name: DMPRoadmap - related_pages: - - dmp - - researcher - - data_manager url: https://github.com/DMPRoadmap/roadmap - description: Build your Data Management Plan + id: dmptool name: DMPTool - related_pages: - - dmp - - researcher - - data_manager url: https://dmptool.org - description: A database of DNA sequences + id: dna-data-bank-of-japan name: DNA Data Bank of Japan (DDBJ) registry: biotools: ddbj - related_pages: - - micro_biotech url: https://www.ddbj.nig.ac.jp/index-e.html -- description: Docker is a software for the execution of applications in virtualized - environments called containers. It is linked to DockerHub, a library for sharing - container images +- description: Docker is a software for the execution of applications in virtualized environments called containers. It is linked to DockerHub, a library for sharing container images + id: docker name: Docker registry: fairsharing: afc2b3 fairsharing-coll: bsg-d001254 tess: Docker - related_pages: - - it_support - - data_analysis url: https://www.docker.com/ -- description: A DSW knowledge model guiding users through a set of questions to collect - information necessary for a research project Data Protection Impact Assessment - (DPIA). +- description: A DSW knowledge model guiding users through a set of questions to collect information necessary for a research project Data Protection Impact Assessment (DPIA). 
+ id: dpia-knowledge-model name: DPIA Knowledge Model - related_pages: - - data_protection - - policy_officer - - human_data url: https://converge.ds-wizard.org/knowledge-models/elixir.lu:dpia-research:0.1.0 - description: Cloud storage and file sharing service + id: dropbox name: Dropbox - related_pages: - - storage - - it_support - - transfer url: https://www.dropbox.com/?landing=dbv2 -- description: A toxicogenomic resource that provides access to the gene expression - profiles of over 600 different compounds in several cell types from rats and primary - rat hepatocytes. +- description: A toxicogenomic resource that provides access to the gene expression profiles of over 600 different compounds in several cell types from rats and primary rat hepatocytes. + id: drug-matrix name: Drug Matrix - related_pages: - - toxicology_data url: https://ntp.niehs.nih.gov/data/drugmatrix/ -- description: Open-source, community-led data curation, publishing, and preservation - platform for CC0 publicly available research data +- description: Open-source, community-led data curation, publishing, and preservation platform for CC0 publicly available research data + id: dryad name: Dryad registry: fairsharing: wkggtx - related_pages: - - data_publication - - biomol_sim - - bioimaging_data url: https://datadryad.org/ -- description: Database of folding / unfolding pathway of representatives from all - known protein folds by MD simulation +- description: Database of folding / unfolding pathway of representatives from all known protein folds by MD simulation + id: dynameomics name: Dynameomics - related_pages: - - biomol_sim url: http://www.dynameomics.org/ -- description: Electronic data archive library is a framework for publishing and sharing - research data +- description: Electronic data archive library is a framework for publishing and sharing research data + id: e-dal name: e!DAL registry: biotools: edal - related_pages: - - storage - - it_support url: 
https://edal.ipk-gatersleben.de/ - description: Plant Genomics and Phenomics Research Data Repository + id: e-dal-pgp name: e!DAL-PGP registry: fairsharing: rf3m4g - related_pages: - - plants - - plant_geno_assembly - - researcher - - data_manager - - it_support - - data_publication - - metadata - - plant_pheno_assembly url: https://edal-pgp.ipk-gatersleben.de/ -- description: The ECOTOXicology Knowledgebase (ECOTOX) is a comprehensive, publicly - available Knowledgebase providing single chemical environmental toxicity data - on aquatic life, terrestrial plants, and wildlife. +- description: The ECOTOXicology Knowledgebase (ECOTOX) is a comprehensive, publicly available Knowledgebase providing single chemical environmental toxicity data on aquatic life, terrestrial plants, and wildlife. + id: ecotox name: ECOTOX registry: fairsharing: 4b2234 - related_pages: - - toxicology_data url: https://cfpub.epa.gov/ecotox/ - description: Hub for the identification of plant genetic resources in Europe + id: ecpgr name: ECPGR - related_pages: - - plants - - researcher - - data_manager url: https://www.ecpgr.cgiar.org/ -- description: 'Endocrine Disruptor Knowledge Base is a platform designed to foster - the development of computational predictive toxicology. This platform allows direct - access to ten libraries containing the following resources: a biological activity - database, QSAR training sets, in vitro and in vivo experimental data for more - than 3,000 chemicals, literature citations, chemical-structure search capabilities.' +- description: 'Endocrine Disruptor Knowledge Base is a platform designed to foster the development of computational predictive toxicology. This platform allows direct access to ten libraries containing the following resources: a biological activity database, QSAR training sets, in vitro and in vivo experimental data for more than 3,000 chemicals, literature citations, chemical-structure search capabilities.' 
+ id: edkb name: EDKB - related_pages: - - toxicology_data url: https://www.fda.gov/science-research/bioinformatics-tools/endocrine-disruptor-knowledge-base -- description: Set of European data resources of fundamental importance to the wider - life-science community and the long-term preservation of biological data +- description: Set of European data resources of fundamental importance to the wider life-science community and the long-term preservation of biological data + id: elixir-core-data-resources name: ELIXIR Core Data Resources registry: fairsharing-coll: bsg-c000039 - related_pages: - - existing_data - - covid-19 url: https://elixir-europe.org/platforms/data/core-data-resources - description: List of discipline-specific deposition databases recommended by ELIXIR. + id: elixir-deposition-databases-for-biomolecular-data name: ELIXIR Deposition Databases for Biomolecular Data registry: fairsharing-coll: bsg-c000039 - related_pages: - - data_publication - - researcher - - data_manager - - it_support - - covid-19 - - nels - - ifb - - csc url: https://elixir-europe.org/platforms/data/elixir-deposition-databases -- description: The ELIXIR Authentication and Authorisation Infrastructure (AAI) - name: ELIXIR-AAI - registry: - tess: ELIXIR-AAI - related_pages: - - sensitive - - nels - - tsd - - transmed - url: https://elixir-europe.org/services/compute/aai -- description: "EMBL-EBI\u2019s web portal for finding ontologies" - name: EMBL-EBI Ontology Lookup Service - related_pages: - - metadata - - data_manager - - researcher +- description: An authentication service from EOSC-Life + id: life-science-login + name: Life Science Login (LS Login) + url: https://lifescience-ri.eu/ls-login/ +- description: EMBL-EBI's web portal for finding ontologies + id: ontology-lookup-service + name: Ontology Lookup Service + registry: + biotools: ols + fairsharing: Mkl9RR + tess: Ontology Lookup Service url: https://www.ebi.ac.uk/ols/index -- description: EMBL-EBI's wizard for 
finding the right EMBL-EBI repository for your - data. +- description: EMBL-EBI's wizard for finding the right EMBL-EBI repository for your data. + id: embl-ebi-s-data-submission-wizard name: EMBL-EBI's data submission wizard - related_pages: - - data_publication - - researcher - - data_manager url: https://www.ebi.ac.uk/submission/ -- description: Electron Microscopy Public Image Archive is a public resource for raw, - 2D electron microscopy images. You can browse, upload and download the raw images - used to build a 3D structure +- description: Electron Microscopy Public Image Archive is a public resource for raw, 2D electron microscopy images. You can browse, upload and download the raw images used to build a 3D structure + id: empiar name: EMPIAR registry: biotools: empiar fairsharing: dff3ef tess: EMPIAR - related_pages: - - data_publication - - ome - - bioimaging_data url: https://www.ebi.ac.uk/pdbe/emdb/empiar/ -- description: This tool carries out data hub set up at the European Nucleotide Archive - (ENA). +- description: This tool carries out data hub set up at the European Nucleotide Archive (ENA). + id: ena-compare-data-hubs name: ENA COMPARE Data Hubs - related_pages: - - dm_coordination - - it_support - - data_manager url: https://github.com/nadimm-rahman/ena-datahub-setup -- description: The program submits experimental data and respective metadata to the - European Nucleotide Archive (ENA). +- description: The program submits experimental data and respective metadata to the European Nucleotide Archive (ENA). + id: ena-upload-tool name: ENA upload tool - related_pages: - - it_support - - data_manager - - researcher - - data_brokering url: https://github.com/usegalaxy-eu/ena-upload-cli -- description: Genome browser for vertebrate genomes that supports research in comparative - genomics, evolution, sequence variation and transcriptional regulation. 
+- description: Genome browser for vertebrate genomes that supports research in comparative genomics, evolution, sequence variation and transcriptional regulation. + id: ensembl name: Ensembl registry: biotools: ensembl fairsharing: fx0mw7 tess: Ensembl url: https://www.ensembl.org/index.html -- description: Comparative analysis, data mining and visualisation for the genomes - of non-vertebrate species +- description: Comparative analysis, data mining and visualisation for the genomes of non-vertebrate species + id: ensembl-genomes name: Ensembl Genomes registry: biotools: ensembl_genomes @@ -1076,1681 +774,1058 @@ tess: Ensembl Genomes url: https://ensemblgenomes.org/ - description: Open-access database of full genomes of plant species. + id: ensembl-plants name: Ensembl Plants registry: fairsharing: j8g2cv tess: Ensembl Plants - related_pages: - - plant_geno_assembly - - plants url: https://plants.ensembl.org/ -- description: Web-based tool allowing users to create and manage a register of personal - data processing activities (ROPA). +- description: Web-based tool allowing users to create and manage a register of personal data processing activities (ROPA). + id: erpa name: ERPA - related_pages: - - policy_officer - - human_data - - data_protection url: https://gitlab.sib.swiss/clinbio/erpa-app -- description: Regulation (eu) 2016/679 of the european parliament and of the council - on the protection of natural persons with regard to the processing of personal - data and on the free movement of such data, and repealing directive 95/46/ec (general - data protection regulation). +- description: Regulation (eu) 2016/679 of the european parliament and of the council on the protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing directive 95/46/ec (general data protection regulation). 
+ id: eu-general-data-protection-regulation name: EU General Data Protection Regulation - related_pages: - - data_protection - - policy_officer - - human_data - - tsd url: https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679&from=EN - description: EUDAT's wizard for finding the right licence for your data or code. + id: eudat-licence-selector-wizard name: EUDAT licence selector wizard - related_pages: - - licensing - - researcher - - data_manager - - policy_officer url: https://ufal.github.io/public-license-selector/ -- description: The European database of suspected adverse drug reaction reports is - a public resource aimed to provide access to reported suspected side-effects of - drugs. Side-effects are defined according to the MedDRA ontology. +- description: The European database of suspected adverse drug reaction reports is a public resource aimed to provide access to reported suspected side-effects of drugs. Side-effects are defined according to the MedDRA ontology. + id: eudravigilance name: EudraVigilance - related_pages: - - toxicology_data url: https://www.ema.europa.eu/en/human-regulatory/research-development/pharmacovigilance/eudravigilance -- description: EUPID provides a method for identity management, pseudonymisation and - record linkage to bridge the gap between multiple contexts. +- description: EUPID provides a method for identity management, pseudonymisation and record linkage to bridge the gap between multiple contexts. + id: eupid name: EUPID - related_pages: - - it_support - - policy_officer - - human_data url: https://eupid.eu/#/concept - description: European Search Catalogue for Plant Genetic Resources + id: eurisco name: EURISCO registry: biotools: eurisco - related_pages: - - plants - - researcher - - data_manager - - plant_pheno_assembly url: https://eurisco.ipk-gatersleben.de -- description: Europe PMC is a repository, providing access to worldwide life sciences - articles, books, patents and clinical guidelines. 
+- description: Europe PMC is a repository, providing access to worldwide life sciences articles, books, patents and clinical guidelines. + id: europe-pmc name: Europe PMC registry: biotools: europe_pmc fairsharing: cmw6mm tess: Europe PMC - related_pages: - - researcher url: https://europepmc.org/ -- description: A programme aiming to create an effective rare diseases research ecosystem - for progress, innovation and for the benefit of everyone with a rare disease. +- description: A programme aiming to create an effective rare diseases research ecosystem for progress, innovation and for the benefit of everyone with a rare disease. + id: european-joint-programme-on-rare-diseases name: European Joint Programme on Rare Diseases (EJP RD) - related_pages: - - rare_disease url: https://www.ejprarediseases.org/ -- description: This core model is designed to represent data about a rare disease - patient and biosample registries. The model is based on and builds on existing - standards, such as the European Rare Disease Registry Infrastructure and the Common - Data Elements from the rare disease community and other more generalised standards - for data sharing such as the W3C DCAT vocabulary. +- description: This core model is designed to represent data about a rare disease patient and biosample registries. The model is based on and builds on existing standards, such as the European Rare Disease Registry Infrastructure and the Common Data Elements from the rare disease community and other more generalised standards for data sharing such as the W3C DCAT vocabulary. 
+ id: european-joint-programme-on-rare-diseases-metadata-model name: European Joint Programme on Rare Diseases Metadata Model - related_pages: - - rare_disease url: https://github.com/ejp-rd-vp/resource-metadata-schema -- description: 'The Virtual Platform is a federated ecosystem, in which resources - are enhanced to be amenable to rare disease research, and made Findable, Accessible, - Interoperable and Reusable: data stays at the source level but can be queyrable - at distance from an EJP RD query point.' +- description: 'The Virtual Platform is a federated ecosystem, in which resources are enhanced to be amenable to rare disease research, and made Findable, Accessible, Interoperable and Reusable: data stays at the source level but can be queryable at distance from an EJP RD query point.' + id: european-joint-programme-on-rare-diseases-virtual-platform name: European Joint Programme on Rare Diseases Virtual Platform (EJP RD) - related_pages: - - rare_disease url: https://vp.ejprarediseases.org/ -- description: A record of sequence information scaling from raw sequcning reads to - assemblies and functional annotation +- description: A record of sequence information scaling from raw sequencing reads to assemblies and functional annotation + id: european-nucleotide-archive name: European Nucleotide Archive (ENA) registry: biotools: ena fairsharing: dj8nt8 tess: European Nucleotide Archive (ENA) - related_pages: - - micro_biotech - - plant_geno_assembly - - data_brokering url: https://www.ebi.ac.uk/ena/browser/home -- description: ERDRI.dor provides an overview of participating rare disease registries - with their main characteristics and description. +- description: ERDRI.dor provides an overview of participating rare disease registries with their main characteristics and description.
+ id: european-rare-disease-registry-infrastructure-directory-of-registries name: European Rare Disease Registry Infrastructure directory of registries (ERDRI.dor) - related_pages: - - rare_disease url: https://eu-rd-platform.jrc.ec.europa.eu/erdridor/ -- description: ERDRI.mdr serves to ease the integration of heterogeneous data from - different rare disease registries. For this purpose, it contains a collection - of metadata which specifies the used data elements of a registry including the - designation of the used data elements, their definition and units of measurement. +- description: ERDRI.mdr serves to ease the integration of heterogeneous data from different rare disease registries. For this purpose, it contains a collection of metadata which specifies the used data elements of a registry including the designation of the used data elements, their definition and units of measurement. + id: european-rare-disease-registry-infrastructure-metadata-repository name: European Rare Disease Registry Infrastructure metadata repository (ERDRI.mdr) - related_pages: - - rare_disease url: https://eu-rd-platform.jrc.ec.europa.eu/mdr/ -- description: Virtual networks involving healthcare providers across Europe. They - aim to facilitate discussion on complex or rare diseases and conditions that require - highly specialised treatment, and concentrated knowledge and resources. +- description: Virtual networks involving healthcare providers across Europe. They aim to facilitate discussion on complex or rare diseases and conditions that require highly specialised treatment, and concentrated knowledge and resources. + id: european-reference-networks name: European Reference Networks (ERNs) registry: tess: European Reference Networks (ERNs) - related_pages: - - rare_disease url: https://health.ec.europa.eu/european-reference-networks_en -- description: Open-access database of all types of genetic variation data from all - species. 
+- description: Open-access database of all types of genetic variation data from all species. + id: european-variation-archive name: European Variation Archive (EVA) registry: biotools: eva fairsharing: 6824pv tess: European Variation Archive (EVA) - related_pages: - - plant_geno_assembly url: https://www.ebi.ac.uk/eva/ -- description: Controlled vocabulary that describes types of evidence and assertion - methods +- description: Controlled vocabulary that describes types of evidence and assertion methods + id: evidence-and-conclusion-ontology name: Evidence and Conclusion Ontology (ECO) registry: + biotools: NA fairsharing: wvpgwn - related_pages: - - existing_data - - metadata url: https://evidenceontology.org/ -- description: The FDA Adverse Event Reporting System (FAERS) is an american resource - that contains adverse event reports, medication error reports and product quality - complaints submitted by healthcare professionals, consumers, and manufacturers. - MedDRA ontology is used for coding adverse effects. Note that reports available - in FAERS do not require a causal relationship between a product and an adverse - event and further evaluations are conducted by FDA to monitor the safety of products. +- description: The FDA Adverse Event Reporting System (FAERS) is an american resource that contains adverse event reports, medication error reports and product quality complaints submitted by healthcare professionals, consumers, and manufacturers. MedDRA ontology is used for coding adverse effects. Note that reports available in FAERS do not require a causal relationship between a product and an adverse event and further evaluations are conducted by FDA to monitor the safety of products. 
+ id: faers name: FAERS registry: biotools: faers - related_pages: - - toxicology_data url: https://www.fda.gov/drugs/surveillance/questions-and-answers-fdas-adverse-event-reporting-system-faers -- description: FAIDARE is a tool allowing to search data across dinstinct databases - that implemented BrAPI. +- description: FAIDARE is a tool allowing to search data across distinct databases that implemented BrAPI. + id: faidare name: FAIDARE registry: biotools: faidare - related_pages: - - researcher - - data_manager - - plants - - ifb - - plant_pheno_assembly - - plant_geno_assembly url: https://urgi.versailles.inra.fr/faidare/ -- description: FAIR Cookbook is an online resource for the Life Sciences with recipes - that help you to make and keep data Findable, Accessible, Interoperable and Reusable - (FAIR) +- description: FAIR Cookbook is an online resource for the Life Sciences with recipes that help you to make and keep data Findable, Accessible, Interoperable and Reusable (FAIR) + id: fair-cookbook name: FAIR Cookbook - related_pages: - - compliance - - data_manager - - transmed url: https://fairplus.github.io/the-fair-cookbook/content/recipes/assessing-fairness.html# - description: A FAIR Data Point stores metadata in a standardized and sharable way. + id: fair-data-point name: FAIR Data Point (FDP) registry: fairsharing: '298' tess: FAIR Data Point - related_pages: - - rare_disease - - metadata - - it_support url: https://www.fairdatapoint.org/ - description: Resources and guidelines to assess the FAIRness of digital resources. + id: fair-evaluation-services name: FAIR Evaluation Services - related_pages: - - compliance - - data_manager - - policy_officer url: https://fairsharing.github.io/FAIR-Evaluator-FrontEnd/#!/#%2F! -- description: The FIP is a collection of FAIR implementation choices made by a community - of practice for each of the FAIR Principles.
+- description: The FIP is a collection of FAIR implementation choices made by a community of practice for each of the FAIR Principles. + id: fair-implementation-profile name: FAIR Implementation Profile registry: fairsharing: '343864' - related_pages: - - dm_coordination - - dmp - - researcher - - data_manager url: https://www.go-fair.org/how-to-go-fair/fair-implementation-profile/ -- description: The FAIR wizard utilizes FAIRification resources developed by the FAIRplus - project and other platforms, suggests FAIRification materials based on the FAIRification - requirements, and designs FAIRification solutions for data owners, data stewards, - and other people involved in FAIRification. +- description: The FAIR wizard utilizes FAIRification resources developed by the FAIRplus project and other platforms, suggests FAIRification materials based on the FAIRification requirements, and designs FAIRification solutions for data owners, data stewards, and other people involved in FAIRification. + id: fair-wizard name: FAIR-Wizard registry: tess: FAIR-Wizard - related_pages: - - compliance - - data_manager - - policy_officer url: https://wwwdev.ebi.ac.uk/ait/fair-wizard/home - description: Help you discover resources to measure and improve FAIRness. + id: fairassist-org name: FAIRassist.org - related_pages: - - compliance - - data_manager - - policy_officer url: https://fairassist.org/#!/ -- description: A data Management Platform for organising, sharing and publishing research - datasets, models, protocols, samples, publications and other research outcomes. +- description: A data Management Platform for organising, sharing and publishing research datasets, models, protocols, samples, publications and other research outcomes. 
+ id: fairdom-seek name: FAIRDOM-SEEK registry: biotools: seek - related_pages: - - storage - - it_support - - nels - - micro_biotech - - ifb - - machine_actionability - - plant_pheno_assembly - - plant_geno_assembly url: https://seek4science.org/ -- description: Data, model and SOPs management for projects, from preliminary data - to publication, support for running SBML models, etc. (public SEEK instance) +- description: Data, model and SOPs management for projects, from preliminary data to publication, support for running SBML models, etc. (public SEEK instance) + id: fairdomhub name: FAIRDOMHub registry: fairsharing: nnvcr9 tess: FAIRDOMHub - related_pages: - - storage - - researcher - - nels - - metadata - - micro_biotech - - machine_actionability - - data_manager url: https://fairdomhub.org - description: A System to Evaluate the FAIRness of Digital Objects + id: fairshake name: FAIRshake - related_pages: - - compliance - - data_manager - - it_support url: https://fairshake.cloud -- description: A curated, informative and educational resource on data and metadata - standards, inter-related to databases and data policies. +- description: A curated, informative and educational resource on data and metadata standards, inter-related to databases and data policies. 
+ id: fairsharing name: FAIRsharing registry: fairsharing: 2abjs5 tess: FAIRsharing - related_pages: - - metadata - - data_publication - - policy_officer - - data_manager - - researcher - - micro_biotech - - existing_data url: https://fairsharing.org/ - description: Data publishing platform + id: figshare name: FigShare registry: fairsharing: drtwnh tess: FigShare - related_pages: - - data_publication - - biomol_sim - - bioimaging_data - - identifiers url: https://figshare.com/ - description: A free FTP (FTPS and SFTP) solution with graphical interface + id: filezilla name: FileZilla registry: tess: FileZilla - related_pages: - - transfer - - it_support url: https://filezilla-project.org -- description: FIP Wizard is a toolset to facilitate the capture of data in FAIR Convergence - Matrix questionnaire prompting communities to explicitly declare their FAIR Implementation - Profiles. These profiles can be then stored and published as nanopublications. +- description: FIP Wizard is a toolset to facilitate the capture of data in FAIR Convergence Matrix questionnaire prompting communities to explicitly declare their FAIR Implementation Profiles. These profiles can be then stored and published as nanopublications. + id: fip-wizard name: FIP Wizard - related_pages: - - dm_coordination - - dmp - - researcher - - data_manager url: https://fip-wizard.readthedocs.io/en/latest/ -- description: FreeIPA is an integrated Identity and Authentication solution for Linux/UNIX - networked environments. +- description: Fiji is an image processing package + id: fiji + name: Fiji + registry: + biotools: fiji + tess: Fiji + url: https://fiji.sc/ +- description: FreeIPA is an integrated Identity and Authentication solution for Linux/UNIX networked environments. 
+ id: free-ipa name: Free-IPA - related_pages: - - it_support - - transmed url: https://www.freeipa.org/ - description: Repository of IP-free synthetic biological parts + id: freegenes name: Freegenes - related_pages: - - micro_biotech url: https://stanford.freegenes.org/collections/open-genes -- description: Principled and practical framework for the responsible sharing of genomic - and health-related data. +- description: Principled and practical framework for the responsible sharing of genomic and health-related data. + id: ga4gh-data-security-toolkit name: GA4GH Data Security Toolkit - related_pages: - - data_publication - - policy_officer - - data_manager - - it_support - - human_data - - sensitive url: https://www.ga4gh.org/genomic-data-toolkit/data-security-toolkit/ - description: Open standards for genomic data sharing. + id: ga4gh-genomic-data-toolkit name: GA4GH Genomic Data Toolkit - related_pages: - - data_manager - - it_support - - human_data url: https://www.ga4gh.org/genomic-data-toolkit/ - description: Framework for Responsible Sharing of Genomic and Health-Related Data + id: ga4gh-regulatory-and-ethics-toolkit name: GA4GH Regulatory and Ethics toolkit - related_pages: - - data_protection - - sensitive - - policy_officer - - data_manager - - it_support - - human_data url: https://www.ga4gh.org/genomic-data-toolkit/regulatory-ethics-toolkit/ -- description: Open, web-based platform for data intensive biomedical research. Whether - on the free public server or your own instance, you can perform, reproduce, and - share complete analyses. +- description: Open, web-based platform for data intensive biomedical research. Whether on the free public server or your own instance, you can perform, reproduce, and share complete analyses. 
+ id: galaxy name: Galaxy registry: biotools: galaxy tess: Galaxy - related_pages: - - nels - - marine_assembly - - data_analysis - - researcher - - it_support - - ifb - - galaxy url: https://galaxyproject.org/ -- description: A database of genetic sequence information. GenBank may also refer - to the data format used for storing information around genetic sequence data. +- description: A database of genetic sequence information. GenBank may also refer to the data format used for storing information around genetic sequence data. + id: genbank name: GenBank registry: biotools: genbank fairsharing: 9kahy4 tess: GenBank - related_pages: - - micro_biotech url: https://www.ncbi.nlm.nih.gov/genbank/ -- description: A repository of MIAME-compliant genomics data from arrays and high-throughput - sequencing +- description: A repository of MIAME-compliant genomics data from arrays and high-throughput sequencing + id: gene-expression-omnibus name: Gene Expression Omnibus (GEO) - related_pages: - - micro_biotech - - data_publication - - metadata - - transfer - - ome - - bioimaging_data - - toxicology_data url: https://www.ncbi.nlm.nih.gov/geo/ -- description: Geneid is an ab initio gene finding program used to predict genes along - DNA sequences in a large set of organisms. +- description: Geneid is an ab initio gene finding program used to predict genes along DNA sequences in a large set of organisms. + id: geneid name: GENEID registry: biotools: geneid - related_pages: - - data_analysis - - researcher url: https://genome.crg.cat/software/geneid/index.html -- description: GHS (Globally Harmonized System of Classification and Labelling of - Chemicals) classification was developed by the United Nations in an attempt to - align standards and chemical regulations in different countries. GHS includes - criteria for the classification of health, physical and environmental hazards, - and what information should be included on labels of hazardous chemicals and safety - data sheets. 
+- description: GHS (Globally Harmonized System of Classification and Labelling of Chemicals) classification was developed by the United Nations in an attempt to align standards and chemical regulations in different countries. GHS includes criteria for the classification of health, physical and environmental hazards, and what information should be included on labels of hazardous chemicals and safety data sheets. + id: ghs-classification name: GHS Classification - related_pages: - - toxicology_data url: https://pubchem.ncbi.nlm.nih.gov/ghs/ -- description: Distributed version control system designed to handle everything from - small to very large projects +- description: Distributed version control system designed to handle everything from small to very large projects + id: git name: Git registry: tess: Git - related_pages: - - data_organisation - - data_manager - - it_support url: https://git-scm.com/ -- description: Versioning system, used for sharing code, as well as for sharing of - small data +- description: Versioning system, used for sharing code, as well as for sharing of small data + id: github name: GitHub registry: fairsharing: c55d5e fairsharing-coll: bsg-d001160 tess: GitHub - related_pages: - - data_publication - - data_organisation - - it_support - - data_manager url: https://github.com -- description: GitLab is an open source end-to-end software development platform with - built-in version control, issue tracking, code review, CI/CD, and more. Self-host - GitLab on your own servers, in a container, or on a cloud provider. +- description: GitLab is an open source end-to-end software development platform with built-in version control, issue tracking, code review, CI/CD, and more. Self-host GitLab on your own servers, in a container, or on a cloud provider. 
+ id: gitlab name: GitLab registry: - fairsharing: 530e61 + fairsharing: '530e61' tess: GitLab - related_pages: - - data_organisation - - data_publication - - it_support - - data_manager url: https://gitlab.com/gitlab-org/gitlab - description: High-performance data transfers between systems within and across organizations + id: globus name: Globus - related_pages: - - transfer - - it_support url: https://www.globus.org -- description: A multispecies integrative information system dedicated to plant and - fungi pests. It allows researchers to access genetic, phenotypic and genomic data. - It is used by both large international projects and the French National Research - Institute for Agriculture, Food and Environment. +- description: A multispecies integrative information system dedicated to plant and fungi pests. It allows researchers to access genetic, phenotypic and genomic data. It is used by both large international projects and the French National Research Institute for Agriculture, Food and Environment. + id: gnpis name: GnpIS registry: biotools: gnpis fairsharing: dw22y3 - related_pages: - - plant_pheno_assembly url: https://urgi.versailles.inrae.fr/gnpis/ - description: Search engine for datasets + id: google-dataset-search name: Google Dataset Search - related_pages: - - existing_data url: https://datasetsearch.research.google.com/ - description: Cloud Storage for Work and Home + id: google-drive name: Google Drive - related_pages: - - storage - - transfer url: https://www.google.com/intl/en_us/drive/ - description: Repository of GPCR protein simulations + id: gpcrmd name: GPCRmd registry: biotools: GPCRmd - related_pages: - - biomol_sim - - data_publication url: http://gpcrmd.org/ -- description: The GRAPE pipeline provides an extensive pipeline for RNA-Seq analyses. - It allows the creation of an automated and integrated workflow to manage, analyse - and visualize RNA-Seq data. 
+- description: The GRAPE pipeline provides an extensive pipeline for RNA-Seq analyses. It allows the creation of an automated and integrated workflow to manage, analyse and visualize RNA-Seq data. + id: grape-2-0 name: GRAPE 2.0 registry: biotools: grape_2.0 - related_pages: - - data_analysis url: https://github.com/guigolab/grape-nf - description: ELN Comparison Grid by Hardvard Medical School + id: harvard-medical-school-electronic-lab-notebooks name: Harvard Medical School - Electronic Lab Notebooks - related_pages: - - metadata - - identifiers - - researcher - - data_manager url: https://datamanagement.hms.harvard.edu/analyze/electronic-lab-notebooks -- description: Haz-Map is an occupational health database that makes available information - about the adverse effects of exposures to chemical and biological agents at the - workplace. These associations have been established using current scientific evidence. +- description: Haz-Map is an occupational health database that makes available information about the adverse effects of exposures to chemical and biological agents at the workplace. These associations have been established using current scientific evidence. + id: haz-map name: Haz-Map - related_pages: - - toxicology_data url: https://haz-map.com/ -- description: Guidelines about how to license research data from Digital Curation - Centre +- description: Guidelines about how to license research data from Digital Curation Centre + id: how-to-license-research-data-dcc name: How to License Research Data - DCC - related_pages: - - licensing - - researcher - - data_manager - - policy_officer url: https://www.dcc.ac.uk/guidance/how-guides/license-research-data -- description: The Human Protein Atlas contains information for a large majority of - all human protein-coding genes regarding the expression and localization of the - corresponding proteins based on both RNA and protein data. 
+- description: The Human Protein Atlas contains information for a large majority of all human protein-coding genes regarding the expression and localization of the corresponding proteins based on both RNA and protein data. + id: human-protein-atlas name: Human Protein Atlas registry: fairsharing: j0t0pe tess: Human Protein Atlas - related_pages: - - proteomics url: https://www.proteinatlas.org/ -- description: HumanMine integrates many types of human data and provides a powerful - query engine, export for results, analysis for lists of data and FAIR access via - web services. +- description: HumanMine integrates many types of human data and provides a powerful query engine, export for results, analysis for lists of data and FAIR access via web services. + id: humanmine name: HumanMine registry: biotools: humanmine fairsharing: RJ99Pj tess: HumanMine - related_pages: - - data_organisation - - data_manager - - researcher - - human_data - - data_analysis url: https://www.humanmine.org/ -- description: With fast file transfer and streaming solutions built on the award-winning - IBM FASP protocol, IBM Aspera software moves data of any size across any distance +- description: With fast file transfer and streaming solutions built on the award-winning IBM FASP protocol, IBM Aspera software moves data of any size across any distance + id: ibm-aspera name: IBM Aspera - related_pages: - - transfer - - it_support url: https://www.ibm.com/products/aspera -- description: Data sharing - name: iCloud - related_pages: - - storage - - data_analysis - - transfer - url: https://www.icloud.com/ -- description: The Identifiers.org Resolution Service provides consistent access to - life science data using Compact Identifiers. Compact Identifiers consist of an - assigned unique prefix and a local provider designated accession number (prefix:accession). +- description: The Identifiers.org Resolution Service provides consistent access to life science data using Compact Identifiers. 
Compact Identifiers consist of an assigned unique prefix and a local provider designated accession number (prefix:accession). + id: identifiers-org name: Identifiers.org registry: biotools: identifiers.org fairsharing: n14rc8 tess: Identifiers.org - related_pages: - - identifiers - - it_support - - data_manager url: http://identifiers.org -- description: A collection of standard biological parts to which all entrants in - the iGEM competition must submit their parts +- description: A collection of standard biological parts to which all entrants in the iGEM competition must submit their parts + id: igem-parts-registry name: iGEM Parts Registry - related_pages: - - micro_biotech url: http://parts.igem.org/Main_Page - description: A repository of image datasets from scientific publications + id: image-data-resource name: Image Data Resource (IDR) registry: biotools: idr fairsharing: 6wf1zw - related_pages: - - micro_biotech - - data_publication - - metadata - - transfer - - ome - - bioimaging_data url: https://idr.openmicroscopy.org -- description: The Informed Consent Ontology (ICO) is an ontology for the informed - consent and informed consent process in the medical field. - name: Informed Consent Ontology +- description: The Informed Consent Ontology (ICO) is an ontology for the informed consent and informed consent process in the medical field. 
+ id: informed-consent-ontology + name: Informed Consent Ontology (ICO) registry: fairsharing: b9znd5 - related_pages: - - it_support - - policy_officer - - human_data url: http://purl.obolibrary.org/obo/ICO.owl -- description: The International Compilation of Human Research Standards enumerates - over 1,000 laws, regulations, and guidelines (collectively referred to as standards) - that govern human subject protections in 133 countries, as well as standards from - a number of international and regional organizations +- description: The International Compilation of Human Research Standards enumerates over 1,000 laws, regulations, and guidelines (collectively referred to as standards) that govern human subject protections in 133 countries, as well as standards from a number of international and regional organizations + id: international-compilation-of-human-research-standards name: International Compilation of Human Research Standards - related_pages: - - human_data url: https://www.hhs.gov/ohrp/sites/default/files/2020-international-compilation-of-human-research-standards.pdf -- description: A collaborative database of genetic sequence datasets from DDBJ, EMBL-EBI - and NCBI +- description: A collaborative database of genetic sequence datasets from DDBJ, EMBL-EBI and NCBI + id: international-nucleotide-sequence-database-collaboration name: International Nucleotide Sequence Database Collaboration (INSDC) registry: biotools: insdc - related_pages: - - micro_biotech - - data_brokering url: http://www.insdc.org - description: Data standards and formats for reporting flow cytometry data + id: international-society-for-the-advancement-of-cytometry name: International Society for the Advancement of Cytometry (ISAC) - related_pages: - - micro_biotech + registry: + biotools: NA url: https://isac-net.org/page/Data-Standards - description: Resource for naming standards in biochemistry and molecular biology + id: international-union-of-biochemistry-and-molecular-biology name: 
International Union of Biochemistry and Molecular Biology (IUBMB) - related_pages: - - micro_biotech url: https://www.qmul.ac.uk/sbcs/iubmb/ -- description: Functional analysis of protein sequences by classifying them into families - and predicting the presence of domains and important sites +- description: Functional analysis of protein sequences by classifying them into families and predicting the presence of domains and important sites + id: interpro name: InterPro registry: biotools: interpro fairsharing: pda11d tess: InterPro url: https://www.ebi.ac.uk/interpro/ -- description: IntoGen collects and analyses somatic mutations in thousands of tumor - genomes to identify cancer driver genes. +- description: IntoGen collects and analyses somatic mutations in thousands of tumor genomes to identify cancer driver genes. + id: intogen name: IntoGen registry: biotools: intogen - related_pages: - - data_analysis - - human_data url: https://www.intogen.org/search - description: Intrinsically disordered proteins ontology + id: idpo name: Intrinsically disordered proteins ontology (IDPO) registry: biotools: idpo - related_pages: - - idp - - metadata url: https://disprot.org/ontology -- description: The Integrated Risk Information System (IRIS) resource evaluates information - on health that might arise after exposure to environmental contaminants. +- description: The Integrated Risk Information System (IRIS) resource evaluates information on health that might arise after exposure to environmental contaminants. + id: iris name: IRIS registry: biotools: iris tess: IRIS - related_pages: - - toxicology_data url: https://www.epa.gov/iris -- description: Integrated Rule-Oriented Data System (iRODS) is open source data management - software for a cancer genome analysis workflow. +- description: Integrated Rule-Oriented Data System (iRODS) is open source data management software for a cancer genome analysis workflow. 
+ id: irods name: iRODS registry: biotools: irods - related_pages: - - storage - - it_support - - transmed - - bioimaging_data url: https://irods.org/ -- description: Open source framework and tools helping to manage a diverse set of - life science, environmental and biomedical experiments using the Investigation - Study Assay (ISA) standard +- description: Open source framework and tools helping to manage a diverse set of life science, environmental and biomedical experiments using the Investigation Study Assay (ISA) standard + id: isa-tools name: ISA-tools registry: fairsharing: 53gp75 - related_pages: - - it_support - - data_manager - - micro_biotech - - machine_actionability url: https://isa-tools.org/ -- description: Open source software library that can be used to generate a ISA-TAB - export from in-house data sets. These comprises e.g. local database or local file - system based experimental. +- description: Open source software library that can be used to generate a ISA-TAB export from in-house data sets. These comprises e.g. local database or local file system based experimental. 
+ id: isa4j name: ISA4J registry: biotools: isa4j - related_pages: - - plants - - machine_actionability - - plant_pheno_assembly - url: https://doi.org/10.12688/f1000research.27188.1 + url: https://mvnrepository.com/artifact/de.ipk-gatersleben/isa4j - description: International information security standard + id: iso-iec-27001 name: ISO/IEC 27001 - related_pages: - - data_protection - - policy_officer - - human_data url: https://en.wikipedia.org/wiki/ISO/IEC_27001 -- description: A collaborative resource from IUPAC and IUBMB for naming standards - in biochemistry +- description: A collaborative resource from IUPAC and IUBMB for naming standards in biochemistry + id: iupac-iubmb-joint-commission-on-biochemical-nomenclature name: IUPAC-IUBMB Joint Commission on Biochemical Nomenclature (JCBN) - related_pages: - - micro_biotech url: https://www.qmul.ac.uk/sbcs/iupac/jcbn/ - description: A registry platform for biological parts + id: jbei-ice name: JBEI-ICE - related_pages: - - micro_biotech url: https://ice.jbei.org - description: Jupyter notebooks allow to share code, documentation + id: jupyter name: Jupyter registry: tess: Jupyter - related_pages: - - it_support - - data_analysis url: https://jupyter.org +- description: JWS-Online is a systems biology tool for the construction, modification and simulation of kinetic models and for the storage of curated models. + id: jws-online + name: JWS Online + registry: + fairsharing: r09jt6 + url: https://jjj.mib.ac.uk/ - description: Keycloak is an open source identity and data access management solution. + id: keycloak name: Keycloak registry: tess: Keycloak - related_pages: - - it_support - - transmed url: https://www.keycloak.org/ -- description: The LiMTox system is a text mining approach that tries to extract associations - between compounds and a particular toxicological endpoint at various levels of - granularity and evidence types, all inspired by the content of toxicology reports. 
- It integrates direct ranking of associations between compounds and hepatotoxicity - through combination of heterogeneous complementary strategies from term co-mention, - rules, and patterns to machine learning-based text classification. It also provides - indirect associations to hepatotoxicity through the extraction of relations reflecting - the effect of compounds at the level of metabolism and liver enzymes. +- description: The LiMTox system is a text mining approach that tries to extract associations between compounds and a particular toxicological endpoint at various levels of granularity and evidence types, all inspired by the content of toxicology reports. It integrates direct ranking of associations between compounds and hepatotoxicity through combination of heterogeneous complementary strategies from term co-mention, rules, and patterns to machine learning-based text classification. It also provides indirect associations to hepatotoxicity through the extraction of relations reflecting the effect of compounds at the level of metabolism and liver enzymes. 
+ id: limtox name: LimTox registry: biotools: limtox - related_pages: - - toxicology_data url: http://limtox.bioinfo.cnio.es/ - description: Web portal for finding ontologies + id: linked-open-vocabularies name: Linked Open Vocabularies (LOV) - related_pages: - - metadata - - data_manager - - researcher url: https://lov.linkeddata.es/dataset/lov/ - description: A database of prokaryote specific biodiversity information + id: list-of-prokaryotic-names-with-standing-in-nomenclature name: List of Prokaryotic names with Standing in Nomenclature (LPSN) registry: biotools: lpsn - related_pages: - - micro_biotech url: https://lpsn.dsmz.de - description: EuroHPC world-class supercomputer + id: lumi name: LUMI registry: biotools: lumi - related_pages: - - data_analysis - - researcher - - it_support - - csc url: https://www.lumi-supercomputer.eu/ -- description: Machine-Actionable Data Management Plan | Webinar (2016) on making - a good data management plan. +- description: Machine-Actionable Data Management Plan | Webinar (2016) on making a good data management plan. + id: madmp-research-bridge name: maDMP - Research Bridge - related_pages: - - dmp - - it_support url: https://library.ust.hk/sc/machine-actionable-dmp/ -- description: MarDB includes all non-complete marine microbial genomes regardless - of level of completeness. Each entry contains 120 metadata fields including information - about sampling environment or host, organism and taxonomy, phenotype, pathogenicity, - assembly and annotation. +- description: MarDB includes all non-complete marine microbial genomes regardless of level of completeness. Each entry contains 120 metadata fields including information about sampling environment or host, organism and taxonomy, phenotype, pathogenicity, assembly and annotation. 
+ id: mardb name: MarDB registry: biotools: mardb - related_pages: - - marine_assembly - - data_analysis url: https://mmp2.sfb.uit.no/mardb/ - description: MarFun is a manually curated marine fungi genome database. + id: marfun name: MarFun - related_pages: - - marine_assembly - - data_analysis url: https://mmp2.sfb.uit.no/marfun/ -- description: High-quality curated and freely accessible microbial genomics and metagenomics - resources for the marine scientific community - name: Marine metagenomics portal +- description: High-quality curated and freely accessible microbial genomics and metagenomics resources for the marine scientific community + id: marine-metagenomics-portal + name: Marine Metagenomics Portal (MMP) registry: biotools: mmp - related_pages: - - marine_assembly url: https://mmp2.sfb.uit.no/ -- description: MarRef is a manually curated marine microbial reference genome database - that equenced genomes. Each entry contains 120 metadata fields including information - about sampling environment or host, organism and taxonomy, phenotype, pathogenicity, - assembly and annotation information +- description: MarRef is a manually curated marine microbial reference genome database that equenced genomes. Each entry contains 120 metadata fields including information about sampling environment or host, organism and taxonomy, phenotype, pathogenicity, assembly and annotation information + id: marref name: MarRef registry: biotools: marref - related_pages: - - marine_assembly url: https://mmp2.sfb.uit.no/marref/ +- description: Powerful search engine which uses mass spectrometry data to identify proteins + from DNA, RNA and protein sequence databases as well as spectral libraries. + id: mascot + name: Mascot + registry: + biotools: MASCOT + url: https://www.matrixscience.com/server.html +- description: Powerful search engine which uses mass spectrometry data to identify proteins + from DNA, RNA and protein sequence databases as well as spectral libraries. 
+ id: massive + name: MassIVE + url: https://massive.ucsd.edu/ProteoSAFe/static/massive.jsp - description: Database of over 5000 intrinsic membrane protein structures + id: memprotmd name: MemProtMD - related_pages: - - biomol_sim url: http://memprotmd.bioch.ox.ac.uk/ -- description: Multidisciplinary, free-to-use open repository specialized for research - data +- description: Multidisciplinary, free-to-use open repository specialized for research data + id: mendeley-data name: Mendeley data registry: fairsharing: 3epmpp - related_pages: - - data_publication - - biomol_sim url: https://data.mendeley.com/ - description: A repository of genomics data relating to the study of the metabolome - name: MetabolomeXchange - registry: - biotools: metabolomexchange - related_pages: - - micro_biotech - - data_publication + id: metabolomexchange + name: Metabolome Exchange url: http://www.metabolomexchange.org/site/ -- description: Cleans metagenomic reads to remove adapters, low-quality bases and - host (e.g. human) contamination +- description: Cleans metagenomic reads to remove adapters, low-quality bases and host (e.g. 
human) contamination + id: metagen-fastqc name: Metagen-FastQC - related_pages: - - data_publication url: https://github.com/alakob/Metagen-FastQC-Docker - description: Minimum Information About Disorder Experiments (MIADE) standard + id: miade name: MIADE - related_pages: - - metadata - - researcher - - data_manager - - idp url: https://www.psidev.info/intrinsically-disordered-proteins-workgroup - description: Minimum Information About a Plant Phenotyping Experiment + id: miappe name: MIAPPE registry: fairsharing: nd9ce9 tess: MIAPPE - related_pages: - - metadata - - researcher - - data_manager - - plants - - plant_geno_assembly - - plant_pheno_assembly url: https://www.miappe.org/ - description: Cloud storage and file sharing service from Microsoft + id: microsoft-azure name: Microsoft Azure - related_pages: - - storage - - it_support - - transfer url: https://azure.microsoft.com/en-gb/ - description: Cloud storage and file sharing service from Microsoft + id: microsoft-onedrive name: Microsoft OneDrive - related_pages: - - storage - - it_support url: https://www.microsoft.com/en-us/microsoft-365/onedrive/online-cloud-storage - description: Minimum Information about a (Meta)Genome Sequence + id: migs-mims name: MIGS/MIMS registry: fairsharing: va1hck - related_pages: - - metadata - - researcher - - data_manager - - marine - - micro_biotech url: https://www.gensc.org/pages/projects/mixs-gsc-project.html -- description: MINT, the Molecular INTeraction database, focuses on experimentally - verified protein-protein interactions mined from the scientific literature by - expert curators +- description: MINT, the Molecular INTeraction database, focuses on experimentally verified protein-protein interactions mined from the scientific literature by expert curators + id: mint name: MINT registry: biotools: mint fairsharing: 2bdvmk url: https://mint.bio.uniroma2.it/ - description: Minimum Information about any (x) Sequence + id: mixs name: MIxS registry: fairsharing: 9aa0zp 
tess: MIxS - related_pages: - - metadata - - researcher - - data_manager - - marine - - plant_geno_assembly url: https://genomicsstandardsconsortium.github.io/mixs/ +- description: Provides a set of open-source, cross-platform software libraries and tools + that facilitate proteomics data analysis. + id: msconvert + name: msconvert + registry: + biotools: msconvert + url: https://proteowizard.sourceforge.io/ - description: A database of protein disorder and mobility annotations + id: mobidb name: MobiDB registry: biotools: mobidb fairsharing: jwra3e tess: MobiDB - related_pages: - - idp - - researcher url: https://mobidb.org/ -- description: Database of Protein Molecular Dynamics simulations representing different - structural clusters of the PDB +- description: Database of Protein Molecular Dynamics simulations representing different structural clusters of the PDB + id: model name: MoDEL registry: biotools: model + fairsharing: NA tess: MoDEL - related_pages: - - biomol_sim url: https://mmb.irbbarcelona.org/MoDEL/ -- description: Database of COVID-19 related atomistic Molecular Dynamic Trajectories - name: MoDEL Covid19 - related_pages: - - biomol_sim - url: https://bioexcel-cv19.bsc.es/#/ -- description: Repository for Central Nervous System-related mainly membrane protein - MD simulations +- description: Repository for Central Nervous System-related mainly membrane protein MD simulations + id: model-cns name: MoDEL-CNS - related_pages: - - biomol_sim - - data_publication url: https://mmb.irbbarcelona.org/MoDEL-CNS/#/ -- description: Repository for theoretical models of macromolecular structures with - DOIs for models +- description: Repository for theoretical models of macromolecular structures with DOIs for models + id: modelarchive name: ModelArchive registry: biotools: modelarchive fairsharing: tpqndj - related_pages: - - biomol_sim - - struct_bioinfo - - data_publication url: https://www.modelarchive.org/ -- description: Molgenis is a modular web application 
for scientific data. Molgenis - provides researchers with user friendly and scalable software infrastructures - to capture, exchange, and exploit the large amounts of data that is being produced - by scientific organisations all around the world. +- description: Molgenis is a modular web application for scientific data. Molgenis provides researchers with user friendly and scalable software infrastructures to capture, exchange, and exploit the large amounts of data that is being produced by scientific organisations all around the world. + id: molgenis name: MOLGENIS registry: biotools: molgenis - related_pages: - - identifiers - - it_support - - data_manager url: https://molgenis.gitbooks.io/molgenis/content/ - description: Database about interactions of molecules with membranes + id: molmedb name: MolMeDB registry: biotools: MolMeDB fairsharing: cwzk3c - related_pages: - - biomol_sim url: https://molmedb.upol.cz/ -- description: A risk assessment tool that can be used to do Data Protection Impact - Assessments +- description: A risk assessment tool that can be used to do Data Protection Impact Assessments + id: monarc name: MONARC - related_pages: - - data_protection - - policy_officer - - human_data - - transmed + registry: + fairsharing: NA url: https://open-source-security-software.net/project/MONARC -- description: "a Magnetic Resonance Imaging (MRI) converter from ParaVision\xAE (Bruker,\ - \ Inc. Billerica, MA) file format to DICOM standard" +- description: a Magnetic Resonance Imaging (MRI) converter from ParaVision® (Bruker, Inc. Billerica, MA) file format to DICOM standard + id: mri2dicom name: MRI2DICOM - related_pages: - - researcher - - data_manager - - xnat-pic url: https://github.com/szullino/XNAT-PIC -- description: The Multi-Crop Passport Descriptor is the metadata standard for plant - genetic resources maintained ex situ by genbanks. 
+- description: The Multi-Crop Passport Descriptor is the metadata standard for plant genetic resources maintained ex situ by genbanks. + id: multi-crop-passport-descriptor name: Multi-Crop Passport Descriptor (MCPD) registry: + biotools: NA fairsharing: hn155r tess: Multi-Crop Passport Descriptor - related_pages: - - metadata - - researcher - - it_support - - policy_officer - - plants - - plant_pheno_assembly - - plant_geno_assembly url: https://www.bioversityinternational.org/e-library/publications/detail/faobioversity-multi-crop-passport-descriptors-v21-mcpd-v21/ - description: A file-system based platform handling the transfer of data + id: mytardis name: MyTARDIS - related_pages: - - data_manager - - transfer - - bioimaging_data url: http://www.mytardis.org/ -- description: Online database hosting a vast amount of biotechnological information - including nucleic acids, proteins, genomes and publications. Also boasts integrated - tools for analysis. +- description: Online database hosting a vast amount of biotechnological information including nucleic acids, proteins, genomes and publications. Also boasts integrated tools for analysis. + id: national-center-for-biotechnology-information name: National Center for Biotechnology Information (NCBI) - related_pages: - - micro_biotech url: https://www.ncbi.nlm.nih.gov -- description: The National Biomonitoring Program (NBP) is a public resource that - offers an assessment of nutritional status and the exposure of the U.S. population - to environmental chemicals and toxic substances. +- description: The National Biomonitoring Program (NBP) is a public resource that offers an assessment of nutritional status and the exposure of the U.S. population to environmental chemicals and toxic substances. 
+ id: nbp name: NBP - related_pages: - - toxicology_data url: https://www.cdc.gov/biomonitoring/ - description: NCBI's taxonomy browser is a database of biodiversity information + id: ncbi-taxonomy name: NCBI Taxonomy registry: fairsharing: fj07xj - related_pages: - - micro_biotech url: https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/ - description: Hosts information relating to strains, cultures and more + id: ncimb name: NCIMB - related_pages: - - micro_biotech url: https://www.ncimb.com/culture-collection/ - description: Form and survey tool, also for sensitive data + id: nettskjema name: Nettskjema - related_pages: - - sensitive - - tsd url: https://nettskjema.no/ -- description: As fully on-premises solution, Nextcloud Hub provides the benefits - of online collaboration without the compliance and security risks +- description: As fully on-premises solution, Nextcloud Hub provides the benefits of online collaboration without the compliance and security risks + id: nextcloud name: Nextcloud - related_pages: - - storage - - it_support - - transfer url: https://nextcloud.com - description: Nextflow is a framework for data analysis workflow execution + id: nextflow name: Nextflow registry: biotools: nextflow tess: Nextflow - related_pages: - - it_support - - data_analysis url: https://www.nextflow.io -- description: Repository for lipid MD simulations to validate force fields with NMR - data +- description: Repository for lipid MD simulations to validate force fields with NMR data + id: nmrlipids name: NMRlipids - related_pages: - - biomol_sim - - data_publication url: http://nmrlipids.blogspot.com/ -- description: The National Poison Data System (NPDS) is a resource that provides - poisson exposure occurring in the US and some freely associated states. +- description: The National Poison Data System (NPDS) is a resource that provides poisson exposure occurring in the US and some freely associated states. 
+ id: npds name: NPDS - related_pages: - - toxicology_data url: https://www.aapcc.org/national-poison-data-system -- description: Multi-stakeholder, interdisciplinary collaborative to bring out the - value of health data through large-scale analytics. All our solutions are open-source. +- description: Multi-stakeholder, interdisciplinary collaborative to bring out the value of health data through large-scale analytics. All our solutions are open-source. + id: ohdsi name: OHDSI registry: biotools: ohdsi - related_pages: - - researcher - - data_manager - - data_analysis - - storage - - transmed - - toxicology_data url: https://ohdsi.org/ -- description: OMERO is an open-source client-server platform for managing, visualizing - and analyzing microscopy images and associated metadata +- description: OMERO is an open-source client-server platform for managing, visualizing and analyzing microscopy images and associated metadata + id: omero name: OMERO registry: biotools: omero + fairsharing: NA tess: OMERO - related_pages: - - metadata - - data_manager - - it_support - - storage - - ome - - bioimaging_data - url: https://www.openmicroscopy.org -- description: Omics Discovery Index (OmicsDI) provides a knowledge discovery framework - across heterogeneous omics data (genomics, proteomics, transcriptomics and metabolomics) + url: https://www.openmicroscopy.org/omero/ +- description: Omics Discovery Index (OmicsDI) provides a knowledge discovery framework across heterogeneous omics data (genomics, proteomics, transcriptomics and metabolomics) + id: omicsdi name: OmicsDI registry: biotools: omicsdi fairsharing: re1278 tess: OmicsDI - related_pages: - - existing_data - - proteomics url: https://www.omicsdi.org -- description: OMOP is a common data model for the harmonisation for of observational - health data. +- description: OMOP is a common data model for the harmonisation for of observational health data. 
+ id: omop-cdm name: OMOP-CDM - related_pages: - - transmed url: https://ohdsi.github.io/CommonDataModel/ -- description: OntoMaton facilitates ontology search and tagging functionalities within - Google Spreadsheets. +- description: OntoMaton facilitates ontology search and tagging functionalities within Google Spreadsheets. + id: onotomaton name: OnotoMaton - related_pages: - - researcher - - data_manager - - it_support - - metadata - - identifiers url: https://github.com/ISA-tools/OntoMaton - description: A web portal to search and visualise ontologies + id: ontobee name: Ontobee registry: fairsharing: q8fx1b - related_pages: - - metadata - - data_manager - - researcher url: http://www.ontobee.org -- description: Licenses that are conformant with the principles laid out in the Open - Definition. +- description: Licenses that are conformant with the principles laid out in the Open Definition. + id: open-definition-conformant-licenses name: Open Definition Conformant Licenses - related_pages: - - licensing - - researcher - - data_manager - - policy_officer url: https://opendefinition.org/licenses/ - description: Explore Open Access research outcomes from OpenAIRE network + id: openaire-explore name: OpenAIRE Explore - related_pages: - - existing_data url: https://explore.openaire.eu/search/find -- description: ELIXIR benchmarking platform to support community-led scientific benchmarking - efforts and the technical monitoring of bioinformatics reosurces +- description: ELIXIR benchmarking platform to support community-led scientific benchmarking efforts and the technical monitoring of bioinformatics reosurces + id: openebench name: OpenEBench registry: biotools: openebench - related_pages: - - data_analysis - - data_manager - - it_support url: https://openebench.bsc.es/ +- description: OpenMS offers an open-source C++ library (+ Python bindings) for LC/MS data management, analysis and visualization. 
+ id: openms + name: OpenMS + registry: + biotools: openms + url: https://openms.de/ - description: Data curation tool for working with messy data + id: openrefine name: OpenRefine registry: tess: OpenRefine - related_pages: - - data_quality url: https://openrefine.org/ -- description: 'free and open source project management tool that supports the entire - research lifecycle: planning, execution, reporting, archiving, and discovery' +- description: 'free and open source project management tool that supports the entire research lifecycle: planning, execution, reporting, archiving, and discovery' + id: openscienceframework name: OpenScienceFramework registry: fairsharing: g4z879 - related_pages: - - data_publication - - biomol_sim url: https://osf.io/ -- description: OpenStack is an open source cloud computing infrastructure software - project and is one of the three most active open source projects in the world +- description: OpenStack is an open source cloud computing infrastructure software project and is one of the three most active open source projects in the world + id: openstack name: OpenStack registry: tess: OpenStack - related_pages: - - storage - - data_analysis - - transmed - - ifb url: https://www.openstack.org/ -- description: The Orphadata platform provides the scientific community with comprehensive, - high-quality datasets related to rare diseases and orphan drugs, in a reusable - and computable format +- description: The Orphadata platform provides the scientific community with comprehensive, high-quality datasets related to rare diseases and orphan drugs, in a reusable and computable format + id: orphadata name: Orphadata registry: biotools: orphadata fairsharing: d1a667 fairsharing-coll: bsg-d001249 url: http://www.orphadata.org/cgi-bin/index.php -- description: OSF (Open Science Framework) is a free, open platform to support your - research and enable collaboration. 
+- description: OSF (Open Science Framework) is a free, open platform to support your research and enable collaboration. + id: osf name: OSF registry: tess: OSF - related_pages: - - storage - - researcher - - data_manager url: https://osf.io -- description: One Touch Pipeline (OTP) is a data management platform for running - bioinformatics pipelines in a high-throughput setting, and for organising the - resulting data and metadata. +- description: One Touch Pipeline (OTP) is a data management platform for running bioinformatics pipelines in a high-throughput setting, and for organising the resulting data and metadata. + id: otp name: OTP registry: biotools: otp - related_pages: - - human_data - - metadata - - dmp - - data_analysis url: https://gitlab.com/one-touch-pipeline/otp - description: Cloud storage and file sharing service + id: owncloud name: ownCloud - related_pages: - - storage - - it_support - - transfer - - data_analysis url: https://owncloud.com -- description: PAA is an R/Bioconductor tool for protein microarray data analysis - aimed at biomarker discovery. +- description: PAA is an R/Bioconductor tool for protein microarray data analysis aimed at biomarker discovery. 
+ id: paa name: PAA registry: biotools: paa - related_pages: - - data_analysis - - researcher - - human_data - - proteomics url: https://bioconductor.org/packages/PAA/ - description: Data Publisher for Earth and Environmental Science + id: pangaea name: PANGAEA registry: biotools: pangaea fairsharing: 6yw6cp tess: PANGAEA - related_pages: - - data_publication - - metadata - - researcher - - data_manager url: https://www.pangaea.de/ - description: The Protein Circular Dichroism Data Bank + id: pcddb name: PCDDB registry: biotools: pcddb - related_pages: - - idp - - researcher url: https://pcddb.cryst.bbk.ac.uk/ - description: The Protein Data Bank (PDB) + id: pdb name: PDB registry: biotools: pdb tess: PDB - related_pages: - - researcher - - idp - - struct_bioinfo url: https://www.wwpdb.org/ -- description: Prototype archiving system for structural models obtained using integrative - or hybrid modeling +- description: Prototype archiving system for structural models obtained using integrative or hybrid modeling + id: pdb-dev name: PDB-Dev - related_pages: - - biomol_sim - - struct_bioinfo url: https://pdb-dev.wwpdb.org/ -- description: Information about the standard PDB archive format PDBx/mmCIF, its dictionaries - and related software tools +- description: Information about the standard PDB archive format PDBx/mmCIF, its dictionaries and related software tools + id: pdbx-mmcif-format-and-tools name: PDBx/mmCIF format and tools registry: fairsharing: fd28en - related_pages: - - struct_bioinfo url: https://mmcif.wwpdb.org/ -- description: Extension of the PDBx/mmCIF dictionary for theoretical models of macromolecular - structures +- description: Extension of the PDBx/mmCIF dictionary for theoretical models of macromolecular structures + id: pdbx-mmcif-modelcif-extension-dictionary name: PDBx/mmCIF ModelCIF Extension Dictionary - related_pages: - - struct_bioinfo url: https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index/ -- description: A resource that curates 
knowledge about the impact of genetic variation - on drug response. +- description: Database of multi-organism, publicly accessible compendium of peptides identified + in a large set of tandem mass spectrometry proteomics experiments. + id: peptideatlas + name: PeptideAtlas + url: https://peptideatlas.org/ +- description: A resource that curates knowledge about the impact of genetic variation on drug response. + id: pharmgkb name: PharmGKB registry: biotools: pharmgkb - related_pages: - - toxicology_data url: https://www.pharmgkb.org/ -- description: Pharos provides hazard, use, and exposure information on 140,872 chemicals - and 180 different kinds of building products. +- description: Pharos provides hazard, use, and exposure information on 140,872 chemicals and 180 different kinds of building products. + id: pharos name: Pharos registry: biotools: pharos - related_pages: - - toxicology_data url: https://pharosproject.net/ -- description: The open-source Phenotyping Hybrid Information System (PHIS) manages - and collects data from plants phenotyping and high throughput phenotyping experiments - on a day to day basis. +- description: The open-source Phenotyping Hybrid Information System (PHIS) manages and collects data from plants phenotyping and high throughput phenotyping experiments on a day to day basis. + id: phis name: PHIS registry: tess: PHIS - related_pages: - - plant_pheno_assembly - - plants - - ifb url: http://www.phis.inra.fr/ -- description: PIA is a toolbox for mass spectrometrey based protein inference and - identification analysis. +- description: PIA is a toolbox for mass spectrometrey based protein inference and identification analysis. 
+ id: pia-protein-inference-algorithms name: PIA - Protein Inference Algorithms registry: biotools: pia - related_pages: - - data_analysis - - researcher - - proteomics url: https://github.com/mpc-bioinformatics/pia -- description: A data management solution for intra-institutional organization and - structured storage of life science project-associated research data, with emphasis - on the generation of adequate metadata. +- description: A data management solution for intra-institutional organization and structured storage of life science project-associated research data, with emphasis on the generation of adequate metadata. + id: pisa-tree name: pISA-tree registry: biotools: pisa-tree - related_pages: - - micro_biotech - - researcher - - data_manager - - data_organisation - - metadata - - plant_pheno_assembly - - plant_geno_assembly url: https://github.com/NIB-SI/pISA-tree -- description: Access point for plant comparative genomics, centralizing genomic data - produced by different genome sequencing initiatives. +- description: Access point for plant comparative genomics, centralizing genomic data produced by different genome sequencing initiatives. + id: plaza name: PLAZA registry: fairsharing: wBOua0 tess: PLAZA - related_pages: - - plants - - plant_geno_assembly - - researcher url: https://bioinformatics.psb.ugent.be/plaza/ -- description: Platform for the study of the impact of pathological mutations in protein - stuctures. +- description: Platform for the study of the impact of pathological mutations in protein stuctures. 
+ id: pmut name: PMut registry: biotools: pmut - related_pages: - - data_analysis - - human_data url: http://mmb.irbbarcelona.org/PMut - description: PRoteomics IDEntifications (PRIDE) Archive database + id: pride name: PRIDE registry: biotools: pride fairsharing: e1byny tess: PRIDE - related_pages: - - proteomics url: https://www.ebi.ac.uk/pride/ -- description: ProteomeXchange provides globally coordinated standard data submission - and dissemination pipelines +- description: Main tool used to submit proteomics datasets to PRIDE Archive + id: pride-submission-tool + name: PRIDE Submission Tool + url: https://www.ebi.ac.uk/pride/markdownpage/pridesubmissiontool +- description: ProteomeXchange provides globally coordinated standard data submission and dissemination pipelines + id: proteomexchange name: ProteomeXchange registry: biotools: proteomexchange fairsharing: 92dt9d tess: ProteomeXchange - related_pages: - - proteomics url: http://www.proteomexchange.org/ -- description: The HUPO Proteomics Standards Initiative defines community standards - for data representation in proteomics and interactomics to facilitate data comparison, - exchange and verification. - name: Proteomics Standards Initiative - registry: - fairsharing: 46e1e9 - related_pages: - - proteomics +- description: The HUPO Proteomics Standards Initiative defines community standards for data representation in proteomics and interactomics to facilitate data comparison, exchange and verification. + id: proteomics-standards-initiative + name: HUPO Proteomics Standards Initiative url: https://www.psidev.info/ - description: A secure platform for developing and sharing reproducible methods. + id: protocols-io name: protocols.io registry: fairsharing: 132b10 - related_pages: - - micro_biotech url: https://www.protocols.io -- description: PROV-DM is the conceptual data model that forms a basis for the W3C - provenance (PROV) family of specifications. 
+- description: PROV-DM is the conceptual data model that forms a basis for the W3C provenance (PROV) family of specifications. + id: prov-dm-the-prov-data-model name: 'PROV-DM: The PROV Data Model' - related_pages: - - data_provenance url: https://www.w3.org/TR/prov-dm/ -- description: R Markdown documents are fully reproducible. Use a productive notebook - interface to weave together narrative text and code to produce elegantly formatted - output. Use multiple languages including R, Python, and SQL. +- description: R Markdown documents are fully reproducible. Use a productive notebook interface to weave together narrative text and code to produce elegantly formatted output. Use multiple languages including R, Python, and SQL. + id: r-markdown name: R Markdown registry: tess: R Markdown - related_pages: - - data_analysis - - researcher url: https://rmarkdown.rstudio.com -- description: The RD-Connect GPAP is an online tool for diagnosis and gene discovery - in rare disease research. +- description: The RD-Connect GPAP is an online tool for diagnosis and gene discovery in rare disease research. 
+ id: rd-connect-genome-phenome-analysis-platform name: RD-Connect Genome Phenome Analysis Platform registry: tess: RD-Connect Genome Phenome Analysis Platform - related_pages: - - researcher - - human_data - - rare_disease url: https://rd-connect.eu/what-we-do/omics/gpap/ - description: Directory of standard metadata, divided into different research areas + id: rda-standards name: RDA Standards - related_pages: - - metadata - - researcher - - data_manager url: https://rd-alliance.github.io/metadata-directory/standards/ - description: Registry of Research Data Repositories + id: re3data name: re3data registry: tess: re3data - related_pages: - - existing_data - - data_publication url: https://www.re3data.org/ -- description: Portal with public data submitted to ECHA in REACH registration dossiers - by substance manufacturers, importers, or their representatives, as laid out by - the REACH Regulation (see Understanding REACH regulation). +- description: Portal with public data submitted to ECHA in REACH registration dossiers by substance manufacturers, importers, or their representatives, as laid out by the REACH Regulation (see Understanding REACH regulation). + id: reach-registered-substances name: REACH registered substances - related_pages: - - toxicology_data url: https://echa.europa.eu/information-on-chemicals/registered-substances -- description: REDCap is a secure web application for building and managing online - surveys and databases. While REDCap can be used to collect virtually any type - of data in any environment, it is specifically geared to support online and offline - data capture for research studies and operations. +- description: REDCap is a secure web application for building and managing online surveys and databases. While REDCap can be used to collect virtually any type of data in any environment, it is specifically geared to support online and offline data capture for research studies and operations. 
+ id: redcap name: REDCap registry: biotools: redcap tess: REDCap - related_pages: - - identifiers - - it_support - - data_manager - - data_quality url: https://projectredcap.org -- description: Database of A-to-I (deamination of adenosines to inosines) events that - enables to search RNA editing sites by genomic region, gene name and other relevant - features as the tissue of origin. +- description: Database of A-to-I (deamination of adenosines to inosines) events that enables to search RNA editing sites by genomic region, gene name and other relevant features as the tissue of origin. + id: rediportal name: REDIportal registry: biotools: rediportal - related_pages: - - epitrans url: http://srv00.recas.ba.infn.it/atlas/ - description: Python scripts to detect RNA editing events in RNAseq experiments + id: reditools name: REDItools registry: biotools: reditools - related_pages: - - epitrans url: https://github.com/BioinfoUNIBA/REDItools - description: REDItools2 is the optimized, parallel multi-node version of REDItools. + id: reditools2 name: REDItools2 - related_pages: - - epitrans url: https://github.com/BioinfoUNIBA/REDItools2 -- description: "REMS (Resource Entitlement Management System), developed by CSC, is\ - \ a tool that can be used to manage researchers\u2019 access rights to datasets." +- description: REMS (Resource Entitlement Management System), developed by CSC, is a tool that can be used to manage researchers’ access rights to datasets. + id: rems name: REMS registry: biotools: rems tess: REMS - related_pages: - - it_support - - transmed url: https://github.com/CSCfi/rems - description: File renaming software for Mac + id: renamer4mac name: Renamer4Mac - related_pages: - - data_organisation - - data_manager - - researcher url: https://renamer.com/ -- description: Repository Finder can help you find an appropriate repository to deposit - your research data. 
The tool is hosted by DataCite and queries the re3data registry - of research data repositories. +- description: Repository Finder can help you find an appropriate repository to deposit your research data. The tool is hosted by DataCite and queries the re3data registry of research data repositories. + id: repository-finder name: Repository Finder - related_pages: - - data_publication - - researcher - - data_manager url: https://repositoryfinder.datacite.org -- description: Supports the systematic planning, organisation and implementation of - research data management throughout the course of a project +- description: Supports the systematic planning, organisation and implementation of research data management throughout the course of a project + id: research-data-management-organiser name: Research Data Management Organiser - related_pages: - - dmp - - researcher - - data_manager - - it_support url: https://rdmorganiser.github.io/en -- description: Data management platform for automated loading, storage, linkage and - provision of data sets +- description: Data management platform for automated loading, storage, linkage and provision of data sets + id: research-data-management-platform name: Research Data Management Platform (RDMP) registry: biotools: rdmp - related_pages: - - storage - - it_support url: https://www.dundee.ac.uk/hic/data-team/researchdatamanagementplatform/ - description: Machine actionable DMPs. + id: research-management-plan name: Research Management Plan - related_pages: - - dmp - - researcher - - data_manager url: https://researcheracademy.elsevier.com/research-preparation/research-data-management/creating-good-research-data-management-plan -- description: RO-Crate is a lightweight approach to packaging research data with - their metadata, using schema.org. An RO-Crate is a structured archive of all the - items that contributed to the research outcome, including their identifiers, provenance, - relations and annotations. 
+- description: RO-Crate is a lightweight approach to packaging research data with their metadata, using schema.org. An RO-Crate is a structured archive of all the items that contributed to the research outcome, including their identifiers, provenance, relations and annotations. + id: research-object-crate name: Research Object Crate (RO-Crate) registry: fairsharing: wUoZKE - related_pages: - - metadata - - storage - - data_organisation - - data_manager - - researcher - - micro_biotech - - machine_actionability - - data_provenance url: https://w3id.org/ro/crate - description: Reva connects cloud storages and application providers + id: reva name: Reva registry: biotools: reva tess: Reva - related_pages: - - data_analysis - - transfer url: https://reva.link/ -- description: RightField is an open-source tool for adding ontology term selection - to Excel spreadsheets +- description: RightField is an open-source tool for adding ontology term selection to Excel spreadsheets + id: rightfield name: Rightfield registry: biotools: rightfield - related_pages: - - researcher - - metadata - - data_manager - - micro_biotech - - identifiers - - machine_actionability url: https://rightfield.org.uk - description: Rstudio notebooks allow to share code, documentation + id: rstudio name: Rstudio registry: biotools: rstudio tess: Rstudio - related_pages: - - data_analysis - - it_support - - researcher url: https://rstudio.com - description: Rucio - Scientific Data Management + id: rucio name: Rucio - related_pages: - - storage - - data_analysis - - transfer url: https://rucio.cern.ch/ -- description: RxNorm is a normalized naming system for medications that is maintained - by the National Library of Medicine. Rxnorm provides unique identifiers and allows - unambiguous communication of drug-related information across the American health - computer systems. +- description: RxNorm is a normalized naming system for medications that is maintained by the National Library of Medicine. 
Rxnorm provides unique identifiers and allows unambiguous communication of drug-related information across the American health computer systems. + id: rxnorm name: RxNorm registry: biotools: rxnorm fairsharing: 36pf8q - related_pages: - - toxicology_data url: https://www.nlm.nih.gov/research/umls/rxnorm/index.html -- description: SalDB is a salmon specific database of genome sequenced prokaryotes - representing the microbiota of fishes found in the taxonomic family of Salmonidae. +- description: SalDB is a salmon specific database of genome sequenced prokaryotes representing the microbiota of fishes found in the taxonomic family of Salmonidae. + id: saldb name: salDB - related_pages: - - marine_assembly url: https://mmp.sfb.uit.no/databases/saldb/ - description: Small Angle Scattering Biological Data Bank + id: sasbdb name: SASBDB - related_pages: - - idp - - researcher url: https://www.sasbdb.org/ -- description: A standard library of visual glyphs used to represent SBOL designs - and interactions. +- description: A standard library of visual glyphs used to represent SBOL designs and interactions. + id: sbol-visual name: SBOL Visual - related_pages: - - micro_biotech url: https://sbolstandard.org/visual-glyphs/ - description: A CAD tool to create SBOL designs through the use of SBOL Visual glyphs. + id: sboldesigner name: SBOLDesigner - related_pages: - - micro_biotech url: https://sboldesigner.github.io -- description: Schema.org is a collaborative, community activity with a mission to - create, maintain, and promote schemas for structured data on the Internet, on - web pages, in email messages, and beyond. +- description: Schema.org is a collaborative, community activity with a mission to create, maintain, and promote schemas for structured data on the Internet, on web pages, in email messages, and beyond. 
+ id: schema-org name: Schema.org registry: fairsharing: hzdzq8 tess: Schema.org - related_pages: - - machine_actionability - - it_support url: https://schema.org -- description: ScienceMesh - frictionless scientific collaboration and access to research - services +- description: ScienceMesh - frictionless scientific collaboration and access to research services + id: sciencemesh name: ScienceMesh - related_pages: - - storage - - data_analysis - - transfer url: https://sciencemesh.io/ -- description: List of respositories recommended by Scientific Data, contains both - discipline-specific and general repositories. +- description: List of respositories recommended by Scientific Data, contains both discipline-specific and general repositories. + id: scientific-data-s-recommended-repositories name: Scientific Data's Recommended Repositories - related_pages: - - data_publication - - researcher - - data_manager - - it_support url: https://www.nature.com/sdata/policies/repositories - description: SeaFile File Synchronization and Share Solution + id: seafile name: SeaFile - related_pages: - - storage - - transfer url: https://www.seafile.com/ -- description: A semantic data model describing the common data elements for rare - diseases registration. +- description: A semantic data model describing the common data elements for rare diseases registration. 
+ id: semantic-data-model-of-the-set-of-common-data-elements-for-rare-diseases-registration name: Semantic data model of the set of common data elements for rare diseases registration - related_pages: - - rare_disease url: https://github.com/ejp-rd-vp/CDE-semantic-model -- description: All-in-one platform for life science data management, semantic data - integration, data analysis and visualization +- description: All-in-one platform for life science data management, semantic data integration, data analysis and visualization + id: semares name: Semares - related_pages: - - researcher - - data_manager - - metadata - - data_analysis - - it_support - - storage url: https://www.genevention.com/products -- description: Contains 16 data elements to be registered by each rare disease registry - across Europe, which are considered to be essential for further research. +- description: Contains 16 data elements to be registered by each rare disease registry across Europe, which are considered to be essential for further research. + id: set-of-common-data-elements-for-rare-diseases-registration name: Set of common data elements for rare diseases registration - related_pages: - - rare_disease url: https://eu-rd-platform.jrc.ec.europa.eu/set-of-common-data-elements_en -- description: A scripting language for creating Synthetic Biology Open Language (SBOL) - in a more abstract way. +- description: A scripting language for creating Synthetic Biology Open Language (SBOL) in a more abstract way. 
+ id: shortbol name: ShortBOL - related_pages: - - micro_biotech url: http://shortbol.org - description: Structure integration with function, taxonomy and sequence + id: sifts name: SIFTS - related_pages: - - researcher - - idp url: https://www.ebi.ac.uk/pdbe/docs/sifts/ -- description: SILVA provides comprehensive, quality checked and regularly updated - datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU) ribosomal - RNA (rRNA) sequences for all three domains of life (Bacteria, Archaea and Eukarya). +- description: SILVA provides comprehensive, quality checked and regularly updated datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU) ribosomal RNA (rRNA) sequences for all three domains of life (Bacteria, Archaea and Eukarya). + id: silva name: Silva registry: biotools: silva @@ -2758,357 +1833,487 @@ tess: Silva url: https://www.arb-silva.de/ - description: Singularity is a container platform. + id: singularity name: Singularity registry: tess: Singularity - related_pages: - - it_support - - data_analysis - - tsd url: https://sylabs.io -- description: SMASCH (Smart Scheduling) system, is a web-based tooldesigned for longitudinal - clinical studies requiring recurrent follow-upvisits of the participants. SMASCH - controls and simplifies the scheduling of big database of patients. Smasch is - also used to organize the daily plannings (delegation of tasks) for the different - medical professionals such as doctors, nurses and neuropsychologists. +- description: Freely-available, open-source Windows client application for building Selected + Reaction Monitoring (SRM) / Multiple Reaction Monitoring (MRM), Parallel Reaction Monitoring (PRM), + DIA/SWATH and targeted DDA quantitative methods and analyzing the resulting mass spectrometer data. 
+ id: skyline + name: Skyline + registry: + biotools: skyline + tess: Skyline + url: https://skyline.ms/project/home/begin.view +- description: SMASCH (Smart Scheduling) system, is a web-based tooldesigned for longitudinal clinical studies requiring recurrent follow-upvisits of the participants. SMASCH controls and simplifies the scheduling of big database of patients. Smasch is also used to organize the daily plannings (delegation of tasks) for the different medical professionals such as doctors, nurses and neuropsychologists. + id: smasch name: SMASCH - related_pages: - - data_organisation - - transmed url: https://smasch.pages.uni.lu - description: Snakemake is a framework for data analysis workflow execution + id: snakemake name: Snakemake registry: biotools: snakemake tess: Snakemake - related_pages: - - it_support - - data_analysis url: https://snakemake.github.io - description: Added-value database for biological dynamics images + id: ssbd-database name: SSBD:database registry: fairsharing: we2r5a - related_pages: - - bioimaging_data - - data_publication url: https://ssbd.riken.jp/database/ -- description: An open data archive that stores and publishes bioimaging and biological - quantitative datasets +- description: An open data archive that stores and publishes bioimaging and biological quantitative datasets + id: ssbd-repository name: SSBD:repository - related_pages: - - bioimaging_data - - data_publication url: https://ssbd.riken.jp/repository/ - description: Resource of standards for reporting enzyme data + id: standards-for-reporting-enzyme-data name: Standards for Reporting Enzyme Data (STRENDA) - related_pages: - - micro_biotech url: https://www.beilstein-institut.de/en/projects/strenda/ - description: Known and predicted protein-protein interactions. 
+ id: string name: STRING registry: biotools: string fairsharing: 9b7wvk tess: STRING - related_pages: - - proteomics url: https://string-db.org/ - description: A searchable design repository for biological constructs + id: synbiohub name: SynBioHub registry: biotools: synbiohub fairsharing: 7CVoS6 - related_pages: - - micro_biotech url: https://synbiohub.org -- description: An open standard for the representation of in silico biological designs - and their place in the Design-Build-Test-Learn cycle of synthetic biology. +- description: An open standard for the representation of in silico biological designs and their place in the Design-Build-Test-Learn cycle of synthetic biology. + id: synthetic-biology-open-language name: Synthetic Biology Open Language (SBOL) registry: biotools: sbol - related_pages: - - micro_biotech url: https://sbolstandard.org - description: An open format for computational models of biological processes + id: systems-biology-markup-language name: Systems Biology Markup Language (SBML) registry: biotools: sbml - related_pages: - - micro_biotech url: http://sbml.org/Main_Page -- description: The Toxin and Toxin Target Database is a bioinformatics resource that - combines exhaustive toxin data with toxin target information. Currently it presents - more than 42,000 toxin-target associations extracted from other databases, government - documents, books and scientific literature. Each toxin record includes data on - chemical properties and descriptors, toxicity values and medical information. +- description: The Toxin and Toxin Target Database is a bioinformatics resource that combines exhaustive toxin data with toxin target information. Currently it presents more than 42,000 toxin-target associations extracted from other databases, government documents, books and scientific literature. Each toxin record includes data on chemical properties and descriptors, toxicity values and medical information. 
+ id: t3db name: T3DB registry: biotools: t3db - related_pages: - - toxicology_data url: http://www.t3db.ca/ - description: Talend is an open source data integration platform. + id: talend name: Talend - related_pages: - - data_manager - - researcher - - transmed url: https://www.talend.com/ -- description: A toxicogenomics database that stores gene expression data and biochemistry, - hematology, and histopathology findings derived from in vivo (rat) and in vitro - (primary rat hepatocytes, primary human hepatocytes) exposure to 170 compounds - at multiple dosages and time points. +- description: A toxicogenomics database that stores gene expression data and biochemistry, hematology, and histopathology findings derived from in vivo (rat) and in vitro (primary rat hepatocytes, primary human hepatocytes) exposure to 170 compounds at multiple dosages and time points. + id: tg-gates name: TG-GATES registry: biotools: open_tg-gates - related_pages: - - toxicology_data url: https://toxico.nibiohn.go.jp/english/ - description: An ontology for expressing environmental terms + id: the-environment-ontology name: The Environment Ontology (EnvO) - related_pages: - - micro_biotech url: https://sites.google.com/site/environmentontology/ -- description: EGA is a service for permanent archiving and sharing of all types of - personally identifiable genetic and phenotypic data resulting from biomedical - research projects +- description: EGA is a service for permanent archiving and sharing of all types of personally identifiable genetic and phenotypic data resulting from biomedical research projects + id: the-european-genome-phenome-archive name: The European Genome-phenome Archive (EGA) registry: biotools: ega fairsharing: mya1ff tess: The European Genome-phenome Archive (EGA) - related_pages: - - data_publication - - human_data - - policy_officer - - csc - - tsd url: https://ega-archive.org/ -- description: The Genomic Standards Consortium (GSC) is an open-membership working - 
body enabling genomic data integration, discovery and comparison through international - community-driven standards. - name: The Genomic Standards Consortium (GSC) +- description: The Genomic Standards Consortium (GSC) is an open-membership working body enabling genomic data integration, discovery and comparison through international community-driven standards. + id: genomic-standards-consortium + name: Genomic Standards Consortium (GSC) registry: + biotools: NA fairsharing: 9aa0zp - related_pages: - - metadata - - researcher - - it_support - - policy_officer - - human_data - url: https://www.gensc.org/pages/about.html -- description: Collaborative effort to develob interoperable ontologies for the biological - sciences + url: https://www.gensc.org/ +- description: Collaborative effort to develob interoperable ontologies for the biological sciences + id: the-open-biological-and-biomedical-ontology-foundry name: The Open Biological and Biomedical Ontology (OBO) Foundry registry: fairsharing-coll: bsg-d001083 - related_pages: - - metadata - - data_manager - - researcher url: http://obofoundry.org -- description: The Toxicology in the 21st Century program, or Tox21, is a unique collaboration - between several federal agencies to develop new ways to rapidly test whether substances - adversely affect human health. The Tox21 Toolbox contains data-analysis tools - for accessing and visualizing Tox21 quantitative high-throughput screening (qHTS) - 10K library data, as well as integrating with other publicly available data. +- description: The Toxicology in the 21st Century program, or Tox21, is a unique collaboration between several federal agencies to develop new ways to rapidly test whether substances adversely affect human health. The Tox21 Toolbox contains data-analysis tools for accessing and visualizing Tox21 quantitative high-throughput screening (qHTS) 10K library data, as well as integrating with other publicly available data. 
+ id: tox21-toolbox name: Tox21_Toolbox - related_pages: - - toxicology_data url: https://ntp.niehs.nih.gov/whatwestudy/tox21/toolbox/index.html -- description: The Toxicology in the 21st Century program, or Tox21, is a unique collaboration - between several federal agencies to develop new ways to rapidly test whether substances - adversely affect human health. This portal contains diverse downloadable results - of the ToxCast project. +- description: The Toxicology in the 21st Century program, or Tox21, is a unique collaboration between several federal agencies to develop new ways to rapidly test whether substances adversely affect human health. This portal contains diverse downloadable results of the ToxCast project. + id: toxcast-data name: ToxCast_data - related_pages: - - toxicology_data url: https://www.epa.gov/chemical-research/exploring-toxcast-data-downloadable-data -- description: The Toxicology Data Network (TOXNET) was a portal that allowed access - to several relevant sources in the toxicological field. Nowadays, these sources - have been integrated into other NLM resources. +- description: The Toxicology Data Network (TOXNET) was a portal that allowed access to several relevant sources in the toxicological field. Nowadays, these sources have been integrated into other NLM resources. + id: toxnet name: TOXNET - related_pages: - - toxicology_data url: https://www.nlm.nih.gov/toxnet/index.html -- description: Knowledge management and high-content analysis platform enabling analysis - of integrated data for the purposes of hypothesis generation, hypothesis validation, - and cohort discovery in translational research. +- description: Knowledge management and high-content analysis platform enabling analysis of integrated data for the purposes of hypothesis generation, hypothesis validation, and cohort discovery in translational research. 
+ id: transmart name: tranSMART registry: biotools: transmart - related_pages: - - researcher - - data_manager - - data_analysis - - storage - - transmed url: https://github.com/transmart -- description: A list of Ethical, Legal, and Societal Implications (ELSI) to consider - for research projects on human subjects +- description: A list of Ethical, Legal, and Societal Implications (ELSI) to consider for research projects on human subjects + id: tryggve-elsi-checklist name: Tryggve ELSI Checklist - related_pages: - - sensitive - - policy_officer - - data_manager - - human_data - - nels - - csc - - tsd - - data_protection - url: https://scilifelab-data-guidelines.readthedocs.io/en/latest/docs/general/sensitive_data.html -- description: TU Delft costing tool helps to budget for data management personnel - costs in proposals. + url: https://neic.no/tryggve/links/ +- description: TU Delft costing tool helps to budget for data management personnel costs in proposals. + id: tu-delft-data-management-costing-tool name: TU Delft data management costing tool - related_pages: - - costs url: https://www.tudelft.nl/en/library/research-data-management/r/plan/data-management-costs -- description: A tool that contains weighted gene co-expression networks obtained - from the Primary Human Hepatocytes, rat kidney, and liver TG-GATEs dataset. +- description: A tool that contains weighted gene co-expression networks obtained from the Primary Human Hepatocytes, rat kidney, and liver TG-GATEs dataset. + id: txg-mapr name: TXG-MAPr registry: biotools: txg-mapr - related_pages: - - data_analysis - - researcher - - toxicology_data url: https://txg-mapr.eu/ - description: UK Data Service activity-based costing tool. 
+ id: uk-data-service-data-management-costing-tool name: UK Data Service Data Management costing Tool - related_pages: - - costs url: https://ukdataservice.ac.uk/learning-hub/research-data-management/plan-to-share/costing/ -- description: The Unified Medical Language System (UMLS) is a set of tools that establishes - a mapping structure among different vocabularies in the biomedical sciences field - to enable interoperativity between computer systems. +- description: The Unified Medical Language System (UMLS) is a set of tools that establishes a mapping structure among different vocabularies in the biomedical sciences field to enable interoperativity between computer systems. + id: umls name: UMLS - related_pages: - - toxicology_data url: https://www.nlm.nih.gov/research/umls/index.html -- description: 'UniChem is a very simple, large-scale non-redundant database of pointers - between chemical structures and EMBL-EBI chemistry resources. Primarily, this - service has been designed to maintain cross references between EBI chemistry resources. - These include primary chemistry resources (ChEMBL and ChEBI), and other resources - where the main focus is not small molecules, but which may nevertheless contain - some small molecule information (eg: Gene Expression Atlas, PDBe).' +- description: 'UniChem is a very simple, large-scale non-redundant database of pointers between chemical structures and EMBL-EBI chemistry resources. Primarily, this service has been designed to maintain cross references between EBI chemistry resources. These include primary chemistry resources (ChEMBL and ChEBI), and other resources where the main focus is not small molecules, but which may nevertheless contain some small molecule information (eg: Gene Expression Atlas, PDBe).' 
+ id: unichem name: UniChem registry: tess: UniChem - related_pages: - - toxicology_data url: https://www.ebi.ac.uk/unichem/ +- description: Protein modification for mass spectrometry + id: unimod + name: Unimod + url: https://www.unimod.org - description: Comprehensive resource for protein sequence and annotation data + id: uniprot name: UniProt registry: biotools: uniprot fairsharing: s1ne3g tess: UniProt - related_pages: - - metadata - - researcher - - idp - - micro_biotech - - proteomics - - struct_bioinfo url: https://www.uniprot.org/ - description: List of Electronic Research Notebook Products by University of Cambridge + id: university-of-cambridge-electronic-research-notebook-products name: University of Cambridge - Electronic Research Notebook Products - related_pages: - - metadata - - identifiers - - researcher - - data_manager url: https://www.data.cam.ac.uk/data-management-guide/electronic-research-notebooks/electronic-research-notebook-products - description: A JavaScript library for the visualisation of SBOL. + id: visbol name: VisBOL - related_pages: - - micro_biotech url: http://visbol.org -- description: Wellcome Open Research requires that the source data underlying the - results are made available as soon as an article is published. This page provides - information about data you need to include, where your data can be stored, and - how your data should be presented. +- description: Wellcome Open Research requires that the source data underlying the results are made available as soon as an article is published. This page provides information about data you need to include, where your data can be stored, and how your data should be presented. 
+ id: wellcome-open-research-data-guidelines name: Wellcome Open Research - Data Guidelines - related_pages: - - data_publication - - researcher - - data_manager url: https://wellcomeopenresearch.org/for-authors/data-guidelines -- description: WinSCP is a popular SFTP client and FTP client for Microsoft Windows! - Copy file between a local computer and remote servers using FTP, FTPS, SCP, SFTP, - WebDAV or S3 file transfer protocols. +- description: WinSCP is a popular SFTP client and FTP client for Microsoft Windows! Copy file between a local computer and remote servers using FTP, FTPS, SCP, SFTP, WebDAV or S3 file transfer protocols. + id: winscp name: WinSCP - related_pages: - - transfer - - it_support url: https://winscp.net/eng/index.php -- description: WorkflowHub is a registry for describing, sharing and publishing scientific - computational workflows. +- description: WorkflowHub is a registry for describing, sharing and publishing scientific computational workflows. + id: workflowhub name: WorkflowHub registry: biotools: workflowhub fairsharing: 07cf72 tess: WorkflowHub - related_pages: - - data_publication - - data_manager - - researcher url: https://workflowhub.eu -- description: Open source imaging informatics platform. It facilitates common management, - productivity, and quality assurance tasks for imaging and associated data. +- description: Open source imaging informatics platform. It facilitates common management, productivity, and quality assurance tasks for imaging and associated data. 
+ id: xnat name: XNAT - related_pages: - - researcher - - data_analysis - - transmed - - xnat-pic - - bioimaging_data url: https://www.xnat.org/ -- description: Analysing of single or multiple subjects within the same project in - XNAT +- description: Analysing of single or multiple subjects within the same project in XNAT + id: xnat-pic-pipelines name: XNAT-PIC Pipelines - related_pages: - - researcher - - data_manager - - data_analysis - - xnat-pic url: https://github.com/szullino/XNAT-PIC - description: Import tool for multimodal DICOM image datasets to XNAT + id: xnat-pic-uploader name: XNAT-PIC Uploader - related_pages: - - researcher - - data_manager - - xnat-pic url: https://github.com/szullino/XNAT-PIC -- description: Generalist research data repository built and developed by OpenAIRE - and CERN +- description: Generalist research data repository built and developed by OpenAIRE and CERN + id: zenodo name: Zenodo registry: fairsharing: wy4egf tess: Zenodo - related_pages: - - data_publication - - biomol_sim - - bioimaging_data - - plant_pheno_assembly url: https://zenodo.org/ - description: Find possible ontology mappings for free text terms in the ZOOMA repository. + id: zooma name: Zooma registry: biotools: zooma tess: Zooma - related_pages: - - metadata - - data_manager - - researcher url: https://www.ebi.ac.uk/spot/zooma/ - description: A comprehensive repository of biomedical ontologies + id: bioportal name: BioPortal registry: biotools: bioportal fairsharing: 4m97ah tess: BioPortal - related_pages: - - metadata url: https://bioportal.bioontology.org +- description: PROAST (copyright RIVM National Institute for Public Health and the Environment) is a software package for the statistical analysis of dose-response data. 
+ id: proast + name: PROAST + url: https://www.rivm.nl/en/proast +- description: EPA's Benchmark Dose Software (BMDS) collects and provides easy access to numerous mathematical models that help risk assessors estimate the quantitative relationship between a chemical dose and the test subject’s response. + id: bmds + name: BMDS + url: https://www.epa.gov/bmds +- description: Differential gene expression analysis based on the negative binomial distribution + id: deseq2 + name: DESeq2 + registry: + biotools: deseq2 + tess: DESeq2 + url: https://bioconductor.org/packages/release/bioc/html/DESeq2.html +- description: Linear Models for Microarray Data + id: limma + name: limma + registry: + biotools: limma + tess: limma + url: https://bioconductor.org/packages/release/bioc/html/limma.html +- description: Flame is a flexible framework supporting predictive modeling and similarity search within the eTRANSAFE project. + id: flame + name: Flame + registry: + biotools: flame + url: https://github.com/phi-grib/flame +- description: CDISC SEND Controlled Terminology + id: cdisc-send + name: CDISC/SEND + url: https://evs.nci.nih.gov/ftp1/CDISC/SEND/SEND%20Terminology.html +- description: The OntoBrowser tool was developed to manage ontologies and code lists. + id: ontobrowser + name: OntoBrowser + registry: + biotools: ontobrowser + url: https://opensource.nibr.com/projects/ontobrowser/ +- description: ITER is an Internet database of human health risk values and cancer classifications for over 680 chemicals of environmental concern from multiple organizations worldwide. + id: iter + name: ITER + registry: + tess: ITER + url: https://www.tera.org/iter/ +- description: ERDRI.spider (Secure Privacy-preserving Identity management in Distributed Environments for Research) pseudonymisation tool generates pseudonyms for RD patients.
+ id: spider-pseudonymisation-tool + name: SPIDER pseudonymisation tool + url: https://eu-rd-platform.jrc.ec.europa.eu/spider/ +- description: Metadata for machines + id: fair-data-points + name: FAIR Data Points + registry: + tess: FAIR Data Points + url: https://home.fairdatapoint.org/ +- description: EJP RD - European Joint Programme on Rare Diseases - ERN Registries Generic Informed Consent Forms + id: ern-registries-generic-informed-consent-forms + name: ERN Registries Generic Informed Consent Forms + url: https://www.ejprarediseases.org/ern-registries-generic-icf/ +- description: ProteoWizard provides a set of open-source, cross-platform software libraries and tools that facilitate proteomics data analysis. + id: proteowizard + name: ProteoWizard + registry: + biotools: proteowizard + url: https://proteowizard.sourceforge.io/ +- description: The International Nucleotide Sequence Database Collaboration (INSDC) is a long-standing foundational initiative that operates between DDBJ, EMBL-EBI and NCBI. INSDC covers the spectrum of data raw reads, through alignments and assemblies to functional annotation, enriched with contextual information relating to samples and experimental configurations. + id: international-nucleotide-sequence-database-collaboration + name: International Nucleotide Sequence Database Collaboration + url: http://www.insdc.org/ +- description: A number of ECPGR Central Crop Databases have been established through the initiative of individual institutes and of ECPGR Working Groups. The databases hold passport data and, to varying degrees, characterization and primary evaluation data of the major collections of the respective crops in Europe. 
+ id: ecpgr-central-crop-databases + name: ECPGR Central Crop Databases and other Crop Databases + url: https://www.ecpgr.cgiar.org/resources/germplasm-databases/ecpgr-central-crop-databases +- description: A catalogue of relevant International Multicrop Databases + id: international-multicrop-databases + name: International Multicrop Databases + url: https://www.ecpgr.cgiar.org/resources/germplasm-databases/international-multicrop-databases +- description: Genesys is an online platform where you can find information about Plant Genetic Resources for Food and Agriculture PGRFA conserved in genebanks worldwide. + id: genesys + name: Genesys + url: https://www.genesys-pgr.org/ +- description: This document describes the MIAPPE Checklist and Data Model + id: miappe-checklist-data-model + name: MIAPPE_Checklist-Data-Model + url: https://github.com/MIAPPE/MIAPPE/tree/master/MIAPPE_Checklist-Data-Model-v1.1 +- description: BioSamples Plant MIAPPE checklist in JSON format + id: plant-miappe-json + name: plant-miappe.json + url: https://www.ebi.ac.uk/biosamples/schemas/certification/plant-miappe.json +- description: An ecosystem for sharing and opening research data + id: recherche-data-gouv + name: Recherche Data Gouv + registry: + fairsharing: 59985a + url: https://recherche.data.gouv.fr/ +- description: Submit a new BrAPI compatible server + id: brapi-compatible-server + name: BrAPI compatible server + url: https://www.brapi.org/servers +- description: MIAPPE-compliant spreadsheet template + id: miappe-compliant-spreadsheet-template + name: MIAPPE-compliant spreadsheet template + url: https://github.com/MIAPPE/MIAPPE/raw/master/MIAPPE_Checklist-Data-Model-v1.1/MIAPPE_templates/MIAPPEv1.1_training_spreadsheet.xlsx +- description: The Common Workflow Language CWL is an emerging standard for writing workflows that are portable across multiple workflow engines and platforms. Toil has full support for the CWL v1.0, v1.1, and v1.2 standards. 
+ id: cwl-in-toil + name: CWL in Toil + url: https://toil.readthedocs.io/en/latest/running/cwl.html +- description: This is the reference implementation of the Common Workflow Language open standards. It is intended to be feature complete and provide comprehensive validation of CWL files as well as provide other tools related to working with CWL. + id: cwltool + name: cwltool + url: https://pypi.org/project/cwltool/ +- description: Kernel Zero is IPython, which you can get through ipykernel, and is still a dependency of jupyter. The IPython kernel can be thought of as a reference implementation, as CPython is for Python. + id: jupyter-kernels + name: Jupyter kernels + url: https://github.com/jupyter/jupyter/wiki/Jupyter-kernels +- description: The Missing Package Manager for macOS or Linux + id: homebrew + name: Homebrew + registry: + tess: Homebrew + url: https://brew.sh/ +- description: A database to quickly access all tryptic peptides of the UniProtKB + id: macpepdb + name: Mass Centric Peptide Database + url: https://macpepdb.mpc.rub.de/ +- description: The MacPorts Project is an open-source community initiative to design an easy-to-use system for compiling, installing, and upgrading either command-line, X11 or Aqua based open-source software on the Mac operating system. + id: macports + name: MacPorts + url: https://www.macports.org +- description: The Package Manager for Windows + id: chocolatey + name: Chocolatey + url: https://chocolatey.org/ +- description: Windows Package Manager is a comprehensive package manager solution that consists of a command line tool and set of services for installing applications on Windows 10 and Windows 11. + id: windows-package-manager + name: Windows Package Manager + url: https://docs.microsoft.com/en-us/windows/package-manager/ +- description: udocker is a basic user tool to execute simple docker containers in user space without requiring root privileges. 
+ id: udocker + name: udocker + url: https://indigo-dc.gitbook.io/udocker/ +- description: Manage containers, pods, and images with Podman. Seamlessly work with containers and Kubernetes from your local environment. + id: podman + name: Podman + url: https://podman.io/ +- description: Docker Hub is the world's easiest way to create, manage, and deliver your team's container applications. + id: docker-hub + name: Docker Hub + registry: + fairsharing: afc2b3 + tess: Docker Hub + url: https://hub.docker.com/ +- description: BioContainers Flow + id: biocontainers + name: BioContainers + registry: + biotools: biocontainers + tess: BioContainers + url: https://biocontainers.pro/ +- description: Volumes are the preferred mechanism for persisting data generated by and used by Docker containers. + id: volumes + name: Volumes + registry: + tess: Volumes + url: https://docs.docker.com/storage/volumes/ +- description: Docker can build images automatically by reading the instructions from a Dockerfile + id: dockerfile-reference + name: Dockerfile reference + url: https://docs.docker.com/engine/reference/builder/ +- description: Compose is a tool for defining and running multi-container Docker applications. + id: docker-compose-overview + name: Docker Compose overview + url: https://docs.docker.com/compose/ +- description: Kubernetes, also known as K8s, is an open-source system for automating deployment, scaling, and management of containerized applications. + id: kubernetes + name: Kubernetes + registry: + tess: Kubernetes + url: https://kubernetes.io/ +- description: Cromwell is a Workflow Management System geared towards scientific workflows. + id: cromwell + name: Cromwell + url: https://cromwell.readthedocs.io/en/stable/tutorials/Containers/ +- description: Essential scientific and technical information about software tools, databases and services for bioinformatics and the life sciences. 
+ id: bio-tools + name: bio.tools + registry: + biotools: bio.tools + fairsharing: 63520c + tess: bio.tools + url: https://bio.tools/ +- description: Dockstore is a free and open source platform for sharing reusable and scalable analytical tools and workflows. It’s developed by the Cancer Genome Collaboratory and used by the GA4GH. + id: dockstore + name: Dockstore + registry: + biotools: dockstore + tess: Dockstore + url: https://dockstore.org +- description: LifeMonitor is a service to support the sustainability and reusability of published computational workflows. + id: life-monitor + name: Life-Monitor + registry: + tess: Life-Monitor + url: https://crs4.github.io/life_monitor/ +- description: Data Management Plans that meet institutional funder requirements. + id: dmponline + name: DMPonline + registry: + tess: DMPonline + url: https://dmponline.dcc.ac.uk +- description: Data Management Plan Generator + id: easy-dmp + name: easy.DMP + url: https://easydmp.no +- description: Gene expression across species and biological conditions + id: expression-atlas + name: Expression Atlas + registry: + fairsharing: f5zx00 + tess: Expression Atlas + url: https://www.ebi.ac.uk/gxa/home +- description: Accessible database designed by scientist for scientist, to improve the sharing of influenza data. + id: gisaid + name: GISAID + registry: + fairsharing: ba6a09 + url: https://www.gisaid.org +- description: Protein Data Bank in Europe Knowledge Base + id: pdbe-kb + name: PDBe-KB + registry: + biotools: pdbe-kb + tess: PDBe-KB + url: https://www.ebi.ac.uk/pdbe/pdbe-kb +- description: 3DBIONOTES-WS is a web application designed to automatically annotate biochemical and biomedical information onto structural models. 
+ id: 3dbionotes + name: 3DBioNotes + registry: + biotools: 3dbionotes + tess: 3DBioNotes + url: http://3dbionotes-ws.cnb.csic.es/ +- description: A software to make biological search more integrated, intuitive and intelligent, enabling a better way to discover and share new insights. + id: knetminer + name: KnetMiner + registry: + biotools: knetminer + fairsharing: 826b4a + url: https://knetminer.com +- description: The RDF Knowledge-based Database for plant molecular networks + id: agronomic-linked-data + name: Agronomic Linked Data + registry: + fairsharing: ZPRtfG + url: http://agrold.southgreen.fr/agrold/ +- description: Interoperability in small molecules + id: integrated-database-of-small-molecules + name: Integrated Database of Small Molecules + url: https://idsm.elixir-czech.cz/ +- description: The OME Model is a specification for storing data on biological imaging. + id: ome-data-model-and-file-formats + name: OME Data Model and File Formats + url: https://docs.openmicroscopy.org/ome-model/ diff --git a/_sass/_custom_classes.scss b/_sass/_custom_classes.scss index 991d6c2e4..c2e35acdf 100644 --- a/_sass/_custom_classes.scss +++ b/_sass/_custom_classes.scss @@ -36,6 +36,7 @@ li ul .sidebar_rdm_sub { } #side-nav .btn.sidebar-title { + border: 0; border-bottom: 0.2rem solid $primary; font-weight: bold; padding-bottom: $input-btn-padding-y - 0.2rem; diff --git a/images/fr_ifb_assembly.svg b/images/fr_ifb_assembly.svg index 5ec77506f..5b52308a8 100644 --- a/images/fr_ifb_assembly.svg +++ b/images/fr_ifb_assembly.svg @@ -1 +1,1402 @@ -PlanResearch questionConceptualizationGrant ApplicationData Management PlanBudgetingCollectExperimentsMetadata trackingData integrationSample & Data provenanceData StorageIFB clusters Storage Analysis PipelinesISA FormatSample/Data provenance JWS online for SBML modelsSnapshots for publicationKnowledgeModellingPreserve & ShareDepositionDatabasesProcess & AnalyseComputingStatisticsFAIR-checker \ No newline at end of file + 
+PlanResearch questionConceptualizationGrant ApplicationData Management PlanBudgetingCollectExperimentsMetadata trackingData integrationSample & Data provenanceData StorageIFB clusters Storage Analysis PipelinesISA FormatSample/Data provenance JWS online for SBML modelsSnapshots for publicationKnowledgeModellingPreserve & ShareDepositionDatabasesProcess & AnalyseComputingStatisticsFAIR-checker diff --git a/images/plant_phenomics.svg b/images/plant_phenomics.svg index 70d198c6d..06dd6500a 100644 --- a/images/plant_phenomics.svg +++ b/images/plant_phenomics.svg @@ -23,8 +23,8 @@ inkscape:deskcolor="#d1d1d1" showgrid="false" inkscape:zoom="0.22225943" - inkscape:cx="1534.2431" - inkscape:cy="1759.2054" + inkscape:cx="980.83577" + inkscape:cy="1660.222" inkscape:window-width="1920" inkscape:window-height="1011" inkscape:window-x="0" @@ -1157,12 +1157,12 @@ y="112.913" id="tspan685">PHISPHIS + y="578.68298" /> diff --git a/index.html b/index.html index 8f606f5f9..871961cee 100644 --- a/index.html +++ b/index.html @@ -6,7 +6,6 @@ {% include head.html %} - {% if jekyll.environment == "development" %}{% include dev-info.html %}{% endif %} {% include topnav.html search=false %} @@ -81,7 +80,7 @@

We welcome contributors!

{% include news.html caption=true title=true limit=3 %}
- {% include events.html title=true event_type="upcoming_event"%} + {% include events.html title=true event_type="upcoming_event" %}
diff --git a/pages/about/editorial_board.md b/pages/about/editorial_board.md index 11e6b8ff5..ac494d537 100644 --- a/pages/about/editorial_board.md +++ b/pages/about/editorial_board.md @@ -4,7 +4,7 @@ title: Editorial board ## Meet the editorial board members -{% include contributor-carousel-selection.html custom="Bert Droesbeke, Carole Goble, Daniel Faria, Flora D'Anna, Frederik Coppens, Munazah Andrabi, Niclas Jareborg, Pinar Alper, Rob Hooft, Ulrike Wittig, Laura Portell Silva, Martin Cook, Korbinian Bösl, Federico Bianchini, Nazeefa Fatima, Ishwar Chandramouliswaran"%} +{% include contributor-carousel-selection.html custom="Bert Droesbeke, Carole Goble, Daniel Faria, Flora D'Anna, Frederik Coppens, Munazah Andrabi, Niclas Jareborg, Pinar Alper, Rob Hooft, Ulrike Wittig, Laura Portell Silva, Martin Cook, Korbinian Bösl, Federico Bianchini, Nazeefa Fatima, Ishwar Chandramouliswaran" %} ## Join as editorial board member diff --git a/pages/about/support.md b/pages/about/support.md index 90a348477..a616858f7 100644 --- a/pages/about/support.md +++ b/pages/about/support.md @@ -7,13 +7,13 @@ custom_editme: _data/affiliations.yaml We thank these institutions for their contribution. -{% include affiliation-tiles-selection.html type="institution"%} +{% include affiliation-tiles-selection.html type="institution" %} ## Projects We thank these projects for their efforts: -{% include affiliation-tiles-selection.html type="project"%} +{% include affiliation-tiles-selection.html type="project" %} ## Funders @@ -21,10 +21,10 @@ RDMkit is developed in ELIXIR-CONVERGE that received funding from the European U Additionally we thank the funders that supported some of our contributors. 
-{% include affiliation-tiles-selection.html type="funder"%} +{% include affiliation-tiles-selection.html type="funder" %} ## Infrastructures We thank these infrastructures for their efforts: -{% include affiliation-tiles-selection.html type="infrastructure"%} +{% include affiliation-tiles-selection.html type="infrastructure" %} diff --git a/pages/all_tools_and_resources.md b/pages/all_tools_and_resources.md index e731f3c60..f0d09b621 100644 --- a/pages/all_tools_and_resources.md +++ b/pages/all_tools_and_resources.md @@ -10,7 +10,7 @@ We link tools and resources to related information in ELIXIR registries: related It is recommended to add: - your training materials and events into the training registry [TeSS](https://tess.elixir-europe.org/about/registering) -- your standards, databases and policies in [FAIRsharing](https://fairsharing.org/new) registry +- your standards, databases and policies in [fairsharing](https://fairsharing.org/) registry - your software tools, databases and services for bioinformatics and the life sciences in [bio.tools](https://bio.tools) registry diff --git a/pages/contribute/editors_checklist.md b/pages/contribute/editors_checklist.md index 754d0eed7..3d03a504b 100644 --- a/pages/contribute/editors_checklist.md +++ b/pages/contribute/editors_checklist.md @@ -14,10 +14,10 @@ summary: Checklist for editors before approving and merging a pull request (PR). * `training` * `search_exclude` must be deleted * `description` - * `affiliation` + * `affiliations` * `coordinators`(only used in national pages + they must be listed as `contributors` as well) * `resources` -5. Items in the "[all tools and resources spreadsheet](https://docs.google.com/spreadsheets/d/16RESor_qQ_ygI0lQYHR23kbZJUobOWZUbOwhJbLptDE/edit#gid=268211668)" are tagged with already existing (merged) `page_id` from "Your role, Your domain, Your tasks, Tool assembly" and that Bert has been informed of the changes. +5. 
Make sure that listed tools or resources are tagged in the text with the [correct snippet](tool_resource_update) + that its metadata is described in the [tool_and_resource_list.yml](https://github.com/bedroesb/rdmkit/blob/demo/_data/tool_and_resource_list.yml) file. 6. Check if relevant recipes on FAIR Cookbook can be linked to the RDMkit page that is being added/changed ([Linking from RDMkit to FAIR Cookbook](editorial_board_guide#linking-from-rdmkit-to-fair-cookbook)). 7. The content is conform to RDMkit scope, [style](style_guide) and templates. 8. There are no [copyright](copyright) issues related to the content of the page. diff --git a/pages/contribute/page_metadata.md b/pages/contribute/page_metadata.md index 29ff71676..a610216e0 100644 --- a/pages/contribute/page_metadata.md +++ b/pages/contribute/page_metadata.md @@ -99,7 +99,7 @@ The main tools are described in the mains tools and resources table. How to add - name: Resource name description: A general description about the resource. how_to_access: explantation on how you can access this resource - instance_of: GitHub + instance_of: github related_pages: example_pages: [gp3, gp1, gp2] registry: diff --git a/pages/contribute/tool_resource_update.md b/pages/contribute/tool_resource_update.md index 252d113ed..35f5aec3a 100644 --- a/pages/contribute/tool_resource_update.md +++ b/pages/contribute/tool_resource_update.md @@ -1,52 +1,72 @@ --- -title: Updating the tool and resource list +title: Add new tool or resource summary: How to add a tool or resource to RDMkit --- ## Way of working -The tools or resources you will find on pages are a filtered set from a [bigger list](all_tools_and_resources). This filtering is done using page_id. If a tool or resource is tagged with for example the page_id `researcher`, it will be automatically listed on the corresponding page. +The tools or resources you will find on pages are a selected set from a [bigger list](all_tools_and_resources). 
This selection is based on the appearance of a tool or resource in the content of the page. -Since the `Data life cycle` pages are not listing tools, we do not allow page_id from this section in the tool table. page_id allowed in the tool table are page_id from the following sections: `Your domain`, `Your role`, `Your tasks` and `Tool assembly`. The page_id can be found in the [List of page IDs](website_overview). +Since the `Data life cycle` pages are not listing tools, no tools table will be present in these pages. Tool and resource mentions are allowed in the following sections: `Your domain`, `Your role`, `Your tasks` and `Tool assembly`. -The [all_tools_and_resources](all_tools_and_resources) list is based on the [csv file](https://github.com/elixir-europe/rdmkit/blob/master/_data/main_tool_and_resource_list.csv) in the `_data` directory of the RDMkit repository. Tools and resources can be manually linked to [FAIRsharing.org](https://fairsharing.org/), [Bio.tools](https://bio.tools) and [TeSS](https://tess.elixir-europe.org/), but every week we also run a fully automatic check that links tools and resources with the corresponding registries. A GitHub Bot will generate a Pull Request (PR) with the new links added to the main data file of the website (a yaml file). +The [all_tools_and_resources](all_tools_and_resources) list is based on the [yaml file](https://github.com/bedroesb/rdmkit/blob/demo/_data/tool_and_resource_list.yml) in the `_data` directory of the RDMkit repository. Tools and resources can be manually linked to [FAIRsharing.org](https://fairsharing.org/), [Bio.tools](https://bio.tools) and [TeSS](https://tess.elixir-europe.org/), but every week we also run a fully automatic check that links tools and resources with the corresponding registries. A GitHub Bot will generate a Pull Request (PR) with the new links added to the main data file of the website. 
-{% include callout.html type="important" content="The link with FAIRsharing,TeSS and Bio.tools is automatically done using GitHub actions and is weekly updated. These automatic links are not seen in the table. The search query to one of these registries for a tool or resource can be overwritten in the registry column of the main csv tool table. If no FAIRsharing ID, Bio.tools ID or TeSS Query is available for a source, but there is yet one automatically given (faulty), you can overwrite the automatic linking by adding 'NA' as registry." %} +{% include callout.html type="important" content="The link with FAIRsharing,TeSS and Bio.tools is automatically done using GitHub actions and is weekly updated. If no FAIRsharing ID, Bio.tools ID or TeSS Query is available for a source, but there is yet one automatically given (faulty), you can overwrite the automatic linking by adding 'NA' as registry." %} -## The main table +## The main yaml file - -The main table is based on [this google spreadsheet](https://docs.google.com/spreadsheets/d/16RESor_qQ_ygI0lQYHR23kbZJUobOWZUbOwhJbLptDE/edit#gid=268211668). - -The table consists of 5 columns: +Each tool or resource mentioned in the text has metadata stored in the [main yaml file](https://github.com/bedroesb/rdmkit/blob/demo/_data/tool_and_resource_list.yml). The metadata block for each tool consists of 5 attributes: +- **id**: The ID of a tool, in kebab-case, lowercase with hyphens. - **name**: the name of the tool or resource - **url**: URL to the main page of the tool or resource, make sure to let the URL start with `https://` - **description**: A short description of the tool or resource. Try to not use the characters `"` or `'` -- **registry**: 3 registries are supported: [Bio.tools](https://bio.tools), [FAIRsharing.org](https://fairsharing.org/) and [TeSS](https://tess.elixir-europe.org/). 
The keywords you can use respectively are: `biotools`, `fairsharing`, `fairsharing-coll` and `tess`, specifying the id or query with a colon). FAIRsharing collections have an ID that follows the pattern `bsg-s000XXX`. List multiple registries using a comma `, ` between the keywords to separate the key:value pairs. The values that are given in the table will always overrule the automatic links. If no FAIRsharing ID, Bio.tools ID or TeSS Query is available for a source, you can overwrite the automatic linking by adding 'NA' as registry. -- **related_pages**: This is used to tag the tools so it is listed on the correct page. We only allow page_id that are linked to a page. To find out what the page_id of a page is, please check its metadata attribute `page_id` at the top of the markdown file or the [List of page IDs](website_overview) page. Since the Data life cycle pages are not listing tools, we do not allow these page_id in the tool table. page_id allowed in the tool table are page_id from the following sections: `Your domain`, `Your role`, `Your tasks` and `Tool assembly`. List multiple page_id by using a comma `, ` between them. +- **registry**: 3 registries are supported: [Bio.tools](https://bio.tools), [FAIRsharing.org](https://fairsharing.org/) and [TeSS](https://tess.elixir-europe.org/). The keywords you can use respectively are: `biotools`, `fairsharing`, `fairsharing-coll` and `tess`, specifying the id or query with a colon. FAIRsharing collections have an ID that follows the pattern `bsg-s000XXX`. List registries under the `registry` attribute as `key: value pairs`. If no FAIRsharing ID, Bio.tools ID or TeSS Query is available for a source, you can overwrite the automatic linking by adding 'NA' as registry. 
+Example: -| name | url | description | registry | related_pages | -|----------|----------------------------------|-------------------------------------------------------------------------------------------|---------------------------------------------|--------------------------------------------------| -| Beacon | https://beacon-project.io/ | The Beacon protocol defines an open standard for genomics data discovery. | | researcher, data manager, IT support, human data | -| Bioconda | https://bioconda.github.io/ | Bioconda is a bioinformatics channel for the Conda package manager | biotools:bioconda | IT support, data analysis | -| BrAPI | https://www.brapi.org | Specification for a standard API for plant data: plant material, plant phenotyping data | | IT support, plants | -| Conda | https://docs.conda.io/en/latest/ | Open source package management system | | IT support, data analysis | -| COPO | https://copo-project.org/ | Portal for scientists to broker more easily rich metadata alongside data to public repos. | biotools:copo, fairsharing-coll:bsg-d001247 | metadata, researcher, plants | +```yml +- id: github + name: GitHub + url: https://github.com + description: + Versioning system, used for sharing code, as well as for sharing of + small data + registry: + tess: GitHub +``` ## What tool or resource can be added to the table -Tools and resources specifically mentioned in the text of the pages should be present in the main table. If necessary, tools and resources equivalent to the one mentioned in the text could also be added to the table. +Tools and resources specifically mentioned in the text of the pages should be present in the main table. 
## Making changes -Since the csv file is not user-friendly and prone to mistakes because of potential empty fields and use of commas, we do not recommend making changes using the GitHub website itself, instead we point people to the [Google spreadsheet](https://docs.google.com/spreadsheets/d/16RESor_qQ_ygI0lQYHR23kbZJUobOWZUbOwhJbLptDE/edit?usp=sharing). +1. Make sure the tool you want to add is not yet already described in the [yaml file](https://github.com/bedroesb/rdmkit/blob/demo/_data/tool_and_resource_list.yml). If so, go to step 3; if not, follow the next step. + +1. Click on the pencil icon seen on GitHub of the [main yaml file](https://github.com/bedroesb/rdmkit/blob/demo/_data/tool_and_resource_list.yml) as described in our GitHub Guide. Add your tool or resource at the bottom of the file following the structure described in the [The main yaml file section of this page](#the-main-yaml-file). Make sure the indentation follows that of the previously listed items. Copy the content of the yaml file and paste it into an online yaml validator in case of doubt. + +1. Copy the `tool_id` of the tool or resource + +1. Add the right context on RDMkit for the tool by mentioning it somewhere in the text using the following syntax: + ``` + {% raw %} + {% tool "tool_id" %} + {% endraw %} + ``` + + {% include callout.html type="important" content="Don't forget to add the `\"` double quotes around the tool_id and make sure to use the exact tool_id as described in the yaml file." %} + + Example: + + ``` + {% raw %} + {% tool "zenodo" %} is a powerful data publication service, which is supported by the European commission and focused on research data, including supplemental material like software, tables, figures or slides. + {% endraw %} + ``` + Will give: + + {% tool "zenodo" %} is a powerful data publication service, which is supported by the European commission and focused on research data, including supplemental material like software, tables, figures or slides.
-The editors will do the work on Git for you. All you need to do is: + -- Check if a tool or resource is already listed. -- Add or edit tools and resources as described above. -- Done! The editors will update the "tool and resource list" in GitHub regularly. In case your change is urgent, ping an editor in an issue or pull request. -## Let the editor and GitHub bot do the rest -If the PR of the editor containing the changes to the .csv table is merged, a PR will be opened by github-actions. Please check that the changes this PR proposes to the yaml file are in line with what you want to have changed. diff --git a/pages/contribute/working_with_git.md b/pages/contribute/working_with_git.md index 8bfd67e28..541e4b56b 100644 --- a/pages/contribute/working_with_git.md +++ b/pages/contribute/working_with_git.md @@ -54,7 +54,7 @@ Make sure you have cloned the rdmkit repo: cd rdmkit -To run the website locally, you can either use [Docker](https://www.docker.com/) or use Jekyll directly after installing various dependencies. +To run the website locally, you can either use {% tool "docker" %} or use Jekyll directly after installing various dependencies. ### Run using Docker diff --git a/pages/national_resources/TEMPLATE_resources.md b/pages/national_resources/TEMPLATE_resources.md index ca0b87d4e..d9c0d47fa 100644 --- a/pages/national_resources/TEMPLATE_resources.md +++ b/pages/national_resources/TEMPLATE_resources.md @@ -27,14 +27,14 @@ training: # Refer to entries of the "main_tool_ and_resource_table" if institutions, organizations and projects from the country contribute to the development of international tools and resources. 
ref_to_main_resources: - - + - # List here tools and resources mainly relevant for the specific country national_resources: - name: description: how_to_access: - instance_of: + instance_of: related_pages: tool_assembly: [] your_domain: [] diff --git a/pages/national_resources/be_resources.md b/pages/national_resources/be_resources.md index b92a79169..72bec4c0e 100644 --- a/pages/national_resources/be_resources.md +++ b/pages/national_resources/be_resources.md @@ -17,10 +17,10 @@ training: url: https://www.youtube.com/channel/UC7XUideTn8tFCOC-lhT9-Aw ref_to_main_resources: - - FAIRDOM-SEEK - - Galaxy - - WorkflowHub - - ENA upload tool + - fairdom-seek + - galaxy + - workflowhub + - ena-upload-tool national_resources: - name: RDM Guide @@ -35,7 +35,7 @@ national_resources: - name: Galaxy Belgium description: Galaxy Belgium is a Galaxy instance managed by the Belgian ELIXIR node, funded by the Flemish government, which utilizing infrastructure provided by the Flemish Supercomputer Center (VSC). how_to_access: - instance_of: Galaxy + instance_of: galaxy related_pages: tool_assembly: [] your_domain: [] @@ -45,7 +45,7 @@ national_resources: - name: DMPonline.be description: This instance of DMPonline is provided by the DMPbelgium Consortium. We can help you write and maintain data management plans for your research. how_to_access: Affiliation with one of the universities of the consortium is required. 
- instance_of: DMPRoadmap + instance_of: dmproadmap related_pages: tool_assembly: [] your_domain: [] diff --git a/pages/national_resources/ch_resources.md b/pages/national_resources/ch_resources.md index 9dd8079c3..3c7a95d69 100644 --- a/pages/national_resources/ch_resources.md +++ b/pages/national_resources/ch_resources.md @@ -118,7 +118,7 @@ national_resources: - name: UNIL DMP Online description: This instance of DMPonline is provided by the Service des ressources informationnelles et archives (UNIRIS) of the University of Lausanne (UNIL) to help its community of researchers to write a Data Management Plan (DMP). how_to_access: - instance_of: DMPonline + instance_of: dmponline related_pages: tool_assembly: [] your_domain: [] diff --git a/pages/national_resources/cy_resources.md b/pages/national_resources/cy_resources.md index 99442d7d5..b5c3ec933 100644 --- a/pages/national_resources/cy_resources.md +++ b/pages/national_resources/cy_resources.md @@ -26,14 +26,14 @@ training: # Refer to entries of the "main_tool_ and_resource_table" if institutions, organizations and projects from the country contribute to the development of international tools and resources. ref_to_main_resources: - - + - # List here tools and resources mainly relevant for the specific country national_resources: - name: description: how_to_access: - instance_of: + instance_of: related_pages: tool_assembly: [] your_domain: [] diff --git a/pages/national_resources/cz_resources.md b/pages/national_resources/cz_resources.md index 6b36b43a1..93fde021c 100644 --- a/pages/national_resources/cz_resources.md +++ b/pages/national_resources/cz_resources.md @@ -24,14 +24,14 @@ training: url: https://zenodo.org/communities/elixir-cz/ ref_to_main_resources: - - Galaxy - - Data Stewardship Wizard + - galaxy + - data-stewardship-wizard national_resources: - name: Galaxy MetaCentrum description: Galaxy MetaCentrum is a Galaxy instance managed by the Czech ELIXIR node and [e-INFRA](https://www.e-infra.cz/en). 
It provides extra support for [RepeatExplorer](https://repeatexplorer-elixir.cerit-sc.cz/) tool for plant genomic analysis. how_to_access: - instance_of: Galaxy + instance_of: galaxy related_pages: tool_assembly: [] your_domain: [plant] @@ -52,7 +52,7 @@ national_resources: - name: ownCloud@CESNET description: CESNET-hosted ownCloud is a 100 GB cloud storage freely available for Czech scientists to manage their data from any research projects. how_to_access: To use the CESNET-hosted ownCloud, you have to be an employee or a student of a Czech academic organization. For technical reasons, you have to have an account in [eduID.cz](https://eduid.cz). - instance_of: ownCloud + instance_of: owncloud related_pages: tool_assembly: [] your_domain: [] diff --git a/pages/national_resources/de_resources.md b/pages/national_resources/de_resources.md index 71f3b8b23..085243043 100644 --- a/pages/national_resources/de_resources.md +++ b/pages/national_resources/de_resources.md @@ -42,11 +42,11 @@ national_resources: url: https://rdmorganiser.github.io/ ref_to_main_resources: - - BRENDA - - e!DAL-PGP - - FAIRDOM-SEEK - - PANGAEA - - Silva + - brenda + - e-dal-pgp + - fairdom-seek + - pangaea + - silva --- @@ -74,7 +74,7 @@ This page provides useful information and resources with a focus on research dat ## Research Data Initiatives * [NFDI - National Research Data Infrastructure (Nationale Forschungsdaten Infrastruktur)](https://www.nfdi.de/) -* [re3data.org - Registry of Research Data Repositories](https://www.re3data.org/) +* {% tool "re3data" %} * [GFBio - German Federation for Biological Data](https://www.gfbio.org/) * [FAIRDOM - Consortium of Services for Research Data Management](https://fair-dom.org/) * [ZB MED - Infrastructure and research centre for information and data in the life sciences](https://www.zbmed.de/) diff --git a/pages/national_resources/ee_resources.md b/pages/national_resources/ee_resources.md index 7cac245b8..7813399ae 100--- 
a/pages/national_resources/ee_resources.md +++ b/pages/national_resources/ee_resources.md @@ -9,7 +9,7 @@ national_resources: - name: Galaxy Estonia description: This is the Estonian instance of Galaxy, which is an open source, web-based platform for data intensive biomedical research. how_to_access: - instance_of: Galaxy + instance_of: galaxy related_pages: tool_assembly: your_domain: @@ -20,7 +20,7 @@ national_resources: - name: REDCap Estonia description: This is the Estonian instance of REDCap, which is a secure web platform for building and managing online databases and surveys. how_to_access: - instance_of: REDCap + instance_of: redcap related_pages: tool_assembly: your_domain: diff --git a/pages/national_resources/fi_resources.md b/pages/national_resources/fi_resources.md index ee662d15f..dee120a84 100644 --- a/pages/national_resources/fi_resources.md +++ b/pages/national_resources/fi_resources.md @@ -14,7 +14,7 @@ national_resources: url: https://chipster.csc.fi/ - name: DMPTuuli description: Data management planning tool (Finland). - instance_of: DMPRoadmap + instance_of: dmproadmap how_to_access: related_pages: tool_assembly: [csc] @@ -88,16 +88,7 @@ national_resources: your_role: [researcher, data_manager] your_tasks: [data_analysis] url: https://research.csc.fi/computing#cloud-computing - - name: IceBear - description: A browser-based Research Data Management tool for protein cyrstallization that offers flexible crystal fishing workbench, no-typing submission for crystal shipment, and linking crystals and datasets including PDB depositions. 
- instance_of: - how_to_access: - related_pages: - tool_assembly: - your_role: [researcher, data_manager] - your_tasks: [data_analysis] - your_domain: [structural_bioinformatics] - url: https://icebear.fi/ + --- ## Introduction diff --git a/pages/national_resources/fr_resources.md b/pages/national_resources/fr_resources.md index 2b2af0aed..567d7c04d 100644 --- a/pages/national_resources/fr_resources.md +++ b/pages/national_resources/fr_resources.md @@ -5,14 +5,14 @@ contributors: [Olivier Collin] coordinators: [] ref_to_main_resources: - - PHIS - - FAIDARE + - phis + - faidare national_resources: - name: DMP OPIDoR description: Online questionnaire for the development of data management plans - repository of DMPs. how_to_access: - instance_of: DMPRoadmap + instance_of: dmproadmap related_pages: tool_assembly: [ifb] your_role: [researcher, data_manager] diff --git a/pages/national_resources/gr_resources.md b/pages/national_resources/gr_resources.md index 6bbc879f2..bdcdbe6ec 100644 --- a/pages/national_resources/gr_resources.md +++ b/pages/national_resources/gr_resources.md @@ -27,14 +27,14 @@ training: # Refer to entries of the "main_tool_ and_resource_table" if institutions, organizations and projects from the country contribute to the development of international tools and resources. ref_to_main_resources: - - + - # List here tools and resources mainly relevant for the specific country national_resources: - name: description: how_to_access: - instance_of: + instance_of: related_pages: tool_assembly: [] your_domain: [] diff --git a/pages/national_resources/lu_resources.md b/pages/national_resources/lu_resources.md index 7c45df3e2..cf669bcf7 100644 --- a/pages/national_resources/lu_resources.md +++ b/pages/national_resources/lu_resources.md @@ -20,18 +20,18 @@ training: # Refer to entries of the "main_tool_ and_resource_table" if institutions, organizations and projects from the country contribute to the development of international tools and resources. 
ref_to_main_resources: - - FAIR Cookbook - - COVID-19 Disease Map - - DAISY - - Data Catalog - - DPIA Knowledge Model + - fair-cookbook + - covid-19-disease-map + - daisy + - data-catalog + - dpia-knowledge-model # List here tools and resources mainly relevant for the specific country national_resources: - name: learning.DSW description: A training instance of Data Steward Wizard (DSW), which has the FNR and the DPIA templates. how_to_access: registration - instance_of: Data Stewardship Wizard + instance_of: data-stewardship-wizard related_pages: your_tasks: [dmp] url: https://learning.ds-wizard.org/dashboard @@ -39,7 +39,7 @@ national_resources: - name: DPMRoadmap @ ELIXIR Luxembourg description: This instance of DMPOnline is provided by ELIXIR Luxembourg and has FNR template for Data Management Plan (DMP). how_to_access: registration - instance_of: DMPRoadmap + instance_of: dmproadmap related_pages: your_tasks: [dmp] url: https://dmponline.elixir-luxembourg.org/ diff --git a/pages/national_resources/nl_resources.md b/pages/national_resources/nl_resources.md index 3dfba9817..5411498d1 100644 --- a/pages/national_resources/nl_resources.md +++ b/pages/national_resources/nl_resources.md @@ -113,11 +113,10 @@ national_resources: # Refer to entries of the "main_tool_ and_resource_table" if institutions, organizations and projects from the country contribute to the development of international tools and resources. 
ref_to_main_resources: - - FAIRsharing - - Molgenis - - XNAT - - Data Stewardship Wizard - - WikiPathways + - fairsharing + - molgenis + - xnat + - data-stewardship-wizard --- diff --git a/pages/national_resources/no_resources.md b/pages/national_resources/no_resources.md index d5ffc83ac..9ec41c07b 100644 --- a/pages/national_resources/no_resources.md +++ b/pages/national_resources/no_resources.md @@ -22,22 +22,22 @@ training: national_resources: - name: Feide - description: Feide is the national solution for secure login and data exchange in education and research. Feide can be linked with [ELIXIR-AAI](https://elixir-europe.org/services/compute/aai) through [eduGAIN](https://edugain.org/). + description: Feide is the national solution for secure login and data exchange in education and research. Feide can be linked with [Life Science Login (LS Login)](https://elixir-europe.org/services/compute/aai) through [eduGAIN](https://edugain.org/). how_to_access: Everyone with an affiliation to a Norwegian academic institution. related_pages: tool_assembly: [tsd, nels, marine_assembly] url: https://www.feide.no/ - name: DS-Wizard ELIXIR-Norway description: DS-Wizard is a tool to aid the creation, organisaton and sharing of data management plans. It provides scientists with guidance, facilitating the understanding of the key components of FAIR-oriented Data Stewardship. The template in this instance provides additional guidance on resources, laws and regulations in Norway. - how_to_access: ELIXIR-AAI with Feide or upon registration - instance_of: Data Stewardship Wizard + how_to_access: Life Science Login (LS Login) with Feide or upon registration + instance_of: data-stewardship-wizard related_pages: tool_assembly: [tsd, nels, marine_assembly] your_tasks: [dmp] url: https://elixir-no.ds-wizard.org/ - name: EasyDMP description: DMP tool from [UNINETT Sigma2 (SIKT)](https://www.sigma2.no/). 
- instance_of: EasyDMP + instance_of: how_to_access: Feide related_pages: your_tasks: [dmp] @@ -58,8 +58,8 @@ national_resources: url: https://covid19dataportal.no/ - name: Norwegian Federated EGA description: Federated instance collects metadata of -omics data collections stored in national or regional archives and makes them available for search through the main EGA portal. With this solution, sensitive data will not physically leave the country, but will reside on TSD. - how_to_access: ELIXIR-AAI; intended for data from Norwegian institutions - instance_of: The European Genome-phenome Archive (EGA) + how_to_access: Life Science Login (LS Login); intended for data from Norwegian institutions + instance_of: the-european-genome-phenome-archive related_pages: your_domain: [human_data] your_tasks: [sensitive, existing_data, data_publication] @@ -67,7 +67,7 @@ national_resources: url: https://ega.elixir.no/ - name: usegalaxy.no description: Galaxy is an open source, web-based platform for data intensive biomedical research. This instance of Galaxy is coupled with NeLS for easy data transfer. - instance_of: Galaxy + instance_of: galaxy how_to_access: Feide or upon application related_pages: your_tasks: [data_analysis, sensitive, existing_data, data_publication] @@ -95,7 +95,7 @@ national_resources: - name: Norwegian Research and Education Cloud (NREC) description: NREC is an Infrastructure-as-a-Service (IaaS) project between the University of Bergen and the University of Oslo, with additional contributions from NeIC (Nordic e-Infrastructure Collaboration) and Uninett., commonly referred to as a cloud infrastructure An IaaS is a self-service infrastructure where you spawn standardized servers and storage instantly, as needed, from a given resource quota. 
how_to_access: All users at educational institutions via Feide - instance_of: OpenStack + instance_of: openstack related_pages: your_tasks: [data_analysis, storage] url: https://www.nrec.no/ @@ -138,7 +138,7 @@ national_resources: - name: DataverseNO description: DataverseNO is a national, generic repository for open research data. Various Norwegian research institutions have established a partner agreements about using DataverseNO as institutional repositories for open research data. how_to_access: open access - instance_of: DATAVERSE + instance_of: dataverse related_pages: your_domain: [] your_tasks: [data_publication] @@ -155,8 +155,8 @@ national_resources: tool_assembly: [tsd] url: https://nettskjema.no/ ref_to_main_resources: - - MarDB - - MarFun + - mardb + - marfun --- diff --git a/pages/national_resources/pt_resources.md b/pages/national_resources/pt_resources.md index 0e2e99d27..14403548b 100644 --- a/pages/national_resources/pt_resources.md +++ b/pages/national_resources/pt_resources.md @@ -23,7 +23,7 @@ national_resources: url: http://services.biodata.pt/ - name: BioData.pt Data Management Portal (DMPortal) description: This instance of DataVerse is provided by the BioData.pt. We can help you write and maintain data management plans for your research. - instance_of: DATAVERSE + instance_of: dataverse how_to_access: related_pages: your_role: [researcher, data_manager] @@ -31,15 +31,14 @@ national_resources: url: https://dmportal.biodata.pt/ - name: BioData.pt Data Stewardship Wizard description: Local instance of Data Stewardship Wizard. You can use this tool to create your own Data Management Plans. 
- instance_of: Data Stewardship Wizard - how_to_access: + instance_of: data-stewardship-wizard related_pages: your_role: [researcher, data_manager] your_tasks: [dmp] url: https://biodata-pt.ds-wizard.org/ - name: Ready for BioData Management description: Capacity building program in data management for the life sciences to empower researchers and institutions in managing their data more effectively and efficiently. - instance_of: Data Stewardship Wizard + instance_of: data-stewardship-wizard how_to_access: related_pages: your_role: [researcher] diff --git a/pages/national_resources/se_resources.md b/pages/national_resources/se_resources.md index 2e3370df3..6aded0b5a 100644 --- a/pages/national_resources/se_resources.md +++ b/pages/national_resources/se_resources.md @@ -17,14 +17,14 @@ national_resources: - name: DS-Wizard ELIXIR-SE description: Data Stewardship Wizard is a tool to be used when planning for data management, including generating a data management plan (DMP). This instance provides guidance with focus towards Swedish life science researchers, including national resources. how_to_access: ELIXIR AAI login - instance_of: Data Stewardship Wizard + instance_of: data-stewardship-wizard related_pages: your_tasks: [dmp] url: https://dsw.scilifelab.se/ - name: SciLifeLab Data Repository (Figshare) description: A repository for publishing any kind of research-related data, e.g. documents, figures, or presentations. how_to_access: Available to everyone with an affiliation to a Swedish academic institution. 
- instance_of: FigShare + instance_of: figshare related_pages: your_tasks: [existing_data, data_publication] url: https://scilifelab.figshare.com/ @@ -91,7 +91,7 @@ If personal data is processed in your research, contact your institute’s Data The [**SciLifeLab Data Centre**](https://www.scilifelab.se/data) provides services for IT and data management, including Data Stewardship Wizard instance (for writing data management plans), the Swedish COVID-19 data portal, and the SciLifeLab Data Repository. -Data stewards at [**NBIS**](https://nbis.se/) (ELIXIR-SE) provide consultation and support services regarding data management questions, including e.g. guidance when writing data management plans and when doing submissions to domain-specific repositories. For information about this and other resources at NBIS please see the [**Data Management**](https://nbis.se/infrastructure/data-management/) page. An upcoming resource is the [**Swedish Sensitive Data Archive**](https://nbis.se/infrastructure/sensitive-data-archive.html), a secure data archive and sharing platform for sensitive datasets, which will be integrated with the [**Federated EGA network**](https://ega-archive.org/federated). +Data stewards at [**NBIS**](https://nbis.se/) (ELIXIR-SE) provide consultation and support services regarding data management questions, including e.g. guidance when writing data management plans and when doing submissions to domain-specific repositories. For information about this and other resources at NBIS please see the [**Data Management**](https://nbis.se/infrastructure/data-management/) page. An upcoming resource is the [**Swedish Sensitive Data Archive**](https://nbis.se/infrastructure/sensitive-data-archive.html), a secure data archive and sharing platform for sensitive datasets, which will be integrated with the {% tool "the-european-genome-phenome-archive" %}. 
The [**Swedish National Infrastructure for Computing**](https://snic.se/) (SNIC) is a national research infrastructure that provides resources and user support for large scale computation and data storage to meet the needs of researchers from all scientific disciplines and from all over Sweden. Of particular use for life science researchers is the [**SNIC-SENS**](https://www.uppmax.uu.se/projects-and-collaborations/snic-sens/) project which provides high-performance computing resources for analyzing sensitive data. diff --git a/pages/national_resources/si_resources.md b/pages/national_resources/si_resources.md index a36f68ef8..c16d09ba2 100644 --- a/pages/national_resources/si_resources.md +++ b/pages/national_resources/si_resources.md @@ -27,14 +27,14 @@ training: # Refer to entries of the "main_tool_ and_resource_table" if institutions, organizations and projects from the country contribute to the development of international tools and resources. ref_to_main_resources: - - + - # List here tools and resources mainly relevant for the specific country national_resources: - name: description: how_to_access: - instance_of: + instance_of: related_pages: tool_assembly: [] your_domain: [] diff --git a/pages/national_resources/uk_resources.md b/pages/national_resources/uk_resources.md index 25429305c..a372618ac 100644 --- a/pages/national_resources/uk_resources.md +++ b/pages/national_resources/uk_resources.md @@ -9,7 +9,7 @@ national_resources: - name: DMPonline description: "DMPonline is a web-based tool that supports researchers to develop data management and sharing plans. It contains the latest funder templates and best practice guidelines to support users to create good quality DMPs." 
how_to_access: - instance_of: DMPRoadmap + instance_of: dmproadmap related_pages: your_role: [data_manager, researcher] your_tasks: [dmp] @@ -52,9 +52,9 @@ national_resources: url: http://intermine.org/ ref_to_main_resources: - - WorkflowHub - - FAIRDOM-SEEK - - COPO + - workflowhub + - fairdom-seek + - copo --- diff --git a/pages/tool_assembly.md b/pages/tool_assembly.md index 31dd34653..f91585115 100644 --- a/pages/tool_assembly.md +++ b/pages/tool_assembly.md @@ -5,4 +5,4 @@ search_exclude: true Tool Assemblies are examples of combining tools to cover data management tasks across several stages of the data life cycle. These can be tools that one or several communities combine to support RDM that can be picked up or accessed and used by others. The assemblies are aimed for users in a specific location and/or for users within a specific domain. -{% include section-navigation-tiles.html type="tool_assembly" affiliations="true" search=true %} +{% include section-navigation-tiles.html type="tool_assembly" affiliations="true" search=true except="tool_assembly.md" %} diff --git a/pages/tool_assembly/csc_assembly.md b/pages/tool_assembly/csc_assembly.md index 576b2e37a..14be6caca 100644 --- a/pages/tool_assembly/csc_assembly.md +++ b/pages/tool_assembly/csc_assembly.md @@ -56,13 +56,13 @@ When you start [collecting](collecting) data and need a storing environment wher ### Data processing and analysis For [processing](processing), [analysing](analysing) and [storing data](storage) during the research project, CSC offers several [computing platforms](https://research.csc.fi/computing). These include both environments for non-sensitive and [sensitive data](sensitive_data). Depending on your needs, you can choose from a wide variety of computing resources: use [Chipster](https://chipster.csc.fi/) software for high-throughput data such as RNA-seq and single cell RNA-seq, build your own custom virtual machine, or utilise the full power of our world-class supercomputers. 
-Supercomputers Puhti and Mahti can be used for larger scale analysis and simulations. They will soon be accompanied with the world-class supercomputer LUMI. Pouta and Rahti cloud computing services offer more flexibility, allowing the user to manage the infrastructure. CSC's computers have a wide range of [preinstalled scientific software and databases](https://research.csc.fi/bioscience-programs) with usage instructions. +Supercomputers Puhti and Mahti can be used for larger scale analysis and simulations. They will soon be accompanied with the world-class supercomputer {% tool "lumi" %}. Pouta and Rahti cloud computing services offer more flexibility, allowing the user to manage the infrastructure. CSC's computers have a wide range of [preinstalled scientific software and databases](https://research.csc.fi/bioscience-programs) with usage instructions. This summer, CSC will be releasing beta versions of new services for sensitive data management: Sensitive Data Desktop (SD Desktop) and Sensitive Data Connect (SD Connect). Sensitive Data Submit (SD Submit) will be available later this year. The new Sensitive Data Services are designed to facilitate collaborative research across Finland and between Finnish academics and their collaborators. [SD Desktop](https://research.csc.fi/-/sd-desktop) is a service that allows a user and their authorized colleagues to access a private computing environment workspace via a web browser and analyze the data within a secure cloud. [SD Connect](https://research.csc.fi/-/sd-connect) allows you to collect, organize and share your encrypted sensitive data in a secure manner via web browser. ### Data sharing and publishing -It is recommended to [publish](data_publication) data in data specific repositories. You can find many options from [ELIXIR Deposition Databases for Biomolecular data web page](https://elixir-europe.org/platforms/data/elixir-deposition-databases). 
Furthermore, CSC and ELIXIR-FI will offer Federated EGA for sensitive human biomedical data that is linked to the central European Genome-phenome Archive and the SD Submit at the end of 2021. +It is recommended to [publish](data_publication) data in data specific repositories. You can find many options from {% tool "elixir-deposition-databases-for-biomolecular-data" %}. Furthermore, CSC and ELIXIR-FI will offer Federated EGA for sensitive human biomedical data that is linked to the central European Genome-phenome Archive and the SD Submit at the end of 2021. SD Submit allows you to publish sensitive data securely in a national repository. The service will give you the tools to describe your dataset (adding the appropriate metadata) and assign a permanent identifier (DOI). After publication, you will remain the data controller and decide according to specific policies, who can access the sensitive data for reuse. According to the GDPR, your data will remain within the Finnish borders and, at the same time, they will be accessible and discoverable according to FAIR data principles. diff --git a/pages/tool_assembly/galaxy_assembly.md b/pages/tool_assembly/galaxy_assembly.md index 4793d48a5..58e8eb67b 100644 --- a/pages/tool_assembly/galaxy_assembly.md +++ b/pages/tool_assembly/galaxy_assembly.md @@ -40,8 +40,12 @@ Galaxy also provides [open infrastructure ready to use for researchers worldwide Galaxy can be used at different stages of the data life cycle, covering from the data collection to the reuse steps. - -
+ +
@@ -51,15 +55,16 @@ Galaxy can be used at different stages of the data life cycle, covering from the

Access to databases

Customised data access

@@ -94,7 +99,7 @@ Galaxy can be used at different stages of the data life cycle, covering from the

Import workflows

@@ -150,7 +155,7 @@ Galaxy can be used at different stages of the data life cycle, covering from the

Export to remote sources

    @@ -211,7 +216,7 @@ Galaxy can be used at different stages of the data life cycle, covering from the

    Import artefacts

    • Histories (own, shared by others)
    • -
    • Workflows from the WorkflowHub
    • +
    • Workflows from the {% tool "workflowhub" %}
diff --git a/pages/tool_assembly/ifb_assembly.md b/pages/tool_assembly/ifb_assembly.md index 7d07bbc97..17baac57a 100644 --- a/pages/tool_assembly/ifb_assembly.md +++ b/pages/tool_assembly/ifb_assembly.md @@ -33,7 +33,7 @@ IFB data management tool assembly supports data management activities of scienti IFB and the underlying infrastructure are accessible to researchers in France and their foreign collaborators. Researchers that would like to know more about IFB services can find specific contact details at the unified [IFB help desk page](https://www.france-bioinformatique.fr/en/help-desk/) and get support through the dedicated help pages. Depending on the resources, fees may apply. It is therefore advisable to contact them during the planning phase of the project. -The way you can access the IFB depends on the type of resources (for instance, cluster or cloud), and there will be different authentication procedures (local, national or international). For example, the Biosphere cloud federation uses the EduGAIN federation for authentication, while useGalaxy.fr uses the [ELIXIR AAI](https://elixir-europe.org/services/compute/aai) authentication. To have additional information on how to access the IFB contact the [help desk](https://www.france-bioinformatique.fr/en/help-desk/). +The way you can access the IFB depends on the type of resources (for instance, cluster or cloud), and there will be different authentication procedures (local, national or international). For example, the Biosphere cloud federation uses the EduGAIN federation for authentication, while useGalaxy.fr uses the {% tool "life-science-login" %} authentication. To have additional information on how to access the IFB contact the [help desk](https://www.france-bioinformatique.fr/en/help-desk/). ## For what can you use the IFB data management tool assembly? @@ -76,12 +76,10 @@ IFB infrastructure can also help you with bioinformatics analysis of your data. 
### Data sharing and publishing -It is good practice to [publish](data_publication) your data on repositories. IFB encourages researchers to browse the list of [ELIXIR deposition databases for biomolecular data](https://elixir-europe.org/platforms/data/elixir-deposition-databases) to find the appropriate repository. +It is good practice to [publish](data_publication) your data on repositories. IFB encourages researchers to browse the list of {% tool "elixir-deposition-databases-for-biomolecular-data" %} to find the appropriate repository. The french scientific community benefit from [Recherche.Data.Gouv](https://recherche.data.gouv.fr/en) a national Dataverse repository. This repository is associated with [thematic reference centres](https://recherche.data.gouv.fr/en/page/thematic-reference-centers-providing-expertise-for-individual-scientific-fields) and data management clusters. IFB is the reference centre for Life Science. -If you are a member of INRAE (one of the stakeholders of IFB infrastructure), you can access the institutional instance of the Dataverse platform [Data INRAE](https://data.inrae.fr). Data INRAE can be used by researchers to store and describe datasets during the project, and to share them according to specific sharing settings. - You can also browse [cat-OPIDoR](https://cat.opidor.fr/index.php/Cat_OPIDoR,_wiki_des_services_dédiés_aux_données_de_la_recherche) for an overview of the different services related to data management provided by IFB infrastructure and its stakeholders in France. 
### Compliance monitoring & measurement diff --git a/pages/tool_assembly/marine_metagenomics_assembly.md b/pages/tool_assembly/marine_metagenomics_assembly.md index 0408abf58..d6a99ad93 100644 --- a/pages/tool_assembly/marine_metagenomics_assembly.md +++ b/pages/tool_assembly/marine_metagenomics_assembly.md @@ -40,14 +40,14 @@ If you use one of the National Norwegian research infrastructures, such as the N The solutions for data storage, sharing and computation are built on the services and infrastructure delivered by ELIXIR Norway described in the Norwegian e-Infrastructure for Life Sciences (NeLS) [tool assembly](nels_assembly). ### Data processing and analysis -The Marine Metagenomics Portal provides a complete service for analysis of marine metagenomic data through the tool [META-pipe](https://mmp2.sfb.uit.no/metapipe/). META-pipe is a pipeline that can assemble your high-throughput sequence data, functionally annotate the predicted genes, and taxonomically profile your marine metagenomics samples, helping you to gain insight into the phylogenetic diversity, metabolic and functional potential of environmental communities. You can read more [details about META-pipe in the publication](https://www.ncbi.nlm.nih.gov/labs/pmc/articles/PMC6480938/). Norwegian users with Feide access can access the online version of META-pipe. For other users META-pipe is [downloadable](https://gitlab.com/uit-sfb/metapipe) and can easily be run on any computing environment (e.g. any Linux workstation, SLURM cluster or Kubernetes). +The {% tool "marine-metagenomics-portal" %} provides a complete service for analysis of marine metagenomic data through the tool [META-pipe](https://mmp2.sfb.uit.no/metapipe/). 
META-pipe is a pipeline that can assemble your high-throughput sequence data, functionally annotate the predicted genes, and taxonomically profile your marine metagenomics samples, helping you to gain insight into the phylogenetic diversity, metabolic and functional potential of environmental communities. You can read more [details about META-pipe in the publication](https://www.ncbi.nlm.nih.gov/labs/pmc/articles/PMC6480938/). Norwegian users with Feide access can access the online version of META-pipe. For other users META-pipe is [downloadable](https://gitlab.com/uit-sfb/metapipe) and can easily be run on any computing environment (e.g. any Linux workstation, SLURM cluster or Kubernetes). -[Usegalaxy.no](https://usegalaxy.no/) is a Norwegian instance of the [Galaxy](https://wiki.galaxyproject.org/) web-based platform for data intensive life science research that provides users with a unified, easy-to-use graphical interface to a host of more than 200 different analysis tools. Here, you can find tools for a wide variety of analysis for your marine metagenomic and genomic data. The tools are publicly available in the [Galaxy Toolshed](https://toolshed.g2.bx.psu.edu/) which serves as an "appstore" so you can easily transfer them to your favourite Galaxy instance anywhere. You can run the tools interactively, one by one, or combine them into multi-step workflows that can be executed as a single analysis. Premade workflows (i.e for Taxonomic classification of metagenomic sequences) are provided, and you can request installation of your favourite tool by contacting the [ELIXIR Norway help desk](mailto:contact@bioinfo.no?subject=marine metagenomics). +[Usegalaxy.no](https://usegalaxy.no/) is a Norwegian instance of the {% tool "galaxy" %} web-based platform for data intensive life science research that provides users with a unified, easy-to-use graphical interface to a host of more than 200 different analysis tools. 
Here, you can find tools for a wide variety of analysis for your marine metagenomic and genomic data. The tools are publicly available in the [Galaxy Toolshed](https://toolshed.g2.bx.psu.edu/) which serves as an "appstore" so you can easily transfer them to your favourite Galaxy instance anywhere. You can run the tools interactively, one by one, or combine them into multi-step workflows that can be executed as a single analysis. Premade workflows (i.e for Taxonomic classification of metagenomic sequences) are provided, and you can request installation of your favourite tool by contacting the [ELIXIR Norway help desk](mailto:contact@bioinfo.no?subject=marine metagenomics). ### Data sharing and publishing -ELIXIR Norway acts as a [broker for Norwegian end-users](https://elixir.no/news/52/63/ELIXIR-Norway-broker-data-to-ENA) that wish to submit data to [ELIXIR Deposition Databases](https://elixir-europe.org/platforms/data/elixir-deposition-databases#:~:text=ELIXIR%20Deposition%20Database%20list%20%20%20%20Deposition,%20%20%20%208%20more%20rows%20) (such as ENA), providing support in submitting the data on behalf of the data owners directly from the National e-infrastructure for Life Science (NeLS). +ELIXIR Norway acts as a [broker for Norwegian end-users](https://elixir.no/news/52/63/ELIXIR-Norway-broker-data-to-ENA) that wish to submit data to {% tool "elixir-deposition-databases-for-biomolecular-data" %} (such as ENA), providing support in submitting the data on behalf of the data owners directly from the National e-infrastructure for Life Science (NeLS). If you need help with publishing or are interested in using the brokering service, please contact the [ELIXIR Norway help desk](mailto:contact@bioinfo.no?subject=marine%20metagenomics). ### Data reuse -The [Marine Metagenomics Portal (MMP)](https://mmp2.sfb.uit.no/) provides you with high-quality curated and freely accessible microbial genomics and metagenomics resources. 
Through MMP you can access the The [Marine reference databases (MarRef)](https://mmp2.sfb.uit.no/marref/), [Marine Genome Database (MarDb)](https://mmp2.sfb.uit.no/mardb/), [(MarFun; database for marine fungi genomes)](https://mmp2.sfb.uit.no/marfun/), and [(SalDB; salmon specific database of genome sequenced prokaryotes)](https://mmp2.sfb.uit.no/saldb/) databases. They are built by aggregating data from a number of publicly available sequences, taxonomy and literature databases in a semi-automatic fashion. Other databases or resources such as bacterial diversity and culture collections databases, web mapping service and ontology databases are used extensively for curation of metadata. At present the MarRef contains nearly 1000 complete microbial genomes, and MarDB hosts more than 13,000 non-complete genomes. The MAR database entries are cross-referenced with ENA and the [World Register of Marine Species (WoRMS)](http://marinespecies.org/) - you can read the [publication about the Mar databases](https://pubmed.ncbi.nlm.nih.gov/29106641/). +The {% tool "marine-metagenomics-portal" %} provides you with high-quality curated and freely accessible microbial genomics and metagenomics resources. Through MMP you can access the Marine reference databases ({% tool "marref" %}), Marine Genome Database ({% tool "mardb" %}), database for marine fungi genomes ({% tool "marfun" %}), and salmon specific database of genome sequenced prokaryotes ({% tool "saldb" %}) databases. They are built by aggregating data from a number of publicly available sequences, taxonomy and literature databases in a semi-automatic fashion. Other databases or resources such as bacterial diversity and culture collections databases, web mapping service and ontology databases are used extensively for curation of metadata. At present the {% tool "marref" %} contains nearly 1000 complete microbial genomes, and {% tool "mardb" %} hosts more than 13,000 non-complete genomes.
The MAR database entries are cross-referenced with ENA and the [World Register of Marine Species](https://www.marinespecies.org) - you can read the [publication about the Mar databases](https://pubmed.ncbi.nlm.nih.gov/29106641/). diff --git a/pages/tool_assembly/molgenis_assembly.md b/pages/tool_assembly/molgenis_assembly.md index 9540f6ab1..834063978 100644 --- a/pages/tool_assembly/molgenis_assembly.md +++ b/pages/tool_assembly/molgenis_assembly.md @@ -27,7 +27,7 @@ training: ## What is the Molgenis tool assembly? -[MOLGENIS](https://www.molgenis.org/) is a modular web application for scientific data. MOLGENIS was born from molecular genetics research (and was called 'molecular genetics information system') but has become relevant to many other scientific areas such as biobanking, rare disease research, patient registries and even energy research. MOLGENIS provides user-friendly and scalable software infrastructures to capture, exchange, and exploit the large amounts of data that is being produced by scientific organizations all around the world. To get an idea of what the software can do, visit our MOLGENIS YouTube channel or our demo page via the [related pages](#related-pages). +{% tool "molgenis" %} is a modular web application for scientific data. MOLGENIS was born from molecular genetics research (and was called 'molecular genetics information system') but has become relevant to many other scientific areas such as biobanking, rare disease research, patient registries and even energy research. MOLGENIS provides user-friendly and scalable software infrastructures to capture, exchange, and exploit the large amounts of data that is being produced by scientific organizations all around the world. To get an idea of what the software can do, visit our MOLGENIS YouTube channel or our demo page via the [related pages](#related-pages). 
MOLGENIS is an [ELIXIR Recommended Interoperability Resource](https://elixir-europe.org/platforms/interoperability/rirs#ELIXIR%20Recommended%20Interoperability%20Resources%20list). One of the key features is that it has a completely customisable data system, allowing you to model your data according to your needs. This creates flexibility that other, more static, database applications often lack. It is web-based, meaning you setup a server, install and configure MOLGENIS, load your data and share it. If your data is ready, setting up a useful online research database application can be done in few hours. Another key feature is that MOLGENIS is modular, having all kinds of extension modules to store and interact with your data. A good example are interfaces to create R and Python scripts that interact with your data. This enables you to add your own statistical modules to run statistical analysis, or create plots based on your data within the online environment. diff --git a/pages/tool_assembly/nels_assembly.md b/pages/tool_assembly/nels_assembly.md index 8e6747de4..752a8f9b1 100644 --- a/pages/tool_assembly/nels_assembly.md +++ b/pages/tool_assembly/nels_assembly.md @@ -27,8 +27,8 @@ NeLS and the underlying infrastructure are accessible for researchers in Norway {% include image.html file="NeLS_toolkit.svg" caption="Figure 1. The Norwegian e-Infrastructure for Life Sciences (NeLS) Data Management tool assembly." alt="NeLS RDMkit" %} -You can access all tools in NeLS using the the national solution for secure login and data sharing in the educational and research sector [FEIDE](https://www.feide.no/), when coupled with [ELIXIR AAI](https://elixir-europe.org/services/compute/aai). -The NeLS Data Management tool assembly provides support with [Data Management Planning](planning) through an [instance of the Data Steward Wizard](https://elixir-no.ds-wizard.org) following the guidelines of the major national and European funding bodys. 
Dedicated references guide you through national infrastructure, resources, laws and regulations and also include the [Tryggve ELSI Checklist](https://neic.no/tryggve/links/) for Ethical, Legal and Social Implications. Soon you will be able to submit storage request forms for [Data Storage](storage) in NeLS with defined access permissions through the Data Stewardship Wizard. +You can access all tools in NeLS using the national solution for secure login and data sharing in the educational and research sector [FEIDE](https://www.feide.no/), when coupled with {% tool "life-science-login" %}. +The NeLS Data Management tool assembly provides support with [Data Management Planning](planning) through an [instance of the Data Steward Wizard](https://elixir-no.ds-wizard.org) following the guidelines of the major national and European funding bodies. Dedicated references guide you through national infrastructure, resources, laws and regulations and also include the {% tool "tryggve-elsi-checklist" %} for Ethical, Legal and Social Implications. Soon you will be able to submit storage request forms for [Data Storage](storage) in NeLS with defined access permissions through the Data Stewardship Wizard. [Data Storage](storage) is the core functionality of NeLS and builds upon a 3 layer tiered system: the first layer is intended for short-term storage when computing, processing and analysing data; the second layer of medium capacity (NeLS) is intended for sharing and storing active research data, while the third layer (StoreBioinfo) of high capacity is intended for longer storage until end of a project. Data in the second (NeLS) layer is protected against hardware failure on disk or server level and snapshots of the data are kept for 4 weeks. The third layer is implemented on top of the national research data storage solutions operated by Sigma2 Uninett A/S and is protected against data loss by snapshots and geo-replication.
@@ -37,6 +37,6 @@ National Norwegian research infrastructures, such as the Norwegian sequencing in For [Processing](processing) and [Analysing](analysing) your data, the NeLS Data Management tool assembly provides access to a national [instance of Galaxy](https://usegalaxy.no) with ~2000 tools. Data stored in NeLS is directly available within this Galaxy instance, hence you do not need to keep local copies of your data. -In order to help you keeping track of metadata, NeLS is integrated with the [SEEK](https://seek4science.org/) web-based cataloguing and sharing platform. You can use any instance of SEEK such as the public [FAIRDOMHub](https://fairdomhub.org/) to [manage metadata](metadata_management) associated with your data stored in NeLS and access the data through SEEK. SEEK uses the ISA (Investigation, Study, Assay) structure to organise your data and recommended minimal information such as sample characteristics, technologies, measurements and relationships between samples, data and models. Public SEEK instances like the [FAIRDOMHub](https://fairdomhub.org/) can also be used to collaborate on data and to [share](sharing) them publicly. If you are doing modelling, you can also use the inbuilt [JWS Online](https://jjj.mib.ac.uk/) simulator for your SBML models. +In order to help you keeping track of metadata, NeLS is integrated with the {% tool "fairdom-seek" %} web-based cataloguing and sharing platform. You can use any instance of FAIRDOM-SEEK such as the public {% tool "fairdomhub" %} to [manage metadata](metadata_management) associated with your data stored in NeLS and access the data through FAIRDOM-SEEK. FAIRDOM-SEEK uses the ISA (Investigation, Study, Assay) structure to organise your data and recommended minimal information such as sample characteristics, technologies, measurements and relationships between samples, data and models. 
Public FAIRDOM-SEEK instances like the {% tool "fairdomhub" %} can also be used to collaborate on data and to [share](sharing) them publicly. If you are doing modelling, you can also use the inbuilt {% tool "jws-online" %} simulator for your SBML models. -One recommended way to share your data is to deposit them in the [ELIXIR Deposition Databases for Biomolecular Data](https://elixir-europe.org/platforms/data/elixir-deposition-databases). The NeLS Data Management tool assembly will soon offer tools to help you with the deposition step for data stored in NeLS. +One recommended way to share your data is to deposit them in the {% tool "elixir-deposition-databases-for-biomolecular-data" %}. The NeLS Data Management tool assembly will soon offer tools to help you with the deposition step for data stored in NeLS. diff --git a/pages/tool_assembly/omero_assembly.md b/pages/tool_assembly/omero_assembly.md index a12a76c13..9a887de11 100644 --- a/pages/tool_assembly/omero_assembly.md +++ b/pages/tool_assembly/omero_assembly.md @@ -25,11 +25,14 @@ training: ## What is OMERO? -[OMERO](https://www.openmicroscopy.org/omero/) is a software platform for managing, sharing and analysing images data. OMERO supports over proprietary 150 file formats, including all major microscopy formats, medical images, digital pathology images, high content screening, etc., using [Bio-Formats](https://www.openmicroscopy.org/bio-formats/). Bio-Formats is a Java software tool for reading proprietary image data and metadata and writing image data using standardized open formats. +{% tool "omero" %} is a software platform for managing, sharing and analysing images data. OMERO supports over proprietary 150 file formats, including all major microscopy formats, medical images, digital pathology images, high content screening, etc., using {% tool "bioformats" %}. Bio-Formats is a Java software tool for reading proprietary image data and metadata and writing image data using standardized open formats. 
-OMERO handles all your images in a secure central repository. Users can view, [organize](data_organisation), [analyze](analysing) and [share](sharing) data from anywhere via the internet. Users can work with image data and metadata from a Desktop application, from the Web or from 3rd party tools e.g. [Fiji](https://fiji.sc/). +OMERO handles all your images in a secure central repository. Users can view, [organize](data_organisation), [analyze](analysing) and [share](sharing) data from anywhere via the internet. Users can work with image data and metadata from a Desktop application, from the Web or from 3rd party tools e.g. {% tool "fiji" %}. -OMERO stores image metadata in a relational database and offers a more flexible structure based on HDF5 to store for example, analytical results. This allows analytical results generated by 3rd party softwares e.g. [CellProfiler](https://cellprofiler.org/), [ilastik](https://www.ilastik.org/), etc., to be stored alongside the images. +OMERO stores image metadata in a relational database and offers a more flexible structure based on HDF5 to store for example, analytical results. This allows analytical results generated by 3rd party softwares e.g. {% tool "cellprofiler" %}, {% tool "ilastik" %}, etc., to be stored alongside the images. + +Recommendations and software tools are being developed to capture acquisition metadata importable into OMERO e.g. +{% tool "4dn-bina-ome-quarep" %}. {% include image.html file="ome_informatics.png" caption="Schematic overview of the OMERO tool assembly." alt="Schematic overview of the OMERO tool assembly." %} @@ -52,5 +55,5 @@ A demo server maintained by the OME team is also available for users wishing to OMERO can be used for the day-to-day data management of data. - Users can remotely view, [handle metadata](metadata_management), [analyze](data_analysis), generate figures ready for publication, etc. 
- The plaform can also be used to publish data either using public repository like Image Data Repository (IDR) or by enabling the public user within the OMERO installation in a given institution e.g. [Liverpool CCI gallery](https://cci02.liv.ac.uk/gallery/). -- It is the software platform for several public image repositories e.g. [Image Data Repository](http://idr.openmicroscopy.org/), [EMPIAR](https://www.ebi.ac.uk/empiar/). +- It is the software platform for several public image repositories e.g. {% tool "image-data-resource" %},{% tool "empiar" %} - It is also used as a teaching platform by several institutions e.g. University of Dundee, Harvard Medical school. diff --git a/pages/tool_assembly/plant_genomics_assembly.md b/pages/tool_assembly/plant_genomics_assembly.md index 621230a66..480732952 100644 --- a/pages/tool_assembly/plant_genomics_assembly.md +++ b/pages/tool_assembly/plant_genomics_assembly.md @@ -26,9 +26,9 @@ All the components of this tool assembly are publicly available, but most requir {% include image.html file="plant_genomics.svg" caption="Figure 1. The plant genomics tool assembly." alt="Tools and resources used in managing plant genomics and genotyping data." %} ### Metadata collection and tracking -Accurate [documentation](metadata_management.html) of the plant biological materials and samples is critical for interoperability, and should comply with the [MIAPPE](https://www.miappe.org/) standard. -This information should be submitted to [BioSamples](https://www.ebi.ac.uk/biosamples/), with MIAPPE compliance validated using BioSamples' [plant-miappe.json](https://github.com/EBIBioSamples/biosamples-v4/blob/biohackathon_miappe_checklist/webapps/core/src/main/resources/schemas/certification/plant-miappe.json) template available on the [sample validation](https://www.ebi.ac.uk/biosamples/docs/guides/validation) page. 
-Submission of sample descriptions to BioSamples can be done as early as the data collection stage, but at the latest, must acompany submission of the genomic data to the [European Nucleotide Archive](https://www.ebi.ac.uk/ena/browser/home) (ENA) or of genotyping data to the [European Variation Archive](https://www.ebi.ac.uk/eva/) (EVA). The complete timeline for submitting plant biological material to BioSamples and resulting genotyping experiment results to ENA and EVA should look like this: +Accurate [documentation](metadata_management) of the plant biological materials and samples is critical for interoperability, and should comply with the {% tool "miappe" %} standard. +This information should be submitted to {% tool "biosamples" %}, with MIAPPE compliance validated using BioSamples' [plant-miappe.json](https://github.com/EBIBioSamples/biosamples-v4/blob/biohackathon_miappe_checklist/webapps/core/src/main/resources/schemas/certification/plant-miappe.json) template available on the [sample validation](https://www.ebi.ac.uk/biosamples/docs/guides/validation) page. +Submission of sample descriptions to BioSamples can be done as early as the data collection stage, but at the latest, must accompany submission of the genomic data to the {% tool "european-nucleotide-archive" %} (ENA) or of genotyping data to the {% tool "european-variation-archive" %} (EVA). The complete timeline for submitting plant biological material to BioSamples and resulting genotyping experiment results to ENA and EVA should look like this: 1. Register plant biological material information to BioSamples 2. Submit Sequencing reads to ENA (using BioSamples IDs to identify material) 3.
Check if used reference genome assembly is INSDC available (GCF / GCA accesion number available) @@ -38,12 +38,12 @@ Submission of sample descriptions to BioSamples can be done as early as the data {% include callout.html type="note" content="Metadata associated with a single sample registered with BioSamples can only be updated from the original account." %} -[e!DAL-PGP](https://edal-pgp.ipk-gatersleben.de/), FAIRDOM-SEEK instances such as [FAIRDOMHub](https://fairdomhub.org/) or [Recherche Data Gouv](https://recherche.data.gouv.fr/) can be used to manage and share experimental metadata, as well as data. +{% tool "e-dal-pgp" %}, FAIRDOM-SEEK instances such as {% tool "fairdomhub" %} or [Recherche Data Gouv](https://recherche.data.gouv.fr/) can be used to manage and share experimental metadata, as well as data. ### Data processing and analysis -Reference genomes for genome assembly and annotation should be obtained from [ENSEMBL Plants](https://plants.ensembl.org/index.html) or [PLAZA](https://bioinformatics.psb.ugent.be/plaza/), if available. +Reference genomes for genome assembly and annotation should be obtained from {% tool "ensembl-plants" %} or {% tool "plaza" %}, if available. Genetic variant data must be produced in the VCF format, and validated using the EVA vcf-validator (https://github.com/EBIvariation/vcf-validator). Please note to only use identifiers of sequences that match the reference genome assembly identifiers. In order to ensure interoperability of VCF files, the VCF meta-information lines should be used: see the [Plant sciences page](plant_sciences#plant-genotyping-data-sharing-and-deposition) for more details. ### Data sharing and publishing -All sequencing data collected in plant genotyping experiments should be submitted to ENA together with metadata compliant to the [GSC MIxS plant associated checklist](https://www.ebi.ac.uk/ena/browser/view/ERC000020). Final results of such studies in the form of VCF files should be submitted to EVA. 
Additionally, supplemental data complementing these two data types is encouraged to be submitted to [e!DAL-PGP](https://edal-pgp.ipk-gatersleben.de/) or [Recherche Data Gouv](https://recherche.data.gouv.fr/). +All sequencing data collected in plant genotyping experiments should be submitted to ENA together with metadata compliant to the [GSC MIxS plant associated checklist](https://www.ebi.ac.uk/ena/browser/view/ERC000020). Final results of such studies in the form of VCF files should be submitted to EVA. Additionally, supplemental data complementing these two data types is encouraged to be submitted to {% tool "e-dal-pgp" %} or [Recherche Data Gouv](https://recherche.data.gouv.fr/). diff --git a/pages/tool_assembly/plant_phenomics_assembly.md b/pages/tool_assembly/plant_phenomics_assembly.md index ecb9e77d3..449615ca4 100644 --- a/pages/tool_assembly/plant_phenomics_assembly.md +++ b/pages/tool_assembly/plant_phenomics_assembly.md @@ -25,11 +25,11 @@ training: ## What is the plant phenomics tool assembly and who can use it? -The plant phenomics tool assembly covers the whole [life cycle](data_life_cycle) of experimental plant phenotyping data. It uses the concepts of the [MIAPPE](https://www.miappe.org/) (Minimum Information About a Plant Phenotyping Experiment) standard: (i) experiments description including organisation, objectives and location, (ii) biological material description and identification and (iii) traits (phenotypic and environmental) description including measurement methodology. A more [detailed overview](https://www.miappe.org/overview/) of the MIAPPE standard is available, as well as the full [specifications](https://www.miappe.org/support/#miappe-spec). +The plant phenomics tool assembly covers the whole [life cycle](data_life_cycle) of experimental plant phenotyping data. 
It uses the concepts of the {% tool "miappe" %} (Minimum Information About a Plant Phenotyping Experiment) standard: (i) experiments description including organisation, objectives and location, (ii) biological material description and identification and (iii) traits (phenotypic and environmental) description including measurement methodology. A more [detailed overview](https://www.miappe.org/overview/) of the MIAPPE standard is available, as well as the full [specifications](https://www.miappe.org/support/#miappe-spec). The plant phenomics tool assembly helps [everyone](your_role) in charge of plant phenotyping data management to enable: * the integration of phenotyping data with other omics data: see the general principles on the [Plant Sciences domain page](plant_sciences); -* the findability of their data in plant specific (e.g. [FAIDARE](https://urgi.versailles.inrae.fr/faidare/)) or generic search portal (e.g. Google Data Search); +* the findability of their data in plant specific (e.g. {% tool "faidare" %}) or generic search portal (e.g. Google Data Search); * the long term reusability of their data. ## How can you access the plant phenomics tool assembly? @@ -41,10 +41,10 @@ All the components of the plant phenomics tool assembly are publicly available a ### Data management planning The general principles to be considered are described in the [Plant Science domain page](plant_sciences) and in particular in its section dedicated to [plant phenotyping data](plant_sciences#phenotyping-metadata-collection-and-publication). 
In a nutshell: -* the phenotyping data must be described following the [MIAPPE](https://www.miappe.org/) data standard; +* the phenotyping data must be described following the {% tool "miappe" %} data standard; * special attention should be given to the identification and description of the [biological material](plant_sciences#plant-biological-materials-metadata-collection-and-sharing) and the [observation variables](plant_sciences#phenotyping-metadata-collection-and-publication). -The general principles for data management planning and available tools are described in the RDMkit [data management plan page](data_management_plan). The knowledge model of the data management planning application [Data Stewardship Wizard (DSW)](https://ds-wizard.org/) was reviewed for compliance with the needs of the Plant Sciences community. +The general principles for data management planning and available tools are described in the RDMkit [data management plan page](data_management_plan). The knowledge model of the data management planning application {% tool "data-stewardship-wizard" %} was reviewed for compliance with the needs of the Plant Sciences community. ### File based data collection @@ -54,13 +54,13 @@ The metadata and description of your experiments should be filled using a [MIAPP #### Systems for file based data collection -* [FAIRDOM-SEEK](https://seek4science.org/) is an open source web-based data sharing platform used as a repository or a catalog. It is being deployed as several instances ranging from confidential project data sharing platforms ([INRAE/AGENT](https://urgi.versailles.inrae.fr/fairdom), VIB) to public repositories like [FAIRDOMHub](https://fairdomhub.org/). It is MIAPPE compliant through the integration of MIAPPE metadata at the investigation, study and assay levels. It can be used for project based early data sharing, in preparation for long term data storage, but also as a preservation tool for raw data. 
-* [pISA-tree](https://bio.tools/pisa-tree) is a data management solution developed to contribute to the reproducibility of research and analyses. Hierarchical set of batch files is used to create standardized nested directory tree and associated files for research projects. -* [COPO](https://copo-project.org/) is a data management platform specific to plant sciences. +* {% tool "fairdom-seek" %} is an open source web-based data sharing platform used as a repository or a catalog. It is being deployed as several instances ranging from confidential project data sharing platforms ([INRAE/AGENT](https://urgi.versailles.inrae.fr/fairdom), VIB) to public repositories like {% tool "fairdomhub" %}. It is MIAPPE compliant through the integration of MIAPPE metadata at the investigation, study and assay levels. It can be used for project based early data sharing, in preparation for long term data storage, but also as a preservation tool for raw data. +* {% tool "pisa-tree" %} is a data management solution developed to contribute to the reproducibility of research and analyses. Hierarchical set of batch files is used to create standardized nested directory tree and associated files for research projects. +* {% tool "copo" %} is a data management platform specific to plant sciences. #### High throughput dedicated systems -* [PHIS](http://www.phis.inra.fr/) is the open-source Phenotyping Hybrid Information System (PHIS), based on [OpenSILEX](https://github.com/OpenSILEX/), manages and collects data from Phenotyping and High Throughput Phenotyping experiments on a day to day basis. It can store, organize and manage highly heterogeneous (e.g. images, spectra, growth curves) and multi-spatial and temporal scale data (leaf to canopy level) originating from multiple sources (field, greenhouse). 
+* {% tool "phis" %} is the open-source Phenotyping Hybrid Information System (PHIS), based on [OpenSILEX](https://github.com/OpenSILEX/), manages and collects data from Phenotyping and High Throughput Phenotyping experiments on a day to day basis. It can store, organize and manage highly heterogeneous (e.g. images, spectra, growth curves) and multi-spatial and temporal scale data (leaf to canopy level) originating from multiple sources (field, greenhouse). It unambiguously identifies all objects and traits in an experiment and establishes their relations via ontologies and semantics that apply to both field and controlled conditions. Its ontology-driven architecture is a powerful tool for integrating and managing data from multiple experiments and platforms, for creating relationships between objects and enriching datasets with knowledge and metadata. It is MIAPPE and BrAPI compliant, and naming conventions are recommended for users to declare their resources. Several experimental platforms use PHIS to manage their data, and PHIS instances dedicated to sharing resources (projects, genetic resources, variables) also exist to allow the sharing of studied concepts. * [PIPPA](https://pippa.psb.ugent.be/) is the PSB Interface for Plant Phenotype Analysis, is the central web interface and database that provides the tools for the management of the plant imaging robots on the one hand, and the analysis of images and data on the other hand. The database supports all MIAPPE fields which are accessible through the BrAPI endpoints. Experiment pages are marked up with Bioschemas to improve findability on google. @@ -70,7 +70,7 @@ It is important to keep in mind the difference between data processing and analy * [Processing](processing) provides the tools and procedures to transform primary data, such as imaging or observational data, to appropriate quality and processability.
* [Analysing](analysing), on the other hand, is concerned with extracting information from the processed data for the purpose of supporting knowledge acquisition. -Some analysis tools dedicated to plant phenotyping experiments are registered in bio.tools, for example: [Plant 3D](https://bio.tools/plant-3d), [LeafNet](https://bio.tools/leafnet), [PlantCV](https://bio.tools/plantcv_v2), [Phenomenal 3D](https://bio.tools/phenomenal-3d). +Some analysis tools dedicated to plant phenotyping experiments are registered in bio.tools, for example: {% tool "plant3d" %}, {% tool "leafnet" %}, {% tool "plantcv" %}, {% tool "phenomenal-3d" %}. ### Data sharing @@ -78,23 +78,25 @@ The data collected and annotated can be [shared](sharing) in trustworthy reposit #### Metadata management -* [isa4j](https://github.com/IPK-BIT/isa4j) is a software library which can help you to programmatically generate ISA-Tab formatted metadata for your experiments. This will make your metadata machine-(and human-)readable and thereby improve the reusability of your work. It was especially designed for large datasets and/or to be included in applications which export data regularly, but of course it can also be used for smaller, individual datasets (although you will need to know how to code). Since version 1.1 it also supports specific term completion and validation for MIAPPE, see the [isa4j documentation](https://ipk-bit.github.io/isa4j/miappe-validation.html). +* {% tool "isa4j" %} is a software library which can help you to programmatically generate ISA-Tab formatted metadata for your experiments. This will make your metadata machine-(and human-)readable and thereby improve the reusability of your work. It was especially designed for large datasets and/or to be included in applications which export data regularly, but of course it can also be used for smaller, individual datasets (although you will need to know how to code).
Since version 1.1 it also supports specific term completion and validation for MIAPPE, see the [isa4j documentation](https://ipk-bit.github.io/isa4j/miappe-validation.html). #### Repositories -* [Dataverse](https://dataverse.org/) is an open source research data repository software used by several research institute over the globe to publicly share heterogenous dataset. In Europe, it is being used among others by the portuguese [DMPortal](https://dmportal.biodata.pt/), the german [Julich data portal](https://data.fz-juelich.de/), and the french [Recherche Data Gouv](https://entrepot.recherche.data.gouv.fr/) (previously Data.INRAE) research communities. Its main strength is its flexibility, as the mandatory metadata are focused on publication information such as title, abstract, authors and keywords. It can therefore host any datatype, which is both a strength and a weakness, as shared good practices are necessary to ensure the reusability and findability of published phenomic data. +* {% tool "dataverse" %} is an open source research data repository software used by several research institute over the globe to publicly share heterogenous dataset. In Europe, it is being used among others by the portuguese [DMPortal](https://dmportal.biodata.pt/), the german [Julich data portal](https://data.fz-juelich.de/), and the french [Recherche Data Gouv](https://entrepot.recherche.data.gouv.fr/) (previously Data.INRAE) research communities. Its main strength is its flexibility, as the mandatory metadata are focused on publication information such as title, abstract, authors and keywords. It can therefore host any datatype, which is both a strength and a weakness, as shared good practices are necessary to ensure the reusability and findability of published phenomic data. 
-* [e!DAL-PGP](https://edal-pgp.ipk-gatersleben.de/) is a comprehensive research data repository, which is hosted at the [Leibniz Institute of Plant Genetics and Crop Plant Research (IPK) Gatersleben](https://www.ipk-gatersleben.de/en/) and is mainly focused on sharing high valuable and large genomics and phenomics datasets. It is the first productive instance, which is based on the open source [e!DAL](https://edal.ipk-gatersleben.de) infrastructure software and is furthermore a part of the de.NBI/ELIXIR Germany services. All provided datasets are FAIR compliant and citable via a persistent DOI. By using the widely established LifeScience AAI (formerly known as ELIXIR AAI) the submission procedure is open for all ELIXIR associated users. The key feature of e!DAL-PGP is its user-friendly, simple and FAIR-compliant data submission and internal review procedure. The repository has no general limit to any type of size of datasets. A comprehensive documentation including, guidelines, code snippets for technical integration and videos is available on the [project website](https://edal-pgp.ipk-gatersleben.de/). +* {% tool "e-dal-pgp" %} is a comprehensive research data repository, which is hosted at the [Leibniz Institute of Plant Genetics and Crop Plant Research (IPK) Gatersleben](https://www.ipk-gatersleben.de/en/) and is mainly focused on sharing high valuable and large genomics and phenomics datasets. It is the first productive instance, which is based on the open source {% tool "e-dal" %} infrastructure software and is furthermore a part of the de.NBI/ELIXIR Germany services. All provided datasets are FAIR compliant and citable via a persistent DOI. By using the widely established LifeScience AAI (formerly known as ELIXIR AAI) the submission procedure is open for all ELIXIR associated users. The key feature of e!DAL-PGP is its user-friendly, simple and FAIR-compliant data submission and internal review procedure. 
The repository has no general limit to any type of size of datasets. A comprehensive documentation including, guidelines, code snippets for technical integration and videos is available on the [project website](https://edal-pgp.ipk-gatersleben.de/). -* [Zenodo](https://zenodo.org/) is a powerful data publication service, which is supported by the European commission and focused on research data, including supplemental material like software, tables, figures or slides. Therefore the publication is usually associated with the publication of a research paper, book chapters or presentations. The Zenodo data submission form allows to describe every data file with a set of technical metadata based on the DataCite metadata schema, which is necessary and assign a persistent DOI to every dataset. The Zenodo infrastructure is hosted at the CERN and can publish dataset up to a size of 50 GB for free. For larger datasets a specific support request is necessary. A further valuable feature of Zenodo is the connection to GitHub and the provided opportunity to assign a DOI to a concrete version or rather commit of a hosted software repository which allows to persist software scripts, which improves the reproducibility of research workflows and results, which is often a challenge especially for older research publications. +* {% tool "gnpis" %} is a multispecies integrative information system dedicated to plants. It allows researchers to access genetic, MIAPPE compliant phenotypic data as well as genomic data. It is used by both large international projects and the French National Research Institute for Agriculture, Food and Environment. + +* {% tool "zenodo" %} is a powerful data publication service, which is supported by the European commission and focused on research data, including supplemental material like software, tables, figures or slides. Therefore the publication is usually associated with the publication of a research paper, book chapters or presentations. 
The Zenodo data submission form allows to describe every data file with a set of technical metadata based on the DataCite metadata schema, which is necessary and assign a persistent DOI to every dataset. The Zenodo infrastructure is hosted at the CERN and can publish dataset up to a size of 50 GB for free. For larger datasets a specific support request is necessary. A further valuable feature of Zenodo is the connection to GitHub and the provided opportunity to assign a DOI to a concrete version or rather commit of a hosted software repository which allows to persist software scripts, which improves the reproducibility of research workflows and results, which is often a challenge especially for older research publications. #### Machine actionable data sharing -* [BrAPI](http://www.brapi.org) (the Breeding API) is a MIAPPE compliant web service specification available on several [deposition databases](https://www.brapi.org/servers). Those endpoints can be validated using the BrAPI validator [BRAVA](https://webapps.ipk-gatersleben.de/brapivalidator/). BrAPI hosts several documentation and training material to support its usage. +* {% tool "brapi" %} (the Breeding API) is a MIAPPE compliant web service specification available on several [deposition databases](https://www.brapi.org/servers). Those endpoints can be validated using the BrAPI validator {% tool "brava" %}. BrAPI hosts several documentation and training material to support its usage. ### Data reuse Plant phenotyping data reuse relies on rich metadata following the MIAPPE specifications annotated with proper ontologies. Most of the important ontologies are registered on FAIRSHARING: use this [search example](https://fairsharing.org/search?fairsharingRegistry=Standard&q=plant&isMaintained=true). -* [AgroPortal](http://agroportal.lirmm.fr/) is a vocabulary and ontology repository for agronomy and related domains.
-* [FAIDARE](https://bio.tools/faidare) (FAIR Data-finder for Agronomic Research) is a portal facilitating discoverability of public data on plant biology from a federation of established data repositories. +* {% tool "agroportal" %} is a vocabulary and ontology repository for agronomy and related domains. +* {% tool "faidare" %} (FAIR Data-finder for Agronomic Research) is a portal facilitating discoverability of public data on plant biology from a federation of established data repositories. diff --git a/pages/tool_assembly/transmed_assembly.md b/pages/tool_assembly/transmed_assembly.md index 1180cb066..d4ac6caf7 100644 --- a/pages/tool_assembly/transmed_assembly.md +++ b/pages/tool_assembly/transmed_assembly.md @@ -29,19 +29,19 @@ Additionally, ELIXIR Luxembourg provides hosting of the TransMed assembly. Hosti ### Data management planning Translational Biomedicine projects often deal with sensitive data from human subjects. Therefore, data management planning of this type of projects needs to take data protection and GDPR compliance into account . -Typically a TransMed project involves multiple (clinical) study sites and can contain several cohorts. During the planning phase the dataflow for the project and data/metadata collected prospectively or retrospectively needs to be documented. Projects can use the [Data Information Sheet DISH](http://doi.org/10.5281/zenodo.5127940) to map the project dataflow and collect metadata necessary for GDPR-compliant processing. In addition, a data protection impact assessment needs to be performed taking into account partner roles, responsibilities and the data information collected via the DISH.
For this purpose TransMed assembly uses the Data Information System - [DAISY](https://daisy-demo.elixir-luxembourg.org/), which indexes all information collected by DISH and provides a repository to accumulate GDPR-required project documentation such as ethics approvals and consent templates and subject information sheets and ultimately the project data management plan. TransMed assembly includes the risk management tool [MONARC](https://open-source-security-software.net/project/MONARC), which can be used to perform Data Protection Impact Assessments (DPIA). DPIAs are a requirement of the GDPR for projects dealing with sensitive human data. +Typically a TransMed project involves multiple (clinical) study sites and can contain several cohorts. During the planning phase the dataflow for the project and data/metadata collected prospectively or retrospectively needs to be documented. Projects can use the [Data Information Sheet DISH](http://doi.org/10.5281/zenodo.5127940) to map the project dataflow and collect metadata necessary for GDPR-compliant processing. In addition, a data protection impact assessment needs to be performed taking into account partner roles, responsibilities and the data information collected via the DISH. For this purpose TransMed assembly uses the Data Information System - {% tool "daisy" %}, which indexes all information collected by DISH and provides a repository to accumulate GDPR-required project documentation such as ethics approvals and consent templates and subject information sheets and ultimately the project data management plan. TransMed assembly includes the risk management tool {% tool "monarc" %}, which can be used to perform Data Protection Impact Assessments (DPIA). DPIAs are a requirement of the GDPR for projects dealing with sensitive human data. 
### Data collection, transfer and storage -For projects involving patient recruitment the TransMed assembly provides the Smart Scheduling System, [SMASCH](https://smasch.pages.uni.lu ), tracking availability of resources in clinics and manages patient visits. Pseudonymised clinical data and patient surveys are then collected by the state of the art electronic data capture (EDC) system [REDCap](https://projectredcap.org) through a battery of electronic case report forms (eCRFs). Imaging data from the clinics are deposited into a dedicated imaging platform [XNAT](https://www.xnat.org/). Omics data, both in raw and derived form can be deposited to the data provenance system [iRODS](https://irods.org/). +For projects involving patient recruitment the TransMed assembly provides the Smart Scheduling System, {% tool "smasch" %}, tracking availability of resources in clinics and manages patient visits. Pseudonymised clinical data and patient surveys are then collected by the state of the art electronic data capture (EDC) system {% tool "redcap" %} through a battery of electronic case report forms (eCRFs). Imaging data from the clinics are deposited into a dedicated imaging platform {% tool "xnat" %}. Omics data, both in raw and derived form can be deposited to the data provenance system {% tool "irods" %}. The transfer of data files can be done via various encrypted communication options as outlined in the [Data transfer](data_transfer) section of the RDMkit. The TransMed assembly most typically utilises (S)FTP, Aspera FASP and ownCloud. Data is also encrypted at rest with hard-ware and also with file-level encryption using either open-source utilities such as gpg or commercial options such as Aspera FASP. ### Data curation and harmonisation -To facilitate cross-cohort/cross-study interoperability of data, upon collection, the data needs to be curated and harmonised. For this purpose the TransMed assembly uses a variety of open standards and tools. 
For data quality and cleansing the assembly uses [OpenRefine](https://openrefine.org/), which provides an intuitive interface to generate facets of data that support the research to identify quality issues and outliner. It also enables traceable and yet easy data correction. For data Extraction, Transformation and Loading (ETL) the assembly uses [Talend Open Studio](https://www.talend.com/) (for complex and reusable ETLs) as well as R and Python (for ad-hoc and simple transformation). To evaluate and improve FAIRness of datasets, the assembly follows the recipes in the [FAIR Cookbook](https://fairplus.github.io/the-fair-cookbook/) developed by the FAIRplus consortium. Related to standard data models and ontologies the assembly follows the recommendations in the FAIR Cookbook recipe for selecting terminologies and ontologies. +To facilitate cross-cohort/cross-study interoperability of data, upon collection, the data needs to be curated and harmonised. For this purpose the TransMed assembly uses a variety of open standards and tools. For data quality and cleansing the assembly uses {% tool "openrefine" %}, which provides an intuitive interface to generate facets of data that support the research to identify quality issues and outliers. It also enables traceable and yet easy data correction. For data Extraction, Transformation and Loading (ETL) the assembly uses {% tool "talend" %} Open Studio (for complex and reusable ETLs) as well as R and Python (for ad-hoc and simple transformation). To evaluate and improve FAIRness of datasets, the assembly follows the recipes in the {% tool "fair-cookbook" %} developed by the FAIRplus consortium. Related to standard data models and ontologies the assembly follows the recommendations in the FAIR Cookbook recipe for selecting terminologies and ontologies.
### Data integration and analysis -TransMed projects usually require different data types from different cohorts to be integrated into one data platform for the exploring, sub-setting and integrated analysis for hypothesis generation. The TransMed assembly consists of several such tools: [Ada](https://ada.parkinson.lu/documentation/intro) is a web-based tool to provide a performant and highly configurable system for secured integration, visualization, and collaborative analysis of heterogeneous data sets, primarily targeting clinical and experimental sources. The assembly also includes other tools for specific data types, such as [ATLAS](https://github.com/OHDSI/Atlas/wiki) that integrate features from various [OHDSI](https://ohdsi.org/) applications for Electronic Health Record data in [OMOP](https://ohdsi.github.io/CommonDataModel/) format into a single cohesive experience. [tranSMART](https://github.com/transmart) is a tool that provides easy integration between phenotypic/clinical data and molecular data and a “drag-and-drop” fashion data exploration interface. +TransMed projects usually require different data types from different cohorts to be integrated into one data platform for the exploring, sub-setting and integrated analysis for hypothesis generation. The TransMed assembly consists of several such tools: {% tool "ada-discovery-analytics" %} is a web-based tool to provide a performant and highly configurable system for secured integration, visualization, and collaborative analysis of heterogeneous data sets, primarily targeting clinical and experimental sources. The assembly also includes other tools for specific data types, such as {% tool "atlas" %} that integrate features from various {% tool "ohdsi" %} applications for Electronic Health Record data in {% tool "omop-cdm" %} format into a single cohesive experience. 
{% tool "transmart" %} is a tool that provides easy integration between phenotypic/clinical data and molecular data and a “drag-and-drop” fashion data exploration interface. ### Data stewardship -To facilitate the findability of data the TransMed assembly provides a [Data/Sample Catalog tool](https://datacatalog.elixir-luxembourg.org/) that supports the indexing search and discovery of studies, data sets and samples accumulated in the context of projects from different sites and cohorts. The catalog implements a controlled-access model by integration with [AAI REMS](https://github.com/CSCfi/rems). Audit trailing of data access is achieved by integration of the [DAISY tool](https://daisy-demo.elixir-luxembourg.org/) in the access process. The catalog tool can be integrated with various identity management systems such as [Keycloak](https://www.keycloak.org/), [ELIXIR-AAI](https://elixir-europe.org/services/compute/aai) or [Free-IPA](https://www.freeipa.org/). +To facilitate the findability of data the TransMed assembly provides a {% tool "data-catalog" %} tool that supports the indexing search and discovery of studies, data sets and samples accumulated in the context of projects from different sites and cohorts. The catalog implements a controlled-access model by integration with {% tool "rems" %}. Audit trailing of data access is achieved by integration of the {% tool "daisy" %} in the access process. The catalog tool can be integrated with various identity management systems such as {% tool "keycloak" %}, {% tool "life-science-login" %} or {% tool "free-ipa" %}. diff --git a/pages/tool_assembly/tsd_assembly.md b/pages/tool_assembly/tsd_assembly.md index 32cfa4ce5..f49863958 100644 --- a/pages/tool_assembly/tsd_assembly.md +++ b/pages/tool_assembly/tsd_assembly.md @@ -33,7 +33,7 @@ If you are affiliated to a Norwegian institution which has already stipulated a {% include image.html file="TSD_tool_assembly.svg" caption="Figure 1.
Norwegian ELIXIR tools assembly for sensitive data - TSD" alt="TSD tool assembly" %} -The Norwegian tools assembly for sensitive data offers support with [Data Management Planning](planning) through an [instance of the Data Stewardship Wizard](https://elixir-no.ds-wizard.org) following the guidelines of the major national and European funding bodies. Dedicated references guide you through national infrastructure, resources, laws and regulations and also include the [Tryggve ELSI Checklist](https://neic.no/tryggve/links/) for Ethical, Legal and Social Implications. Soon you will be able to submit storage request forms for [Data Storage](storage) in TSD with defined access permissions through the Data Stewardship Wizard. +The Norwegian tools assembly for sensitive data offers support with [Data Management Planning](planning) through an [instance of the Data Stewardship Wizard](https://elixir-no.ds-wizard.org) following the guidelines of the major national and European funding bodies. Dedicated references guide you through national infrastructure, resources, laws and regulations and also include the {% tool "tryggve-elsi-checklist" %} for Ethical, Legal and Social Implications. Soon you will be able to submit storage request forms for [Data Storage](storage) in TSD with defined access permissions through the Data Stewardship Wizard. TSD offers [Data Storage](storage) services. Moreover, [Processing](processing) and [Analysing](analysing) of data is performed in a safe environment within TSD. As a national user, you can access TSD by identifying yourself using the Norwegian [ID-porten](https://eid.difi.no/en/id-porten) system. International users can get access by contacting [tsd-contact@usit.uio.no](mailto:tsd-contact@usit.uio.no). 
@@ -45,7 +45,7 @@ As the primary design goal of TSD is security, [transfer of data](data_transfer) ### Data management planning -You can access the [ELIXIR-NO instance of the Data Stewardship Wizard](https://elixir-no.ds-wizard.org) using [Life Science Login](https://lifescience-ri.eu/ls-login/), which can be coupled with the national solution for secure login and data sharing in the educational and research sector [Feide](https://www.feide.no/). +You can access the [ELIXIR-NO instance of the Data Stewardship Wizard](https://elixir-no.ds-wizard.org) using {% tool "life-science-login" %}, which can be coupled with the national solution for secure login and data sharing in the educational and research sector [Feide](https://www.feide.no/). ### Data Collection @@ -61,4 +61,4 @@ The computing services provided through TSD include an Illumina DRAGEN (Dynamic ### Data Sharing and Preservation -One solution for permanent archiving and sharing of personally identifiable genetic and phenotypic datasets resulting from biomedical research data is to deposit them to the [European Genome-phenome Archive (EGA)](https://ega-archive.org/). The EGA applies a controlled access model. There can be limitations, e.g. given consents, for your datasets which prevents them from leaving your jurisdiction or being archived in general. This is partly addressed by federated EGA services with nodes operating from one country or institution under one specific jurisdiction. This model enables discovery of publicly shareable metadata about studies/datasets archived at the federated EGA nodes through the Central EGA, while the remaining data is stored in a local solution. The federated EGA nodes offer the same APIs as the Central EGA and provide independent data distribution to users. The [Norwegian Federated EGA](https://ega.elixir.no/) is accessible through [Life Science Login](https://lifescience-ri.eu/ls-login), compatible with [Feide](https://www.feide.no/). 
+One solution for permanent archiving and sharing of personally identifiable genetic and phenotypic datasets resulting from biomedical research data is to deposit them to the {% tool "the-european-genome-phenome-archive" %}. The EGA applies a controlled access model. There can be limitations, e.g. given consents, for your datasets which prevents them from leaving your jurisdiction or being archived in general. This is partly addressed by federated EGA services with nodes operating from one country or institution under one specific jurisdiction. This model enables discovery of publicly shareable metadata about studies/datasets archived at the federated EGA nodes through the Central EGA, while the remaining data is stored in a local solution. The federated EGA nodes offer the same APIs as the Central EGA and provide independent data distribution to users. The [Norwegian Federated EGA](https://ega.elixir.no/) is accessible through {% tool "life-science-login" %}, compatible with [Feide](https://www.feide.no/). diff --git a/pages/tool_assembly/xnat_pic_assembly.md b/pages/tool_assembly/xnat_pic_assembly.md index a8c706598..3cf8d88dd 100644 --- a/pages/tool_assembly/xnat_pic_assembly.md +++ b/pages/tool_assembly/xnat_pic_assembly.md @@ -1,7 +1,7 @@ --- title: XNAT-PIC contributors: [Sara Zullino, Alessandro Paglialonga, Walter Dastrù, Dario Longo, Silvio Aime] -page_id: xnat-pic +page_id: xnat_pic affiliations: [Euro BioImaging, IT] related_pages: your_tasks: [data_organisation, storage, data_analysis] @@ -22,9 +22,9 @@ training: ## What is XNAT-PIC? -Preclinical imaging centers deal with many challenges mostly related to the variety of imaging instrumentation yielding huge volumes of raw data. The current procedures to collect, share and reuse preclinical image data are insufficient, thus revealing an urgent need of standardization in terms of data storage and image processing. 
**XNAT for Preclinical Imaging Centers (XNAT-PIC)** has been developed to overcome this limitation by extending XNAT’s basic functionalities to meet the needs of preclinical imaging facilities. +Preclinical imaging centers deal with many challenges mostly related to the variety of imaging instrumentation yielding huge volumes of raw data. The current procedures to collect, share and reuse preclinical image data are insufficient, thus revealing an urgent need of standardization in terms of data storage and image processing. **{% tool "xnat" %} for Preclinical Imaging Centers (XNAT-PIC)** has been developed to overcome this limitation by extending XNAT’s basic functionalities to meet the needs of preclinical imaging facilities. -**XNAT for Preclinical Imaging Centers (XNAT-PIC)** consists of a set of tools built in Python and MATLAB to [store](storage), [process](processing) and [share](sharing) preclinical imaging studies built on top of the [XNAT](https://www.xnat.org/) imaging informatics platform. +**XNAT for Preclinical Imaging Centers (XNAT-PIC)** consists of a set of tools built in Python and MATLAB to [store](storage), [process](processing) and [share](sharing) preclinical imaging studies built on top of the {% tool "xnat" %} imaging informatics platform. ## Who is XNAT-PIC intended for? @@ -33,15 +33,15 @@ XNAT-PIC is inteded for scientists, researchers and data stewards working in the ## Which task can be solved with XNAT-PIC? XNAT-PIC is a set of tools to support preclinical imaging scientists in their data management and processing needs. -The Extensible Neuroimaging Archive Toolkit [XNAT](https://www.xnat.org/) is an imaging informatics platform developed by the Neuroinformatics Research Group at the Washington University for the management, storage and analysis of biomedical image data. XNAT is an open-source project that can support a wide range of imaging modalities thanks to its extensibility. 
+The Extensible Neuroimaging Archive Toolkit {% tool "xnat" %} is an imaging informatics platform developed by the Neuroinformatics Research Group at the Washington University for the management, storage and analysis of biomedical image data. XNAT is an open-source project that can support a wide range of imaging modalities thanks to its extensibility. {% include image.html file="xnat-pic.png" caption="Figure 1. Schematic overview of the XNAT-PIC tool assembly." alt="Schematic overview of the XNAT-PIC tool assembly." %} XNAT-PIC consists of: -* **MRI2DICOM** to [process](processing) Magnetic Resonance (MR) images and convert them from ParaVision® (Bruker, Inc. Billerica, MA) file format to DICOM standard; -* **XNAT-PIC Uploader** to import and [store](storage) multimodal DICOM image datasets to XNAT; -* **XNAT-PIC Pipelines** for [analysing](analysing) single or multiple subjects within the same project in XNAT. +* {% tool "mri2dicom" %} to [process](processing) Magnetic Resonance (MR) images and convert them from ParaVision® (Bruker, Inc. Billerica, MA) file format to DICOM standard; +* {% tool "xnat-pic-uploader" %} to import and [store](storage) multimodal DICOM image datasets to XNAT; +* {% tool "xnat-pic-pipelines" %} for [analysing](analysing) single or multiple subjects within the same project in XNAT. 
## Citation diff --git a/pages/your_domain.md b/pages/your_domain.md index 82e1e31bf..21fa78ea0 100644 --- a/pages/your_domain.md +++ b/pages/your_domain.md @@ -12,6 +12,6 @@ In this section, information is organised based on different domains in life sci -{% include section-navigation-tiles.html type="your_domain" search=true %} +{% include section-navigation-tiles.html type="your_domain" search=true except="your_domain.md" %} diff --git a/pages/your_domain/bioimaging_data.md b/pages/your_domain/bioimaging_data.md index 595665fc0..31e6b212f 100644 --- a/pages/your_domain/bioimaging_data.md +++ b/pages/your_domain/bioimaging_data.md @@ -5,7 +5,7 @@ contributors: [Sébastien Besson, Jean-Marie Burel, Susanne Kunis, Josh Moore, S page_id: bioimaging_data related_pages: your_tasks: [dmp, data_organisation, data_publication, existing_data, transfer, licensing, metadata, storage] - tool_assembly: [ome, xnat-pic] + tool_assembly: [ome, xnat_pic] training: - name: RDMbites for using REMBI registry: TeSS @@ -25,7 +25,7 @@ An image is much more than a collection of zeros and ones. The image will contain the binary representing the pixels on screen but it is usually packed with useful metadata. You will find the obvious keys indicating how to interpret the zeros and ones, you can also find a lot of acquisition metadata e.g. hardware/instrument used, settings used, etc. The number of image proprietary formats is very large and keeps increasing. It is challenging to support so many proprietary file formats i.e. read/extract metadata. -The [Bio-formats](https://bio-formats.readthedocs.io/) library currently supports over [150 different file formats](https://bio-formats.readthedocs.io/en/latest/supported-formats.html). +The {% tool "bio-formats" %} library currently supports over [150 different file formats](https://bio-formats.readthedocs.io/en/latest/supported-formats.html). 
The [Dataset Structure Table](https://bio-formats.readthedocs.io/en/latest/formats/dataset-table.html) shows the extension of the files to read and indicates the structure of the image itself e.g. single file, multiple files, one image file and a companion file, etc. ### Data management challenges @@ -63,21 +63,21 @@ Unlike other domains, the bioimaging community has not yet agreed on a single st **Open source translators**: Members of the community have developed multi-format translators that can be used to access your data on-the-fly i.e. the original format is preserved, no file written on disk. This implies that you will need to perform this translation each time you access your data and, depending on the size of the image(s), you could run out of memory. Translation libraries include, - - [Bio-Formats](https://www.openmicroscopy.org/bio-formats/) (Java) - supports over 150 file formats - - [OpenSlide](https://openslide.org/) (C++) - primarily for whole-slide imaging (WSI) formats - - [aicsimageio](https://github.com/AllenCellModeling/aicsimageio) (Python) - wraps vendor libraries and Bio-Formats to support a wide-range of formats in Python + - {% tool "bio-formats" %} (Java) - supports over 150 file formats + - {% tool "openslide" %} (C++) - primarily for whole-slide imaging (WSI) formats + - {% tool "aicsimageio" %} (Python) - wraps vendor libraries and Bio-Formats to support a wide-range of formats in Python **Permanent conversion**: An alternative is to permanently convert your data to - - [OME-Files](https://www.openmicroscopy.org/ome-files/) - The [Open Microscopy Consortium (OME)](https://www.openmicroscopy.org/) has developed an open format, "OME-TIFF", to which you can convert your data. 
The Bio-Formats (above) library comes with a command line to tool [bfconvert](https://bio-formats.readthedocs.io/en/stable/users/comlinetools/conversion.html) that can be used to convert to files to OME-TIFF - - The [bioformats2raw](https://github.com/glencoesoftware/bioformats2raw) and [raw2ometiff](https://github.com/glencoesoftware/raw2ometiff) toolchain provided by [Glencoe Software](https://www.glencoesoftware.com/) allows the more performant conversion of your data, but requires an extra intermediate copy of the data. If you have available space, the toolchain could also be an option to consider. + - [OME-Files](https://www.openmicroscopy.org/ome-files/) - The [Open Microscopy Consortium (OME)](https://www.openmicroscopy.org/) has developed an open format, "OME-TIFF", to which you can convert your data. The Bio-Formats (above) library comes with a command line tool {% tool "bfconvert" %} that can be used to convert files to OME-TIFF + - The {% tool "bioformats2raw" %} and {% tool "raw2ometiff" %} toolchain provided by [Glencoe Software](https://www.glencoesoftware.com/) allows the more performant conversion of your data, but requires an extra intermediate copy of the data. If you have available space, the toolchain could also be an option to consider. **Cloud (or "object") storage**: If you are storing your data in the cloud, you will likely need a different file format since most current image file formats are not suitable for cloud storage. OME is currently developing a [next-generation file format (NGFF)](https://ngff.openmicroscopy.org/latest/) that you can use. **Metadata**: If metadata are stored separately from the image data, the format of the metadata should follow the subject-specific standards regarding the schema, vocabulary or ontologies and storage format used such as: - [OME model](https://docs.openmicroscopy.org/ome-model/latest/) XML-based representation of microscopy data. 
- - [Quality assessment and Reproducibility in Light Microscopy (QUAREP-LiMi)](https://quarep.org/). + - {% tool "4dn-bina-ome-quarep" %}. - [REMBI](https://www.nature.com/articles/s41592-021-01166-8). @@ -108,20 +108,20 @@ Due to the scale of data, keeping track of the image data and the associated dat ### Solutions - Agnostic platforms that can be used to bridge between domain data include: - - [iRODS](https://irods.org/). - - [b2share](https://b2share.eudat.eu/). + - {% tool "irods" %}. + - {% tool "b2share" %}. - Image-specific data management platforms include: - - [OMERO](https://www.openmicroscopy.org/omero/) - broad support for a large number of imaging formats. - - [Cytomine-IMS](https://github.com/cytomine/Cytomine-IMS) - image specific. - - [XNAT](https://www.xnat.org/) - medical imaging platform, DICOM-based. - - [MyTardis](http://www.mytardis.org/) - largely file-system based platform handling the transfer of data. - - [BisQue](https://bioimage.ucsb.edu/bisque) - resource for management and analysis of 5D biological images. - - Platforms like [OMERO](https://www.openmicroscopy.org/omero/), [b2share](https://b2share.eudat.eu/) also allow you to publish the data associated with a given project. + - {% tool "omero" %} - broad support for a large number of imaging formats. + - {% tool "cytomine-ims" %} - image specific. + - {% tool "xnat" %} - medical imaging platform, DICOM-based. + - {% tool "mytardis" %} - largely file-system based platform handling the transfer of data. + - {% tool "bisque" %} - resource for management and analysis of 5D biological images. + - Platforms like {% tool "omero" %}, {% tool "b2share" %} also allow you to publish the data associated with a given project. - Metadata standards can be found at the [Metadata Standards Directory Working Group](https://rdamsc.bath.ac.uk/). - Ontologies Resources available at: - - [Zooma](https://www.ebi.ac.uk/spot/zooma/) - Resource to find ontology mapping for free text terms. 
- - [Ontology Search](https://www.ebi.ac.uk/ols/index) - Ontology lookup service. - - [BioPortal](https://www.bioontology.org/) - Biomedical ontologies. + - {% tool "zooma" %} - Resource to find ontology mapping for free text terms. + - {% tool "ontology-lookup-service" %} - Ontology lookup service. + - {% tool "bioportal" %} - Biomedical ontologies. - Existing data can be found by using the following resources: - [LINCS](https://lincsproject.org/LINCS/tools/workflows/explore-microscopy-imaging-data-collected-across-the-lincs-centers). - [Research Data repositories Registry](https://www.re3data.org/). @@ -148,7 +148,7 @@ Two distinct types of resources should be considered: - Select and choose the repositories based on the following characteristics: - Storage vs Added-value resources. - Images format support. - - Supported licenses e.g. CC0 or CC-BY license. For example the [Image Data Resource (IDR)](http://idr.openmicroscopy.org/) uses Creative Commons Licenses for submitted datasets and encourages submitting authors to choose. + - Supported licenses e.g. CC0 or CC-BY license. For example the {% tool "image-data-resource" %} uses Creative Commons Licenses for submitted datasets and encourages submitting authors to choose. - Which types of access are required for the users e.g. download only, browse search and view data and metadata, API access. - Does an entry have an access e.g. idr-xxx, EMPIAR-#####? - Does an entry have a DOI (Digital Object Identifier)? 
@@ -158,15 +158,76 @@ Two distinct types of resources should be considered: Comparative table of some repositories that can be used to deposit imaging data: -| Repository | Type | Data Restrictions | Data Upload Restrictions | DOI | Cost | -|------------|------|-------------------|---------------------|-----|------| -| [BioImageArchive](https://www.ebi.ac.uk/bioimage-archive/) | Archive | No PIH data | None | --- | Free | -| [Dryad](https://datadryad.org/)| Archive | No PIH data | 300GB | Yes | over 50GB (*) | -| [EMPIAR](https://www.ebi.ac.uk/empiar/) | Added-value | Electron microscopy imaging data | None | Yes | Free | -| [IDR](https://idr.openmicroscopy.org/) | Added-value | Cell/Tissue imaging data, no PIH data | None| Yes | Free | -| [SSBD:database](https://ssbd.riken.jp/database/) | Added-value | Biological dynamics imaging data | None | --- | Free | -| [SSBD:repository](https://ssbd.riken.jp/repository/) | Archive | Biological dynamics imaging data | None | --- | Free | -| [Zenodo](https://zenodo.org) | Archive | None | 50GB per dataset | Yes | Free | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RepositoryTypeData RestrictionsData Upload RestrictionsDOICost
{% tool "bioimagearchive" %}ArchiveNo PIH dataNone---Free
{% tool "dryad" %}ArchiveNo PIH data300GBYesover 50GB (*)
{% tool "empiar" %}Added-valueElectron microscopy imaging dataNoneYesFree
{% tool "image-data-resource" %}Added-valueCell/Tissue imaging data, no PIH dataNoneYesFree
{% tool "ssbd-database" %}Added-valueBiological dynamics imaging dataNone---Free
{% tool "ssbd-repository" %}ArchiveBiological dynamics imaging dataNone---Free
{% tool "zenodo" %}ArchiveNone50GB per datasetYesFree
- PIH: Protected health information. - (*) unless submitter is based at member institution. diff --git a/pages/your_domain/biomolecular_simulation_data.md b/pages/your_domain/biomolecular_simulation_data.md index ed3dd0ea0..4ec2b00ce 100644 --- a/pages/your_domain/biomolecular_simulation_data.md +++ b/pages/your_domain/biomolecular_simulation_data.md @@ -47,32 +47,32 @@ The biomolecular simulation data comes in several forms and multiple formats, wh ### Solutions * Deposit your data to a suitable repository for sharing. There’s a long (and incomplete) list of repositories available for data sharing. Repositories are divided into two main categories, *general-purpose* and *discipline-specific*, and both categories are utilised in the domain of biomolecular modeling and simulation. For a general introduction to repositories, you are advised to read the [data publication](data_publication) page. - * General-purpose repositories such as [Zenodo](https://zenodo.org/), [FigShare](https://figshare.com/), [Mendeley Data](https://data.mendeley.com/), [DataDryad](https://datadryad.org/), and [OpenScienceFramework](https://osf.io/) can be used. + * General-purpose repositories such as {% tool "zenodo" %}, {% tool "figshare" %}, {% tool "mendeley-data" %}, {% tool "dryad" %}, and {% tool "openscienceframework" %} can be used. * Discipline-specific repositories can be used when the repository supports the type of data to be shared e.g. molecular dynamics data. Repositories for various data types and models are listed below: * Molecular Dynamics repositories - * [GPCRmd](http://gpcrmd.org/) - for GPCR protein simulations, [with submission process](https://submission.gpcrmd.org/accounts/login/?next=/accounts/memberpage/). - * [MoDEL](http://mmb.irbbarcelona.org/MoDEL/) - (https://bio.tools/model) specific database for protein MD simulations. 
- * [BigNASim](http://mmb.irbbarcelona.org/BigNASim/) - (https://bio.tools/bignasim) specific database for Nucleic Acids MD simulations, [with submission process](https://github.com/NMRLipids). - * [MoDEL-CNS](http://mmb.irbbarcelona.org/MoDEL-CNS/#/) - specific database for Central Nervous System-related, mainly membrane protein, MD simulations. - * [NMRlipids](http://nmrlipids.blogspot.com/) - project to validate lipid force fields with NMR data with submission process - * [MolSSI - BioExcel COVID-19 therapeutics hub](https://covid.bioexcel.eu/) - database with COVID-19 related simulations, [with submission process](https://covid.bioexcel.eu/contributing/). + * {% tool "gpcrmd" %} - for GPCR protein simulations, [with submission process](https://submission.gpcrmd.org/accounts/login/?next=/accounts/memberpage/). + * {% tool "model" %} - (https://bio.tools/model) specific database for protein MD simulations. + * {% tool "bignasim" %} - (https://bio.tools/bignasim) specific database for Nucleic Acids MD simulations, [with submission process](https://github.com/NMRLipids). + * {% tool "model-cns" %} - specific database for Central Nervous System-related, mainly membrane protein, MD simulations. + * {% tool "nmrlipids" %} - project to validate lipid force fields with NMR data with submission process + * {% tool "molssi" %} - database with COVID-19 related simulations, [with submission process](https://covid.bioexcel.eu/contributing/). * Molecular Dynamics databases - allow access to precalculated data - * [BioExcel-CV19](https://bioexcel-cv19.bsc.es/#/) - database and associated web server to offer in a graphical way analyses on top of COVID-19 related MD trajectories stored in the MolSSI-BioExcel COVID-19 therapeutics hub. 
- * [Dynameomics](http://www.dynameomics.org/) - database of folding/unfolding pathways - * [MemprotMD](http://memprotmd.bioch.ox.ac.uk/) - database of automatically generated membrane proteins from PDB inserted into simulated lipid bilayers + * {% tool "bioexcel-covid-19" %} - database and associated web server to offer in a graphical way analyses on top of COVID-19 related MD trajectories stored in the MolSSI-BioExcel COVID-19 therapeutics hub. + * {% tool "dynameomics" %} - database of folding/unfolding pathways + * {% tool "memprotmd" %} - database of automatically generated membrane proteins from PDB inserted into simulated lipid bilayers * Docking repositories - * [MolSSI - BioExcel COVID-19 therapeutics hub](https://covid.bioexcel.eu/) - database with COVID-19 related simulations, [with submission process](https://covid.bioexcel.eu/contributing/). - * [PDB-Dev](https://pdb-dev.wwpdb.org/) - prototype archiving system for structural models using integrative or hybrid modeling, [with submission process](https://pdb-dev.wwpdb.org/deposit.html). + * {% tool "molssi" %} - database with COVID-19 related simulations, [with submission process](https://covid.bioexcel.eu/contributing/). + * {% tool "pdb-dev" %} - prototype archiving system for structural models using integrative or hybrid modeling, [with submission process](https://pdb-dev.wwpdb.org/deposit.html). * [Model Archive](https://www.modelarchive.org/) - theoretical models of macromolecular structures, [with submission process](https://www.modelarchive.org/projects/new/basic). * Virtual Screening repositories: - * [Bioactive Conformational Ensemble](http://mmb.irbbarcelona.org/BCE) - small molecule conformations, [with submission process](http://mmb.irbbarcelona.org/BCE/db/upload). 
- * [BindingDB](https://www.bindingdb.org/) - database of measured binding affinities, focusing chiefly on the interactions of protein considered to be drug-targets with small, drug-like molecules, [with submission process](https://www.bindingdb.org/bind/contributedata.jsp). + * {% tool "bioactive-conformational-ensemble" %} - small molecule conformations, [with submission process](http://mmb.irbbarcelona.org/BCE/db/upload). + * {% tool "bindingdb" %} - database of measured binding affinities, focusing chiefly on the interactions of protein considered to be drug-targets with small, drug-like molecules, [with submission process](https://www.bindingdb.org/bind/contributedata.jsp). * Repositories for the analyzed data from simulations: - * [MolMeDB](https://molmedb.upol.cz/) - for molecule-membrane interactions and free energy profiles, [with submission process](mailto:molmedb@upol.cz). + * {% tool "molmedb" %} - for molecule-membrane interactions and free energy profiles, [with submission process](mailto:molmedb@upol.cz). * [ChannelsDB](https://webchemdev.ncbr.muni.cz/ChannelsDB/index.html) - resource of channels, pores and tunnels found in biomacromolecules, [with submission process](https://webchemdev.ncbr.muni.cz/ChannelsDB/contribute.html). * Based on the type of data to be shared, pay attention to what should be included and the data and metadata that will be deposited to repositories. Below listed are some suggested examples of types of essential and optional data describing the biomolecular simulation data: diff --git a/pages/your_domain/epitranscriptome_data.md b/pages/your_domain/epitranscriptome_data.md index f732e0f5c..f0ff82e46 100644 --- a/pages/your_domain/epitranscriptome_data.md +++ b/pages/your_domain/epitranscriptome_data.md @@ -24,7 +24,7 @@ Several high-throughput experimental approaches have been developed for profilin ### Solutions - Define the sequencing protocol depending on the target RNA modification (transient or not-transient). 
In case of using data from public databases, carefully look at the method used to generate them. - Prefer profiling methods allowing the detection of RNA modifications at single nucleotide level. -- Epitranscriptome data is generally reused from literature or public and established databases, such as [REDIportal](http://srv00.recas.ba.infn.it/atlas/). All data must have an identifier from the original database that it comes from. The source database is used also to retrieve metadata. +- Epitranscriptome data is generally reused from literature or public and established databases, such as {% tool "rediportal" %}. All data must have an identifier from the original database that it comes from. The source database is used also to retrieve metadata. ## Processing and analysis of epitranscriptome data @@ -38,8 +38,8 @@ Epitranscriptome is a novel field and in rapid expansion. Since a variety of tra - Are you profiling or transient not-transient RNA modifications? ### Solutions -- The current pipeline for RNA editing ([REDItools](https://github.com/BioinfoUNIBA/REDItools)) requires the use of time intensive computational resources to browse position by position all genomic sites covered by RNAseq reads. In order to overcome that, a novel tool ([REDItools2](https://github.com/BioinfoUNIBA/REDItools2)) able to employ HPC resources and reduce the computing time has been developed. However, for transient modifications identified by direct RNA sequencing, compute intensive tools are still required. The computational speed up could be obtained by using GPU graphical cards. In general, for standard RNAseq experiments, each sample requires 8-10 CPUs and at least 8-10 GB of RAM memory. Direct RNA sequencing, instead, requires 8-10 CPUs, at least 1 GPU and 8-10 GB of RAM memory. Once a pipeline has been adopted, it should be used for all samples. -- Data storage is a big issue and not all intermediate files produced during the analyses can be maintained. 
However, since original data are easily and always available from public sources, analysis files are stored until the end of the established computational workflow. Then, only the final table file including epitranscriptomic variants are recovered and included in [REDIportal](http://srv00.recas.ba.infn.it/atlas/). Although this procedure could be time consuming in case of important updates, such as the adoption of a novel genome assembly, it preserves the storage requirements. +- The current pipeline for RNA editing ({% tool "reditools" %}) requires the use of time intensive computational resources to browse position by position all genomic sites covered by RNAseq reads. In order to overcome that, a novel tool ({% tool "reditools2" %}) able to employ HPC resources and reduce the computing time has been developed. However, for transient modifications identified by direct RNA sequencing, compute intensive tools are still required. The computational speed up could be obtained by using GPU graphical cards. In general, for standard RNAseq experiments, each sample requires 8-10 CPUs and at least 8-10 GB of RAM memory. Direct RNA sequencing, instead, requires 8-10 CPUs, at least 1 GPU and 8-10 GB of RAM memory. Once a pipeline has been adopted, it should be used for all samples. +- Data storage is a big issue and not all intermediate files produced during the analyses can be maintained. However, since original data are easily and always available from public sources, analysis files are stored until the end of the established computational workflow. Then, only the final table file including epitranscriptomic variants are recovered and included in {% tool "rediportal" %}. Although this procedure could be time consuming in case of important updates, such as the adoption of a novel genome assembly, it preserves the storage requirements. - Epitranscriptome experts often provide reviews on the best tools and practices, so a good starting point is to read such publications. 
A good example is [Investigating RNA editing in deep transcriptome datasets with REDItools and REDIportal](https://www.nature.com/articles/s41596-019-0279-7). - For RNA editing events, prefer RNAseq data from total and rRNA depleted RNA. Strand oriented reads will improve the read mappability, mitigating mis-mapping biases. @@ -54,6 +54,6 @@ Storing epitranscriptome data is relevant for investigating the biological prope - Can epitranscriptome data be openly shared? ### Solutions -- For long term storage and for preserving epitranscriptome data, raw reads have to be submitted to public databases. This is a mandatory requirement to upload epitranscriptomic annotations in specialized databases. In case of data deposited in public databases such as ENA or SRA, RNA modifications could be uploaded in dedicated databases as [REDIportal](http://srv00.recas.ba.infn.it/atlas/). +- For long term storage and for preserving epitranscriptome data, raw reads have to be submitted to public databases. This is a mandatory requirement to upload epitranscriptomic annotations in specialized databases. In case of data deposited in public databases such as ENA or SRA, RNA modifications could be uploaded in dedicated databases as {% tool "rediportal" %}. - To avoid the storage of a large amount of files, raw data is used to complete all computational steps. Soon after, they are removed as well as intermediate files. Only final tables are preserved and stored in our portal. Data is actually preserved because raw data is always available through public and established databases. - All data included in the REDIportal, including individual variants, annotations and metadata, is sharable and open. Only one database is mentioned here because there is the plan of having a unique and individual resource for epitranscriptome data. 
diff --git a/pages/your_domain/human_data.md b/pages/your_domain/human_data.md index a5436d0a2..216ddb8c7 100644 --- a/pages/your_domain/human_data.md +++ b/pages/your_domain/human_data.md @@ -48,31 +48,31 @@ When working with human data, you must follow established research ethical guide - The legislation that governs this differs between countries. Do seek advice from your research institute. * In most cases, you should get **informed consents** from your research subjects. - An informed consent is an agreement from the research subject to participate in and share personal data for a particular purpose. It shall describe the purpose and any risks involved (along with any mitigations to minimise those risks) in such a way that the research subject can make an informed choice about participating. It should also state under what circumstances the data can be used for the initial purpose, as well as for later re-use by others. - - Consider describing data use conditions using a machine-readable formalised description such as the Data Use Ontology [DUO](https://github.com/EBISPOT/DUO). This will greatly improve the possibilities to make the data FAIR later on. + - Consider describing data use conditions using a machine-readable formalised description such as the Data Use Ontology {% tool "data-use-ontology" %}. This will greatly improve the possibilities to make the data FAIR later on. - Informed consents should be acquired for different purposes: - It is a cornerstone of _research ethics_. Regardless of legal obligations, it is important to ask for informed consents as it is a good research ethics practice and maintains trust in research. - _Ethical permission legislation_ to perform research on human subjects demand informed consents in many cases. - _Personal data protection legislation_ might have informed consent as one legal basis for processing the personal data. 
- _**Note that the content of an informed consent, as defined by one piece of legislation, might not live up to the demands of another piece of legislation.**_ For example, an informed consent that is good enough for an ethical permit, might not be good enough for the demands of the GDPR. - * The [Global Alliance for Genomics and Health (GA4GH)](https://www.ga4gh.org) has recommendations for these issues in their [GA4GH regulatory and ethical toolkit](https://www.ga4gh.org/genomic-data-toolkit/regulatory-ethics-toolkit/), see for instance the [Consent Clauses for Genomic Research](https://drive.google.com/file/d/1O5Ti7g7QJqS3h0ABm-LyTe02Gtq8wlKM/view?usp=sharing). + * The [Global Alliance for Genomics and Health (GA4GH)](https://www.ga4gh.org) has recommendations for these issues in their [GA4GH regulatory and ethical toolkit](https://www.ga4gh.org/genomic-data-toolkit/regulatory-ethics-toolkit/), see for instance the {% tool "consent-clauses-for-genomic-research" %}. * Personal data protection legislation: * **Within the EU.** If you are performing human data research in the EU, or your data subjects are located in the EU, then you must adhere to the General Data Protection Regulation - GDPR. * Requirements for research that fall under the GDPR are outlined in the [RDMkit Data protection page](data_protection). * Attributes of the data determines data sensitivity and sensitivity affects the considerations for data handling. The [RDMkit Data Sensitivity page](sensitive_data) provides guidance on determining and reducing data sensitivity. - * **Outside the EU.** For countries outside the EU, the [International Compilation of Human Research Standards](https://www.hhs.gov/ohrp/sites/default/files/2020-international-compilation-of-human-research-standards.pdf) list relevant legislations. + * **Outside the EU.** For countries outside the EU, the {% tool "international-compilation-of-human-research-standards" %} lists relevant legislations. 
### Solutions - * [Tryggve ELSI Checklist](https://scilifelab-data-guidelines.readthedocs.io/en/latest/docs/general/sensitive_data.html) is a list of Ethical, Legal, and Societal Implications (ELSI) to consider for research projects on human subjects. - * [Data Information System DAISY](https://daisy-demo.elixir-luxembourg.org/) is software tool from ELIXIR that allows the record keeping of data processing activities in research projects. - * [Data Agreement Wizard DAWID](https://dawid.elixir-luxembourg.org) is a software tool from ELIXIR that allows generation of tailor-made data sharing agreements - * [Privacy Impact Assessment Tool](https://www.cnil.fr/en/open-source-pia-software-helps-carry-out-data-protection-impact-assesment) is a software tool to make Data Protection Impact Assessments. - * [MONARC](https://open-source-security-software.net/project/MONARC) is a risk assessment tool that can be used to do Data Protection Impact Assessments - * [Data Use Ontology](https://github.com/EBISPOT/DUO) - * [Informed Consent Ontology](https://github.com/ICO-ontology/ICO) - * [GA4GH regulatory and ethical toolkit](https://www.ga4gh.org/genomic-data-toolkit/regulatory-ethics-toolkit/) - * [EU General Data Protection Regulation](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679&from=EN). - * [BBMRI-ERIC's ELSI Knowledge Base](https://www.bbmri-eric.eu/elsi/knowledge-base/) contains a glossary, agreement templates and guidance. + * {% tool "tryggve-elsi-checklist" %} is a list of Ethical, Legal, and Societal Implications (ELSI) to consider for research projects on human subjects. + * {% tool "daisy" %} is a software tool from ELIXIR that allows the record keeping of data processing activities in research projects. + * {% tool "dawid" %} is a software tool from ELIXIR that allows generation of tailor-made data sharing agreements + * {% tool "pia" %} is a software tool to make Data Protection Impact Assessments. 
+ * {% tool "monarc" %} is a risk assessment tool that can be used to do Data Protection Impact Assessments + * {% tool "data-use-ontology" %} + * {% tool "informed-consent-ontology" %} + * {% tool "ga4gh-regulatory-and-ethics-toolkit" %} + * {% tool "eu-general-data-protection-regulation" %} + * {% tool "bbmri-eric-s-elsi-knowledge-base" %} contains a glossary, agreement templates and guidance. ## Processing and analysing human data @@ -90,7 +90,7 @@ For human data, it is very important to use technical and procedural measures to - Information security measures are both _procedural_ and _technical_. - What information security measures that need to be established should be defined at the planning stage (see above), when doing a risk assessment, e.g. a GDPR Data Protection Impact Assessment. This should identify information security risks, and define measures to mitigate those risks. - Contact the IT or Information security office at your institution to get guidance and support to address these issues. - - [ISO/IEC 27001](https://en.wikipedia.org/wiki/ISO/IEC_27001) is an international information security standard adopted by data centres of some universities and research institutes. + - {% tool "iso-iec-27001" %} is an international information security standard adopted by data centres of some universities and research institutes. * Check whether there are local/national tools and platforms suited to handle human data. - Local research infrastructures have established compute and/or storage solutions with strong information security measures tailored for working on human data. The [RDMkit national resources page](national_resources) lists the sensitive data support facilities available in various countries. Contact your institute or your ELIXIR node for guidance. 
- There are also emerging alternative approaches to analyse sensitive data, such as doing “distributed” computation, where defined analysis workflows are used to do analysis on datasets that do not leave the place where they are stored. @@ -100,14 +100,14 @@ For human data, it is very important to use technical and procedural measures to ### Solutions -* [European Patient Identity Management EUPID](https://eupid.eu/#/home) is a tool that allows researchers to generate unique pseudonyms for patients that participate in rare disease studies. -* [RD-Connect Genome Phenome Analysis Platform](https://rd-connect.eu/) is a platform to improve the study and analysis of Rare Diseases. -* [DisGeNET](https://www.disgenet.org/) is a platform containing collections of genes and variants associated to human diseases. -* [PMut](http://mmb.irbbarcelona.org/PMut) is a platform for the study of the impact of pathological mutations in protein structures. -* [IntoGen](https://www.intogen.org) collects and analyses somatic mutations in thousands of tumor genomes to identify cancer driver genes. -* [BoostDM](https://www.intogen.org/boostdm/search) is a method to score all possible point mutations in cancer genes for their potential to be involved in tumorigenesis. -* [Cancer Genome Interpreter](https://www.cancergenomeinterpreter.org) is designed to identify tumor alterations that drive the disease and detect those that may be therapeutically actionable. -* GA4GH's [Data Security](https://www.ga4gh.org/genomic-data-toolkit/data-security-toolkit/), and [Genomic Data](https://www.ga4gh.org/genomic-data-toolkit/) toolkits provide policies, standards for the secure transfer and processing of human genomics data. GA4GH standards are often implemented into multiple tools. 
For example, the [Crypt4GH data encryption standard](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/) is implemented both in [SAMTools](http://samtools.github.io/hts-specs/crypt4gh.pdf) and also provided as a [utility from the EGA Archive](https://github.com/EGA-archive/crypt4gh). +* {% tool "eupid" %} is a tool that allows researchers to generate unique pseudonyms for patients that participate in rare disease studies. +* {% tool "rd-connect-genome-phenome-analysis-platform" %} is a platform to improve the study and analysis of Rare Diseases. +* {% tool "disgenet" %} is a platform containing collections of genes and variants associated to human diseases. +* {% tool "pmut" %} is a platform for the study of the impact of pathological mutations in protein structures. +* {% tool "intogen" %} collects and analyses somatic mutations in thousands of tumor genomes to identify cancer driver genes. +* {% tool "boostdm" %} is a method to score all possible point mutations in cancer genes for their potential to be involved in tumorigenesis. +* {% tool "cancer-genome-interpreter" %} is designed to identify tumor alterations that drive the disease and detect those that may be therapeutically actionable. +* GA4GH's [Data Security](https://www.ga4gh.org/genomic-data-toolkit/data-security-toolkit/), and [Genomic Data](https://www.ga4gh.org/genomic-data-toolkit/) toolkits provide policies, standards for the secure transfer and processing of human genomics data. GA4GH standards are often implemented into multiple tools. For example, the [Crypt4GH data encryption standard](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/) is implemented both in [SAMTools](http://samtools.github.io/hts-specs/crypt4gh.pdf) and also provided as a utility from the EGA Archive, {% tool "crypt4gh" %}. 
* [GA4GH's Cloud Workstream](https://www.ga4gh.org/how-we-work/2020-2021-roadmap/2020-2021-roadmap-part-ii/cloud-2020-2021-roadmap/) is a more recent initiative and focuses on keeping data in secure cloud environments and meanwhile bringing computational analysis to the data. ## Preserving human data @@ -131,9 +131,9 @@ It is a good ethical practice to ensure that data underlying research is preserv * Do address these issues of long-term preservation and data publication as early as possible, preferably already at the planning stage. If you are relying on your research institution to provide a solution, it might need time to plan for this. ### Solutions -* [GA4GH data security toolkit](https://www.ga4gh.org/genomic-data-toolkit/data-security-toolkit/) -* [International Standards Organisation's ISO/IEC 27001](https://en.wikipedia.org/wiki/ISO/IEC_27001) is an international information security standard adopted by data centres of some universities and research institutes. -* [The European Genome-phenome Archive (EGA)](https://ega-archive.org/) is an international service for secure archiving and sharing of all types of personally identifiable genetic and phenotypic data resulting from biomedical studies and healthcare centres. All services are free of charge. The EGA stores the data and metadata long-term, without ending date of the service. The data is backed-up in two separate geographical locations. The storing is GDPR-compliant, thanks to the use of [Ga4GH encryption standard](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/) and continuously kept up-to-date. National repositories working as Federated EGA nodes are available in some countries like Sweden, Norway, Finland, Germany and Spain. Those may address specific additional national legal needs, not included in European regulation. 
+* {% tool "ga4gh-data-security-toolkit" %} +* {% tool "iso-iec-27001" %} is an international information security standard adopted by data centres of some universities and research institutes. +* {% tool "the-european-genome-phenome-archive" %} is an international service for secure archiving and sharing of all types of personally identifiable genetic and phenotypic data resulting from biomedical studies and healthcare centres. All services are free of charge. The EGA stores the data and metadata long-term, without ending date of the service. The data is backed-up in two separate geographical locations. The storing is GDPR-compliant, thanks to the use of [Ga4GH encryption standard](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/) and continuously kept up-to-date. National repositories working as Federated EGA nodes are available in some countries like Sweden, Norway, Finland, Germany and Spain. Those may address specific additional national legal needs, not included in European regulation. ## Sharing and reusing of human data @@ -154,8 +154,9 @@ To make human data reusable for others, it must be discoverable, stored in a saf * Transferring human data has to be done in a secure way in order to avoid breaches of privacy. Encrypting of human data whilst it is being transferred provides successful protection if the data is intercepted by an external party while the transfer is being done. ### Solutions -* The [European Genome-phenome Archive (EGA)](https://ega-archive.org/) is an international service for secure archiving and sharing of all types of personally identifiable genetic and phenotypic data resulting from biomedical studies and healthcare centres. All services are free of charge. The EGA platform offers secure and European law-compliant data sharing. Data treatment is FAIR-compliant, thus data is discoverable in the EGA website and shareable with other researchers through authorisation and authentication protocols. 
The right to allow access to any dataset belongs to the Data controllers (and not to the EGA), who are responsible to sign a Data Access Agreement (DAA) with researchers requesting access to their data. Templates of the legal documents are provided. The EGA hosts data from all around the world and distributes it where and when the data controllers permit. -* [dbGAP](https://www.ncbi.nlm.nih.gov/gap/) and [JGA](https://www.ddbj.nig.ac.jp/jga/index-e.html) are other international data repositories, based in the USA and Japan respectively, that adopt a controlled-access model based on their national regulations. Due to European GDPR specific requirements, it may not be possible to deposit EU subjects’ data to these repositories. -* The [GA4GH Beacon](https://beacon-project.io) project is a GA4GH initiative that enables genomic and clinical data sharing across federated networks. A Beacon is defined as a web-accessible service that can be queried for information about a specific allele with no reference to a specific sample or patient, thereby reducing privacy risks. -* The [GA4GH Data Use Ontology DUO](https://github.com/EBISPOT/DUO) is an international standard, which provides codes to represent data use restrictions for controlled access datasets. -* [Crypt4gh](https://crypt4gh.readthedocs.io/en/latest/) is a Python tool to encrypt, decrypt or re-encrypt files, according to the GA4GH encryption file format. +* The {% tool "the-european-genome-phenome-archive" %} is an international service for secure archiving and sharing of all types of personally identifiable genetic and phenotypic data resulting from biomedical studies and healthcare centres. All services are free of charge. The EGA platform offers secure and European law-compliant data sharing. Data treatment is FAIR-compliant, thus data is discoverable in the EGA website and shareable with other researchers through authorisation and authentication protocols. 
The right to allow access to any dataset belongs to the Data controllers (and not to the EGA), who are responsible to sign a Data Access Agreement (DAA) with researchers requesting access to their data. Templates of the legal documents are provided. The EGA hosts data from all around the world and distributes it where and when the data controllers permit. +* {% tool "dbgap" %} and {% tool "jga" %} are other international data repositories, based in the USA and Japan respectively, that adopt a controlled-access model based on their national regulations. Due to European GDPR specific requirements, it may not be possible to deposit EU subjects’ data to these repositories. +* The {% tool "beacon" %} project is a GA4GH initiative that enables genomic and clinical data sharing across federated networks. A Beacon is defined as a web-accessible service that can be queried for information about a specific allele with no reference to a specific sample or patient, thereby reducing privacy risks. +* The {% tool "data-use-ontology" %} is an international standard, which provides codes to represent data use restrictions for controlled access datasets. +* {% tool "crypt4gh" %} is a Python tool to encrypt, decrypt or re-encrypt files, according to the GA4GH encryption file format. +* {% tool "humanmine" %} is an integrative database of *Homo sapiens* genomic data, that integrates many types of human data and provides a powerful query engine, export for results, analysis for lists of data and FAIR access via web services. diff --git a/pages/your_domain/intrinsically_disordered_proteins.md b/pages/your_domain/intrinsically_disordered_proteins.md index 4310845d9..95ff80e2a 100644 --- a/pages/your_domain/intrinsically_disordered_proteins.md +++ b/pages/your_domain/intrinsically_disordered_proteins.md @@ -26,13 +26,13 @@ You can split the experimental process in several steps: * How should you publish IDP data to a wider audience? 
### Solutions -* The IDP community developed a [MIADE](http://www.psidev.info/intrinsically-disordered-proteins-workgroup) standard under a PSI-ID workgroup. The standard specifies the minimum information required to comprehend the result of a disorder experiment. +* The IDP community developed a {% tool "miade" %} standard under a PSI-ID workgroup. The standard specifies the minimum information required to comprehend the result of a disorder experiment. The standard is available in XML and TAB format. You can check example annotation in [XML](https://github.com/normandavey/HUPO-PSI-ID/blob/master/HUPO-PSI-ID_XML_format_compact_NFAT_example.xml) and [TAB](https://github.com/normandavey/HUPO-PSI-ID/blob/master/HUPO-PSI-ID_TAB_format.xlsx) format and adapt it to your data. -* The IDP community developed the Intrinsically Disordered Proteins Ontology ([IDPO](https://disprot.org/ontology)). The ontology is an agreed consensus of terms used in the community, organised in a structured way. +* The IDP community developed the Intrinsically Disordered Proteins Ontology ({% tool "idpo" %}). The ontology is an agreed consensus of terms used in the community, organised in a structured way. The ontology is available in [OWL](https://disprot.org/assets/data/idpontology_disprot_8_v0.1.0.owl) and [OBO](https://disprot.org/assets/data/idpontology_disprot_8_v0.1.0.obo) format. -* You should deposit primary data into relevant community databases ([BMRB](http://www.bmrb.wisc.edu/), [PCDDB](https://pcddb.cryst.bbk.ac.uk/), [SASBDB](https://www.sasbdb.org/)). You should deposit literature data to the manually curated database [DisProt](https://disprot.org/). DisProt is built on MIADE standard and IDPO ontology. As such, DisProt requires curators to annotate all new data according to community standards. IDP data from primary databases, together with curated experimental annotations and software predictions, is integrated in the comprehensive [MobiDB](https://mobidb.org/) database. 
DisProt and MobiDB add and expose [Bioschemas](https://bioschemas.org/) markup to all data records increasing data findability and interoperability. +* You should deposit primary data into relevant community databases ({% tool "bmrb" %}, {% tool "pcddb" %}, {% tool "sasbdb" %}). You should deposit literature data to the manually curated database {% tool "disprot" %}. DisProt is built on MIADE standard and IDPO ontology. As such, DisProt requires curators to annotate all new data according to community standards. IDP data from primary databases, together with curated experimental annotations and software predictions, is integrated in the comprehensive {% tool "mobidb" %} database. DisProt and MobiDB add and expose {% tool "bioschemas" %} markup to all data records increasing data findability and interoperability. ## Issues annotating or describing an IDP related term or study @@ -45,9 +45,9 @@ Most common issues that you as a researcher can encounter during the mapping pro * how to deal with missing terms in IDPO? ### Solutions -* In order to uniquely identify the protein under study, you should identify the protein on [UniProt](https://www.uniprot.org/) reference protein database. The protein identifier must be complemented with an isoform identifier (if needed) in order to completely match the experimental protein sequence. +* In order to uniquely identify the protein under study, you should identify the protein on {% tool "uniprot" %} reference protein database. The protein identifier must be complemented with an isoform identifier (if needed) in order to completely match the experimental protein sequence. - Use the [SIFTS](https://www.ebi.ac.uk/pdbe/docs/sifts/) database to precisely map the experimental protein fragment (deposited at [PDB](https://www.ebi.ac.uk/pdbe/)) to a reference protein database ([UniProt](https://www.uniprot.org/)) at an amino acid level. 
+ Use the {% tool "sifts" %} database to precisely map the experimental protein fragment (deposited at {% tool "pdb" %}) to a reference protein database ({% tool "uniprot" %}) at an amino acid level. * Experimental evidence from literature must be mapped to relevant IDPO terms. If no suitable term could be found in IDPO, try with following resources: * [Evidence & Conclusion Ontology (ECO)](https://www.ebi.ac.uk/ols/ontologies/eco) for experimental methods * [Molecular Interactions Controlled Vocabulary](https://www.ebi.ac.uk/ols/ontologies/mi) for molecular interactions diff --git a/pages/your_domain/marine_metagenomics.md b/pages/your_domain/marine_metagenomics.md index 4c882678d..69082bf1e 100644 --- a/pages/your_domain/marine_metagenomics.md +++ b/pages/your_domain/marine_metagenomics.md @@ -28,7 +28,7 @@ Moreover, in marine metagenomics, it is also necessary to characterize the marin - As a starting point to get acquainted with the intricacies of reporting marine metagenomics experiments, the following publications are recommended reading: - [The metagenomic data life-cycle: standards and best practices](https://doi.org/10.1093/gigascience/gix047) which describes the metagenomics data life-cycle in detail. - [Marine microbial biodiversity, bioinformatics and biotechnology (M2B3) data reporting and service standards](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4511511/), guided by marine microbial research, and providing clear examples and colour-coded illustrations. 
-- Metadata standards that apply to marine metagenomics data are the [Genome Standards Consortium](https://gensc.org/) family of minimum information standards, including the core standard [Minimum Information about any (x) Sequence (MIxS)](https://genomicsstandardsconsortium.github.io/mixs/), the derived [Minimum Information about (Meta)genome Sequence (MIGS/MIMS)](https://www.gensc.org/pages/projects/mixs-gsc-project.html), and the also derived [Minimum Information About a Metagenome-Assembled Genome (MIMAG)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6436528/) that is presently only available as a scientific publication. +- Metadata standards that apply to marine metagenomics data are the {% tool "genomic-standards-consortium" %} family of minimum information standards, including the core standard {% tool "mixs" %}, the derived {% tool "migs-mims" %}, and the also derived [Minimum Information About a Metagenome-Assembled Genome (MIMAG)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6436528/) that is presently only available as a scientific publication. ## Tools and resources for analyzing metagenomics datasets diff --git a/pages/your_domain/microbial_biotechnology.md b/pages/your_domain/microbial_biotechnology.md index 789c3647d..c655e11be 100644 --- a/pages/your_domain/microbial_biotechnology.md +++ b/pages/your_domain/microbial_biotechnology.md @@ -48,7 +48,7 @@ Ultimately, the ideal scenario is that data is captured in a standard format and Due to the interdisciplinary nature of the field, data arising from studies in microbial biotechnology relate to both computational studies, such as modelling and simulation, and the results of wet-lab based studies used for the construction and experimental characterisation of microbial systems. Given the breadth, scope and rapid development of the field of microbial biotechnology, this guide is by no means exhaustive. -This guide is by no means comprehensive. 
Please get in touch with further suggestions for relevant standards and data sharing tools that can make it more complete. Sites such as [Fairsharing](https://fairsharing.org/) can provide a wealth of information about standards that may be appropriate for a given data type and not mentioned in this brief guide. +This guide is by no means comprehensive. Please get in touch with further suggestions for relevant standards and data sharing tools that can make it more complete. Sites such as {% tool "fairsharing" %} can provide a wealth of information about standards that may be appropriate for a given data type and not mentioned in this brief guide. ## Design: Biological hosts - metadata, ontologies and (meta)data publication @@ -69,20 +69,20 @@ It is recommended to publish and share information about biological hosts in ded ### Solutions #### Metadata schemas and ontologies -* Current data standards to capture the taxonomic and phenotypic data are still evolving, with notable work on the [Access to Biological Collection Data Schema (ABCD)](https://www.tdwg.org/standards/abcd/) and the activities of the [Biodiversity Information Standards task force (TDWG)](https://www.tdwg.org/). The Darwin Core standard from the [(TDWG)](https://www.tdwg.org/) is an appropriate standard to provide metadata about the taxonomic properties of a particular microorganism. -* The [NCBI taxonomy homepage](https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/) can also provide appropriate taxon IDs for recording taxonomic information. -* Information about proposed standardised nomenclature for prokaryotes can be found at the [List of Prokaryotic names with Standing in Nomenclature (LPSN)](https://lpsn.dsmz.de/) ([Parte et al., 2020](https://pubmed.ncbi.nlm.nih.gov/32701423/)). 
+* Current data standards to capture the taxonomic and phenotypic data are still evolving, with notable work on the {% tool "access-to-biological-collection-data-schema" %} and the activities of the {% tool "biodiversity-information-standards" %}. The Darwin Core standard from the {% tool "biodiversity-information-standards" %} is an appropriate standard to provide metadata about the taxonomic properties of a particular microorganism. +* The {% tool "ncbi-taxonomy" %} homepage can also provide appropriate taxon IDs for recording taxonomic information. +* Information about proposed standardised nomenclature for prokaryotes can be found at the {% tool "list-of-prokaryotic-names-with-standing-in-nomenclature" %} ([Parte et al., 2020](https://pubmed.ncbi.nlm.nih.gov/32701423/)). * Data standards for recording the information about where a microorganism was isolated from do exist and this topic is covered in other RDMkit pages such as the [marine metagenomics](marine_metagenomics) domain. Information can also be found in a publication by Ten Hoopen and colleagues ([Ten Hoopen et al., 2015](https://pubmed.ncbi.nlm.nih.gov/26203332/)). -* [The Environment Ontology](https://sites.google.com/site/environmentontology/) is also relevant here to describe environmental entities of all kinds, from microscopic to intergalactic scales. +* {% tool "the-environment-ontology" %} is also relevant here to describe environmental entities of all kinds, from microscopic to intergalactic scales. * A set of genetic nomenclature standards have been established by microbiologists and have been used for many years. These are still a useful way of communicating data about the genotype of a strain ([Maloy and Hughes, 2007](https://pubmed.ncbi.nlm.nih.gov/17352909/)). * Minimal information standards have been established to specify this metadata, such as the MIGS standard ([Field et al., 2008](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2409278/)).
#### (Meta)data publication and sharing -* For sharing host information, you can use databases such as the [Bacterial Diversity Metadatabase (Bacdive)](https://bacdive.dsmz.de). You can also deposit strains and associated information in a strain repository such as the [National Collection of Industrial, Food and Marine Bacteria (NCIMB)](https://www.ncimb.com/culture-collection/) or the [American Type Culture Collection (ATCC)](https://www.lgcstandards-atcc.org/?geo_country=gb). There are also many organisations established for individual species of microorganisms, the [Bacillus Genetic Stock Centre (BGSC)](http://www.bgsc.org/) being one example. -* Databases such as [CellRepo](https://cellrepo.herokuapp.com/) allow strains that have been barcoded to be tracked using a version control type system ([Tellechea-Luzardo et al., 2020](https://pubmed.ncbi.nlm.nih.gov/32078768/)). -* Genomic information can be captured at the nucleotide level using the well-known [European Nucleotide Archive standard (ENA)](https://www.ebi.ac.uk/ena/browser/home) and submitted to the ENA database to allow the information to be shared. -* The database collection from the [International Nucleotide Sequence Database Collaboration](http://www.insdc.org/) provides an umbrella for gathering and sharing a variety of sequence data from different sequence databases internationally. -* Other databases such as [GenBank](https://www.ncbi.nlm.nih.gov/genbank/) and the [DNA Data Bank of Japan (DDBJ)](https://www.ddbj.nig.ac.jp/index-e.html) also cater for sequence information. +* For sharing host information, you can use databases such as the {% tool "bacdive" %}. You can also deposit strains and associated information in a strain repository such as the {% tool "ncimb" %} or the {% tool "atcc" %}. There are also many organisations established for individual species of microorganisms, the {% tool "bacillus-genetic-stock-center" %} being one example. 
+* Databases such as {% tool "cellrepo" %} allow strains that have been barcoded to be tracked using a version control type system ([Tellechea-Luzardo et al., 2020](https://pubmed.ncbi.nlm.nih.gov/32078768/)). +* Genomic information can be captured at the nucleotide level using the well-known {% tool "european-nucleotide-archive" %} and submitted to the ENA database to allow the information to be shared. +* The database collection from the {% tool "international-nucleotide-sequence-database-collaboration" %} provides an umbrella for gathering and sharing a variety of sequence data from different sequence databases internationally. +* Other databases such as {% tool "genbank" %} and the {% tool "dna-data-bank-of-japan" %} also cater for sequence information. ## Design: Synthetic parts - existing data, metadata collection and publication @@ -100,30 +100,30 @@ Appropriate and detailed description of the synthetic parts design is critical f #### Existing data * Sequences are characterised as parts which can be found with the assistance of various repositories such as: - * [iGEM Parts Registry](http://parts.igem.org/Main_Page) - * [The Joint BioEnergy Institute's Inventory of Composable Elements (JBEI-ICE)](https://ice.jbei.org) ([Ham et al., 2012](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3467034/)) - * [SynBioHub](https://synbiohub.org) -* Sequences can be isolated from standard genetic databases such as [ENA](https://www.ebi.ac.uk/ena/browser/home) and [GenBank](https://www.ncbi.nlm.nih.gov/genbank/). + * {% tool "igem-parts-registry" %} + * {% tool "jbei-ice" %} ([Ham et al., 2012](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3467034/)) + * {% tool "synbiohub" %} +* Sequences can be isolated from standard genetic databases such as {% tool "european-nucleotide-archive" %} and {% tool "genbank" %}. 
#### Tools for metadata collection -* You can manage the design stage using genetic computer aided design tools, such as [Benchling](https://benchling.com) for example, where information can be shared within small teams. [Benchling](https://benchling.com) supports a number of different data standards including FASTA, GenBank and SBOL1. +* You can manage the design stage using genetic computer aided design tools, such as {% tool "benchling" %} for example, where information can be shared within small teams. {% tool "benchling" %} supports a number of different data standards including FASTA, GenBank and SBOL1. * Sometimes FASTA will be the most relevant format, for example when sending for DNA synthesis. * Formats like GenBank, DICOM-SB ([Sainz de Murieta, Bultelle and Kitney, 2016](https://pubmed.ncbi.nlm.nih.gov/26854090/)) or SBOL may be more applicable for instances where more information, such as functional annotation, would be useful to be shared. * SBOL 2.0 and higher allows more than just the genetics of a system to be captured and shared. Using SBOL allows interactions between components in the design to be specified, information about RNA and proteins can be included and the provenance of a design can also be captured. Experimental information relating to the test and build of a system can also be captured and shared. -* SBOL data can be made using tools such as [Benchling](https://benchling.com) (SBOL1 only), [SBOL Designer](https://sboldesigner.github.io/) ([Zhang et al., 2017](https://pubmed.ncbi.nlm.nih.gov/28441476/)) and [ShortBOL](http://shortbol.org/) to name but a few. A more comprehensive list of SBOL tools can be found on the [sbolstandard](https://sbolstandard.org/) website. +* SBOL data can be made using tools such as {% tool "benchling" %} (SBOL1 only), {% tool "sboldesigner" %} ([Zhang et al., 2017](https://pubmed.ncbi.nlm.nih.gov/28441476/)) and {% tool "shortbol" %} to name but a few. 
A more comprehensive list of SBOL tools can be found on the {% tool "synthetic-biology-open-language" %} website. * More generally, the [Investigation/Study/Assay (ISA)](https://isa-specs.readthedocs.io/) model can be used in systems biology, life sciences, environmental and biomedical domains to structure research outputs. The [ISA-Tab](https://isa-specs.readthedocs.io/en/latest/isatab.html) format provides a framework for capturing these data in CSV files. -* [Rightfield](https://rightfield.org.uk/download.html) provides a mechanism for capturing metadata using easy to use spreadsheets. +* {% tool "rightfield" %} provides a mechanism for capturing metadata using easy to use spreadsheets. #### (Meta)data publication and sharing * Once the design is complete, you can share this information via a repository such as: - * [iGEM Parts Registry](http://parts.igem.org/Main_Page) - * [SynBioHub](https://synbiohub.org) - * [JBEI-ICE](https://ice.jbei.org) - * [Addgene](https://www.addgene.org) + * {% tool "igem-parts-registry" %} + * {% tool "synbiohub" %} + * {% tool "jbei-ice" %} + * {% tool "addgene" %} * Much information about its performance can be included, varying from experimental results such as fluorescence curves to predicted performance based on modelling. * It would be recommended to use standard figures that can be easily understood. - * [SBOL-Visual](https://sbolstandard.org/visual-glyphs/) is a good example of a graphical standard; it utilises standard shapes to represent different genetic parts which can help clarify a complex synthetic construct. [SBOL-Visual](https://sbolstandard.org/visual-glyphs/) can be crafted using tools such as [VISBOL](http://visbol.org/). -* Platforms such as [FAIRDOM-SEEK](https://fair-dom.org/platform/seek/), built on technologies such as ISA, support a large range of systems and synthetic biology projects. 
[FAIRDOM-SEEK](https://fair-dom.org/platform/seek/) provides a web-based resource for sharing scientific research datasets, models or simulations, and processes. [FAIRDOM-SEEK](https://fair-dom.org/platform/seek/) can be installed locally or [FAIRDOMHub](https://fairdomhub.org/), a version of [FAIRDOM-SEEK](https://fair-dom.org/platform/seek/) is available for general community use. + * {% tool "sbol-visual" %} is a good example of a graphical standard; it utilises standard shapes to represent different genetic parts which can help clarify a complex synthetic construct. {% tool "sbol-visual" %} can be crafted using tools such as {% tool "visbol" %}. +* Platforms such as {% tool "fairdom-seek" %}, built on technologies such as ISA, support a large range of systems and synthetic biology projects. {% tool "fairdom-seek" %} provides a web-based resource for sharing scientific research datasets, models or simulations, and processes. {% tool "fairdom-seek" %} can be installed locally or {% tool "fairdomhub" %}, a version of {% tool "fairdom-seek" %} is available for general community use. ## Design: Metabolomic pathways and enzymes - metadata, ontologies and (meta)data publication @@ -137,13 +137,13 @@ Here we describe some of the available options to accurately represent and store ### Solutions #### Metadata and ontologies * SBOL allows information about the enzymes and the metabolic pathways to be captured in the design document and so this is a viable approach for sharing more than just the genetics of the system. -* Enzymes can be assigned EC numbers, according to the guidance from the [International Union of Biochemistry and Molecular Biology (IUBMB)](https://www.qmul.ac.uk/sbcs/iubmb/), to indicate their function and an entry made in the [BRaunschweig ENzyme DAtabase](https://www.brenda-enzymes.org/) (BRENDA). 
-* More generally, the [IUPAC-IUBMB Joint Commission on Biochemical Nomenclature (JCBN)](https://www.qmul.ac.uk/sbcs/iupac/jcbn/) encourages the communication of biochemical information using generally understood terminology. +* Enzymes can be assigned EC numbers, according to the guidance from the {% tool "iupac-iubmb-joint-commission-on-biochemical-nomenclature" %}, to indicate their function and an entry made in the {% tool "brenda" %} (BRENDA). +* More generally, the {% tool "iupac-iubmb-joint-commission-on-biochemical-nomenclature" %} encourages the communication of biochemical information using generally understood terminology. #### (Meta)data publication -* Databases such as SBOLME ([Kuwahara et al., 2017](https://pubmed.ncbi.nlm.nih.gov/28076956/)) or [SynBioHub](https://synbiohub.org) can be used to share the data. -* Metabolite information can also be submitted to, or referred to in, [ChEBI](https://www.ebi.ac.uk/chebi/). -* [BRaunschweig ENzyme DAtabase](https://www.brenda-enzymes.org/) (BRENDA). +* Databases such as SBOLME ([Kuwahara et al., 2017](https://pubmed.ncbi.nlm.nih.gov/28076956/)) or {% tool "synbiohub" %} can be used to share the data. +* Metabolite information can also be submitted to, or referred to in, {% tool "chebi" %}. +* {% tool "brenda" %} (BRENDA). ## Design: mathematical model - standards and (meta)data publication @@ -158,10 +158,10 @@ How can the models be shared via repositories and made available in a way that * It is important to associate the genetic design with its corresponding model. ### Solutions -* [Systems Biology Markup Language (SBML)](https://sbml.org) is a popular standardised format for sharing mathematical models for which a variety of tools are available for model building. -* More generally, the [COmputational Modeling in BIology NEtwork (COMBINE)](http://co.mbine.org/), provides a platform for coordinating standardisation of models in biology. 
+* {% tool "systems-biology-markup-language" %} is a popular standardised format for sharing mathematical models for which a variety of tools are available for model building. +* More generally, the {% tool "computational-modeling-in-biology-network" %}, provides a platform for coordinating standardisation of models in biology. * SBOL can also be used to associate a genetic design with its corresponding model. -* Models can be shared in model repositories such as [biomodels](https://www.ebi.ac.uk/biomodels/). +* Models can be shared in model repositories such as {% tool "biomodels" %}. ## Build: methods - documentation and (meta)data publication @@ -183,11 +183,11 @@ The current method of sharing information about the building of microbial system The metadata standards for a build exercise are still to be defined and so at the discretion of the data manager. * SBOL versions 2.0 and above provides a data standard that allows build data that has been grouped to be associated with design data for a part, device or system along with a minimal amount of metadata. -* Similarly, [research object bundles](https://www.researchobject.org/), and more recently [RO-Crates](https://www.researchobject.org/ro-crate/), can be used to gather together build data and test data with information about the overall study. +* Similarly, [research object bundles](https://www.researchobject.org/), and more recently {% tool "research-object-crate" %}, can be used to gather together build data and test data with information about the overall study. #### (Meta)data publication and sharing * The design information about the vector DNA or RNA sequence should be shared via public databases such as ENA or Genbank. -* Various DNA synthesis companies build DNA from a computer specification of the sequence and also a variety of experimental approaches for assembling DNA molecules. 
This information can be shared as free text attached to a design in SBOL format and uploaded to a repository that supports SBOL2 format and above such as [SynBioHub](https://synbiohub.org). +* Various DNA synthesis companies build DNA from a computer specification of the sequence and also a variety of experimental approaches for assembling DNA molecules. This information can be shared as free text attached to a design in SBOL format and uploaded to a repository that supports SBOL2 format and above such as {% tool "synbiohub" %}. * Once grouped together in a free form the data can be archived along with the metadata, collecting the data together in an archived form using a file compression format. The [combine archive format](http://co.mbine.org/specifications/omex.version-1) may also be useful. @@ -211,29 +211,29 @@ The data arising from assays for product development is highly variable and beyo #### Metadata standards * **Minimum Information Standard for Engineered Organism Experiments (MIEO).** Minimal information necessary to record the growth of an organism in culture, has been described by Hect and colleagues ([Hecht et al., 2018](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6283831/)). -* **Enzyme.** If your product is a protein such as an enzyme then some standards developed by the [Standards for Reporting Enzyme Data (STRENDA) Consortium](https://www.beilstein-institut.de/en/projects/strenda/) may be helpful ([‘Standards for Reporting Enzyme Data: The STRENDA Consortium: What it aims to do and why it should be helpful’, 2014](https://www.sciencedirect.com/science/article/pii/S2213020914000135)). +* **Enzyme.** If your product is a protein such as an enzyme then some standards developed by the {% tool "standards-for-reporting-enzyme-data" %} may be helpful ([‘Standards for Reporting Enzyme Data: The STRENDA Consortium: What it aims to do and why it should be helpful’, 2014](https://www.sciencedirect.com/science/article/pii/S2213020914000135)). 
-* **Microscopy.** Microscopy is often also used to characterise the behaviour of engineered microorganisms. Standards such as the [Open Microscopy Environment Ontology](https://fairsharing.org/bsg-s001430/) and the [Cellular Microscopy Phenotype Ontology (CMPO)](https://www.ebi.ac.uk/cmpo/) can help provide standardised metadata terms. +* **Microscopy.** Microscopy is often also used to characterise the behaviour of engineered microorganisms. Standards such as the [Open Microscopy Environment Ontology](https://fairsharing.org/bsg-s001430/) and the {% tool "cellular-microscopy-phenotype-ontology" %} can help provide standardised metadata terms. -* **Flow Cytometry data.** The [International Society for the Advancement of Cytometry (ISAC)](https://isac-net.org/page/Data-Standards) provides information on a variety of appropriate data standards for capturing Flow Cytometry data (used to characterise microbial populations at a single cell level) ([Spidlen et al., 2021](https://pubmed.ncbi.nlm.nih.gov/32881398/)). +* **Flow Cytometry data.** The {% tool "international-society-for-the-advancement-of-cytometry" %} provides information on a variety of appropriate data standards for capturing Flow Cytometry data (used to characterise microbial populations at a single cell level) ([Spidlen et al., 2021](https://pubmed.ncbi.nlm.nih.gov/32881398/)). -* **Nucleic acids information.** The [ENA](https://www.ebi.ac.uk/ena/browser/home), amongst others, provides guidance on the metadata for RNAseq datasets. +* **Nucleic acids information.** The {% tool "european-nucleotide-archive" %}, amongst others, provides guidance on the metadata for RNAseq datasets. -* **Proteomics.** [HUPO proteomics standards initiative](https://www.hupo.org/Proteomics-Standards-Initiative) provides a range of guidance for capturing and sharing proteomics data. +* **Proteomics.** {% tool "proteomics-standards-initiative" %} provides a range of guidance for capturing and sharing proteomics data. 
#### (Meta)data publication and sharing * **Protocols.** Protocols used for testing can be shared using platforms such as: - * [protocols.io](https://www.protocols.io/). + * {% tool "protocols-io" %}. * [iGEM engineering hub](https://2021.igem.org/Engineering/Introduction), which also provides some guidance for a variety of data capture protocols and standardised units. -* **Images.** Images can be shared with the community by repositories such as the [Image Data Resource (IDR)](https://idr.openmicroscopy.org/). +* **Images.** Images can be shared with the community by repositories such as the {% tool "image-data-resource" %}. * **Nucleic acids information.** Information about nucleic acids can be shared via - * [ENA](https://www.ebi.ac.uk/ena/browser/home) - * [GEO](https://www.ncbi.nlm.nih.gov/geo/) - * [ArrayExpress](https://www.ebi.ac.uk/arrayexpress/) -* **Proteomics.** Proteomics data can be shared via [HUPO proteomics standards initiative](https://www.hupo.org/Proteomics-Standards-Initiative). -* **Metabolic studies.** Metabolomic studies can be shared through the [Metabolome Exchange Database](http://www.metabolomexchange.org/site/), which provides a resource for sharing data from metabolic studies and guidance for the submission of metabolome data. -* **Biological sources.** Information about biological sources can be shared via the [BioStudies database](https://www.ebi.ac.uk/biostudies/), which has been set up to capture and share information about multi-omics and other biological studies ([Sarkans et al., 2018](https://pubmed.ncbi.nlm.nih.gov/29069414/)). + * {% tool "european-nucleotide-archive" %} + * {% tool "gene-expression-omnibus" %} + * {% tool "arrayexpress" %} +* **Proteomics.** Proteomics data can be shared via {% tool "proteomics-standards-initiative" %}. 
+* **Metabolic studies.** Metabolomic studies can be shared through the {% tool "metabolomexchange" %}, which provides a resource for sharing data from metabolic studies and guidance for the submission of metabolome data. +* **Biological sources.** Information about biological sources can be shared via the {% tool "biostudies" %} database, which has been set up to capture and share information about multi-omics and other biological studies ([Sarkans et al., 2018](https://pubmed.ncbi.nlm.nih.gov/29069414/)). ## Bibliography diff --git a/pages/your_domain/plant_sciences.md b/pages/your_domain/plant_sciences.md index 6b134a7e5..0fa0ce4b3 100644 --- a/pages/your_domain/plant_sciences.md +++ b/pages/your_domain/plant_sciences.md @@ -28,7 +28,7 @@ Another particularity of this domain is the absence of central deposition databa ## Plant biological materials: (meta)data collection and sharing ### Description -Plant genetic studies such as genomic-based prediction of phenotypes requires the integration of genomic and phenotypic data with data about their environment. While phenotypic and environmental data are typically stored together in phenotyping databases, genomic and other types of molecular data are typically deposited in international deposition databases, for example, those of the [International Nucleotide Sequence Database Collaboration INSDC global consortium](http://www.insdc.org/). +Plant genetic studies such as genomic-based prediction of phenotypes requires the integration of genomic and phenotypic data with data about their environment. While phenotypic and environmental data are typically stored together in phenotyping databases, genomic and other types of molecular data are typically deposited in international deposition databases, for example, those of the {% tool "international-nucleotide-sequence-database-collaboration" %}. 
It can be challenging to integrate phenotypic and molecular data even within a single project, particularly if the project involves studying a panel of genetic resources in different conditions. It is paramount to maintain the link between the plant material in the field, the samples extracted from them (e.g. at different development stages), and the results of omics experiments (e.g. transcriptomics, metabolomics) performed on those samples, across all datasets that will be generated and published. @@ -50,28 +50,28 @@ Integrating phenotyping and molecular data, both within and between studies, hin * Detailed metadata needs to be captured on the biological materials used in the study—the accession in the genebank or the experimental identification and, when applicable, the seed lots or the parent plants as well as the possible samples taken from the plant—as they are the key to integrating omics and phenotyping datasets. #### Checklists and metadata standard -* The identification and description of plant materials should comply with the standard for the identification of plant genetic resources, The [Multi-Crop Passport Descriptors](https://www.bioversityinternational.org/e-library/publications/detail/faobioversity-multi-crop-passport-descriptors-v21-mcpd-v21/)(MCPD). +* The identification and description of plant materials should comply with the standard for the identification of plant genetic resources, The {% tool "multi-crop-passport-descriptor" %}. * If you are studying experimental plant materials that cannot be traced to an existing genebank or germplasm database, you should describe them in accordance with the MCPD in as much detail as possible. * If your plant materials can be traced to an existing genebank or germplasm database, you need only to cross reference to the MCPD information already published in the genebank or germplasm database. 
* The minimal fields from MCPD are listed in the Biological Material section of the Minimum Information About Plant Phenotyping Experiments (MIAPPE) metadata standard. * For wild plants and accessions from tree collections, precise identification often requires the GPS coordinates of the tree. MIAPPE provides the necessary fields. #### Tools for (meta)data collection -* For identifying your plant material in a plant genetic resource repository (genebank or germplasm database), you can consult the [European Cooperative Programme for Plant Genetic Resources](https://www.ecpgr.cgiar.org/) (ECPGR), which includes a [central germplasm database](https://www.ecpgr.cgiar.org/resources/germplasm-databases/ecpgr-central-crop-databases) and a catalogue of relevant [external databases](https://www.ecpgr.cgiar.org/resources/germplasm-databases/international-multicrop-databases). +* For identifying your plant material in a plant genetic resource repository (genebank or germplasm database), you can consult the European Cooperative Programme for Plant Genetic Resources {% tool "ecpgr" %}, which includes a {% tool "ecpgr-central-crop-databases" %} and a catalogue of relevant {% tool "international-multicrop-databases" %}. * Other key databases for identifying plant material are - * the [European Search Catalogue for Plant Genetic Resources](https://eurisco.ipk-gatersleben.de/) (EURISCO), which provides information about more than 2 million accessions of crop plants and their wild relatives, from hundreds of European institutes in 43 member countries - * [Genesys](https://www.genesys-pgr.org/), an online platform with a search engine for Plant Genetic Resources for Food and Agriculture (PGRFA) conserved in genebanks worldwide. -* The “Biological Material” section of the [MIAPPE](https://github.com/MIAPPE/MIAPPE/tree/master/MIAPPE_Checklist-Data-Model-v1.1) checklist deals with sample description. 
+ * the European Search Catalogue for Plant Genetic Resources {% tool "eurisco" %}, which provides information about more than 2 million accessions of crop plants and their wild relatives, from hundreds of European institutes in 43 member countries. + * {% tool "genesys" %}, an online platform with a search engine for Plant Genetic Resources for Food and Agriculture (PGRFA) conserved in genebanks worldwide. +* The “Biological Material” section of the {% tool "miappe-checklist-data-model" %} checklist deals with sample description. #### (Meta)Data sharing and publication -* For identifying samples from which molecular data was produced, the [BioSamples](https://www.ebi.ac.uk/biosamples/) database is recommended as a provider of international unique identifiers. - * The [plant-miappe.json](https://www.ebi.ac.uk/biosamples/schemas/certification/plant-miappe.json) model provided by BioSample is aligned with all recommendations provided above for plant identification and is therefore recommended for your sample submission. +* For identifying samples from which molecular data was produced, the {% tool "biosamples" %} database is recommended as a provider of international unique identifiers. + * The {% tool "plant-miappe-json" %} model provided by BioSamples is aligned with all recommendations provided above for plant identification and is therefore recommended for your sample submission. * It is also recommended that you provide permanent access to a description of the project or study, that contains links to all the data, molecular or phenotypic. 
Several databases are recommended for this purpose including: - * [Recherche Data Gouv](https://recherche.data.gouv.fr/) - * [e!DAL](https://edal.ipk-gatersleben.de/) - * [Zenodo](https://zenodo.org/) - * [Biostudies](https://www.ebi.ac.uk/biostudies/) - * [FAIRDOMHub](https://fairdomhub.org/) + * {% tool "recherche-data-gouv" %} + * {% tool "e-dal" %} + * {% tool "zenodo" %} + * {% tool "biostudies" %} + * {% tool "fairdomhub" %} ## Phenotyping: (meta)data collection and publication @@ -85,47 +85,47 @@ It is recommended that metadata collection is contemplated from the start of the ### Considerations * Did you collect the metadata for the identification of your plant material according to the recommendation provided in the [above section](#plant-biological-materials-metadata-collection-and-sharing)? * Have you documented your phenotyping and environment assays (i.e. measurement or computation methodology based on the trait, method, scale triplet) both for direct measures (data collection) and computed data (after data processing or analysis)? - * Is there an existing [Crop Ontology](https://www.cropontology.org) for the species you experiment and does it describe your assay? If not, have you described your data following the trait, method, scale triplet? -* Do you have your own system to collect data and is it compliant with the [MIAPPE](https://www.miappe.org/) standard? + * Is there an existing {% tool "crop-ontology" %} for the species you experiment and does it describe your assay? If not, have you described your data following the trait, method, scale triplet? +* Do you have your own system to collect data and is it compliant with the {% tool "miappe" %} standard? * Are you exchanging data with individual researchers? * In what media is data being collected? - * Is the data described in a [MIAPPE](https://www.miappe.org/)-compliant manner? + * Is the data described in a {% tool "miappe" %}-compliant manner? 
* Are you exchanging data across different data management platforms? - * Do these platforms implement the [Breeding API (BrAPI)](https://brapi.org/) specification? + * Do these platforms implement the Breeding API {% tool "brapi" %} specification? * If not, are they MIAPPE-compliant and do they enable automated data exchange? ### Solutions #### Checklists and ontologies -* The metadata standard applicable to plant phenotyping experiments is [MIAPPE](https://www.miappe.org/). - * There is a section dedicated to the identification of plant biological materials that follows [The Multi-Crop Passport Descriptors](https://www.bioversityinternational.org/e-library/publications/detail/faobioversity-multi-crop-passport-descriptors-v21-mcpd-v21/) (MCPD) described [above](#plant-biological-materials-metadata-collection-and-sharing). - * There is a section to describe the phenotyping assays based on the [Crop Ontology](https://www.cropontology.org) recommendations. +* The metadata standard applicable to plant phenotyping experiments is {% tool "miappe" %}. + * There is a section dedicated to the identification of plant biological materials that follows {% tool "multi-crop-passport-descriptor" %} described [above](#plant-biological-materials-metadata-collection-and-sharing). + * There is a section to describe the phenotyping assays based on the {% tool "crop-ontology" %} recommendations. * There is a section describing the type of experiment (greenhouse, field, etc.) and it is advisable to collect the location (geographical coordinates) and time where it was performed for linkage with geo-climatic data. * Other sections include description of investigations, studies, people involved, data files, environmental parameters, experimental factors, events, observed variables. * Tools and resources for data collection and management: - * [FAIRDOM-SEEK](https://seek4science.org/) is a free data management platform for which MIAPPE templates are in development. 
- * [Dataverse](https://dataverse.org/) is a free data management platform for which MIAPPE templates are in development. It is used in several repositories such as [Recherche Data Gouv](https://recherche.data.gouv.fr/). - * [e!DAL](https://edal.ipk-gatersleben.de/) is a free data management platform for which MIAPPE templates are in development. - * The [ISA-Tools](https://isa-tools.org/) also include a configuration for MIAPPE and can be used both for filling-in metadata and for validating. - * [Collaborative Open Plant Omics (COPO)](https://copo-project.org/) is a data management platform specific for the plant sciences. - * [FAIRsharing](https://fairsharing.org) is a manually curated registry of reporting guidelines, vocabularies, identifier schemes, models, formats, repositories, knowledge bases, and data policies that includes many resources relevant for managing plant phenotyping data. -* Validation of MIAPPE compliance can be done via [ISA-Tools](https://isa-tools.org/) or upon data deposition in a [Breeding API](https://brapi.org/) (BrAPI) [compliant repository](https://www.brapi.org/servers). + * {% tool "fairdom-seek" %} is a free data management platform for which MIAPPE templates are in development. + * {% tool "dataverse" %} is a free data management platform for which MIAPPE templates are in development. It is used in several repositories such as {% tool "recherche-data-gouv" %}. + * {% tool "e-dal" %} is a free data management platform for which MIAPPE templates are in development. + * The {% tool "isa-tools" %} also include a configuration for MIAPPE and can be used both for filling-in metadata and for validating. + * Collaborative Open Plant Omics {% tool "copo" %} is a data management platform specific for the plant sciences. 
+ * {% tool "fairsharing" %} is a manually curated registry of reporting guidelines, vocabularies, identifier schemes, models, formats, repositories, knowledge bases, and data policies that includes many resources relevant for managing plant phenotyping data. +* Validation of MIAPPE compliance can be done via {% tool "isa-tools" %} or upon data deposition in a Breeding API ({% tool "brapi" %}) {% tool "brapi-compatible-server" %}. * If you or your partners collect data manually, it is critical to adopt a spreadsheet template that is compatible with the structure of the database that will be used for data deposition. - * If the database is MIAPPE compliant, you can use the [MIAPPE-compliant spreadsheet template](https://github.com/MIAPPE/MIAPPE/raw/master/MIAPPE_Checklist-Data-Model-v1.1/MIAPPE_templates/MIAPPEv1.1_training_spreadsheet.xlsx). - * This template could make use of tools for handling ontology annotations in a spreadsheet, such as [RightField](https://rightfield.org.uk/) or [OntoMaton](https://github.com/ISA-tools/OntoMaton). + * If the database is MIAPPE compliant, you can use the {% tool "miappe-compliant-spreadsheet-template" %}. + * This template could make use of tools for handling ontology annotations in a spreadsheet, such as {% tool "rightfield" %} or {% tool "onotomaton" %}. * If you or your partners collect data into data management platforms: * If it implements BrAPI, you can exchange data using BrAPI calls. * If it doesn’t implement BrAPI, the simplest solution would be to export data into the MIAPPE spreadsheet template, or another formally defined data template. 
-* For data deposition, it is highly recommended that you opt for one of the many [repositories that implement BrAPI](https://www.brapi.org/servers), as they enhance findability through the ELIXIR plant data discovery service, [FAIR Data-finder for Agronomic Research (FAIDARE)](https://urgi.versailles.inrae.fr/faidare/), enable machine actionable access to MIAPPE compliant data and validation of that compliance. +* For data deposition, it is highly recommended that you opt for one of the many repositories that implement {% tool "brapi-compatible-server" %}, as they enhance findability through the ELIXIR plant data discovery service, FAIR Data-finder for Agronomic Research ({% tool "faidare" %}), enable machine actionable access to MIAPPE compliant data and validation of that compliance. ## Genotyping: (meta)data collection and publication ### Description -Here are described the mandatory, recommended and optional metadata fields for data interoperability and re-use, as well as for data deposition in EVA (European Variation Archive), the EMBL-EBI's open-access genetic variation archive connected to [BioSamples](https://www.ebi.ac.uk/biosamples/), described [above](#plant-biological-materials-metadata-collection-and-sharing). +Here are described the mandatory, recommended and optional metadata fields for data interoperability and re-use, as well as for data deposition in EVA (European Variation Archive), the EMBL-EBI's open-access genetic variation archive connected to {% tool "biosamples" %}, described [above](#plant-biological-materials-metadata-collection-and-sharing). ### Considerations * Did you collect the metadata for the identification of your plant samples according to the recommendations provided in the [above section](#plant-biological-materials-metadata-collection-and-sharing)? -* Is the reference genome assembly available in an [INSDC](https://www.insdc.org/) archive and has a Genome Collections Accession number, either GCA or GCF? 
+* Is the reference genome assembly available in an {% tool "international-nucleotide-sequence-database-collaboration" %} archive and has a Genome Collections Accession number, either GCA or GCF? * Is the analytic approach used for creating the VCF file available in a publication and has a Digital Object Identifier (DOI)? ### Solutions diff --git a/pages/your_domain/proteomics.md b/pages/your_domain/proteomics.md index 8290abafc..6cf897210 100644 --- a/pages/your_domain/proteomics.md +++ b/pages/your_domain/proteomics.md @@ -1,7 +1,7 @@ --- title: Proteomics description: Data management solutions for proteomics data. -contributors: [Michael Turewicz, Martin Eisenacher, Anika Frericks-Zipper, Ulrike Wittig] +contributors: [Michael Turewicz, Martin Eisenacher, Anika Frericks-Zipper, Ulrike Wittig, Dirk Winkelhardt] page_id: proteomics related_pages: your_tasks: [metadata] @@ -34,13 +34,13 @@ For different proteomics experiments and different steps of the respective data ### Solutions -The Human Proteome Organisation (HUPO) Proteomics Standards Initiative ([HUPO-PSI](https://www.psidev.info/)), a proteomics community-driven organization, provides several different controlled vocabularies, standard data formats, converter and validator software tools. The most important include: +The Human Proteome Organisation (HUPO) Proteomics Standards Initiative ({% tool "proteomics-standards-initiative" %}), a proteomics community-driven organization, provides several different controlled vocabularies, standard data formats, converter and validator software tools. The most important include: - Controlled vocabularies: PSI-MS, PSI-MI, XLMOD and sepCV, which are provided as OBO files. -- The Minimum Information About a Proteomics Experiment (MIAPE) guidelines document. -- mzML - a standard format for encoding raw mass spectrometer output. -- mzIdentML - a standard exchange format for peptides and proteins identified from mass spectra. 
-- mzQuantML - a standard format that is intended to store the systematic description of workflows quantifying molecules (principally peptides and proteins) by mass spectrometry. -- mzTab - a tab delimited text file format to report proteomics and metabolomics results. +- The Minimum Information About a Proteomics Experiment ([MIAPE](https://psidev.info/miape)) guidelines document. +- [mzML](https://www.psidev.info/mzML) - a standard format for encoding raw mass spectrometer output. +- [mzIdentML](https://www.psidev.info/mzidentml) - a standard exchange format for peptides and proteins identified from mass spectra. +- [mzQuantML](https://psidev.info/mzquantml) - a standard format that is intended to store the systematic description of workflows quantifying molecules (principally peptides and proteins) by mass spectrometry. +- [mzTab](https://www.psidev.info/mztab) - a tab delimited text file format to report proteomics and metabolomics results. ## Processing and analysis of proteomics data @@ -52,13 +52,17 @@ For all steps within a FAIR proteomics data analysis pipeline software is needed - Can your proteomics raw data recorded by a mass spectrometer be stored as an mzML file? - Is it possible to convert your raw data to mzML? - Does your search engine support mzML and/or mzIdentML? -- Does your quantification software support mzQuantML or mzTAB? +- Does your quantification software support mzQuantML or mzTab? ### Solutions -- Within the proteomics community various converter software tools such as [msconvert](https://proteowizard.sourceforge.io/) were implemented, which support the conversion of mass spectrometer output formats to the mzML standard data format as well as other conversions to standard data formats. 
-- Information on software tools that support HUPO-PSI standard data formats can be found on the standard format-specific web pages of the HUPO-PSI (e.g., [mzML](https://www.psidev.info/mzML) , [mzIdentML](https://www.psidev.info/mzidentml) and [MZTAB](https://www.psidev.info/mztab) ). - +- Within the proteomics community various converter software tools such as {% tool "msconvert" %} were implemented, which support the conversion of mass spectrometer output formats to the mzML standard data format as well as other conversions to standard data formats. +- Information on software tools that support HUPO-PSI data formats can be found on the standard format-specific web pages of the HUPO-PSI (e.g., [mzML](https://www.psidev.info/mzML) , [mzIdentML](https://www.psidev.info/mzidentml) and [mzTab](https://www.psidev.info/mztab) ). The following list shows just a few tools using standard data formats as input and/or output: + * {% tool "comet" %} + * {% tool "mascot" %} + * {% tool "openms" %} + * {% tool "pia-protein-inference-algorithms" %} + * {% tool "skyline" %} ## Preserving and sharing proteomics data @@ -75,6 +79,6 @@ In order to make proteomics data and results worldwide findable and accessible f ### Solution -- You can find an appropriate data repository via the website of the [ProteomeXchange](http://www.proteomexchange.org/) Consortium. ProteomeXchange was established to provide globally coordinated standard data submission and dissemination pipelines involving the main proteomics repositories, and to encourage open data policies in the field. Currently, member repositories include PRIDE, PepideAtlasq, MassIVE, jPOST, iProx and PanoramaPublic. -- Information on data uploads can be found on [proteomexchange.org](http://www.proteomexchange.org/submission) or on the websites of the particular data repositories. E.g. PRIDE uploads are conducted via a [submission tool](https://www.ebi.ac.uk/pride/markdownpage/pridesubmissiontool). 
There are data repository-specific requirements. -- Advantages of data publication: fulfillment of journal requirements, higher visibility of research, free storage, worldwide accessibility, basic re-analysis by repository-associated tools +- You can find an appropriate data repository via the website of the {% tool "proteomexchange" %} Consortium. ProteomeXchange was established to provide globally coordinated standard data submission and dissemination pipelines involving the main proteomics repositories, and to encourage open data policies in the field. Currently, member repositories include {% tool "pride" %}, {% tool "peptideatlas" %}, {% tool "massive" %}, jPOST, iProx and PanoramaPublic. +- Information on data uploads can be found on [ProteomeXchange submissions](http://www.proteomexchange.org/submission) or on the websites of the particular data repositories. E.g. PRIDE uploads are conducted via the {% tool "pride-submission-tool" %}. There are data repository-specific requirements. +- Advantages of data publication: fulfillment of journal requirements, higher visibility of research, free storage, worldwide accessibility, basic re-analysis by repository-associated tools and possible integration in more specialized knowledgebases like: {% tool "human-protein-atlas" %}, {% tool "macpepdb" %}, {% tool "string" %}, {% tool "unimod" %} or {% tool "uniprot" %} diff --git a/pages/your_domain/rare_disease_data.md b/pages/your_domain/rare_disease_data.md index 8f6afe97a..361f56522 100644 --- a/pages/your_domain/rare_disease_data.md +++ b/pages/your_domain/rare_disease_data.md @@ -9,8 +9,8 @@ related_pages: --- ## Introduction -The rare disease (RD) domain brings some unique challenges when it comes to data management. Rare disease research is often scarce and scattered among many institutions in different countries – due to the, per definition, low prevalence of RDs. 
This makes rare diseases a prime example of a research area that can strongly profit from coordination on an international scale, including data management. RD research should be improved to overcome fragmentation, leading to efficacious use of data and resources, faster scientific progress and competitiveness, and most importantly to decrease unnecessary hardship and prolonged suffering of RD patients. Considering the introduction of omics into care practice and the structuration of RD care centers in [European Reference Networks](https://ec.europa.eu/health/european-reference-networks_en) (ERNs), data management is key to ensure data reuse and interpretation. The go-to guidelines for efficient data management are the FAIR Principles for research data management and stewardship. These principles provide guidance for making (meta)data more Findable, Accessible, Interoperable, and Reusable. -Research data on RDs can be found in patient registries, biobanks, genomics & multi-omics repositories, knowledge bases, resources (such as animal models and cell lines libraries), omics deposition & analysis platforms, and translational & clinical research supporting materials and services. This page provides an overview of what steps one should take to make data from those sources FAIR, with an emphasis on patient registries. It is written by people affiliated with the [European Joint Programme on Rare Diseases](https://www.ejprarediseases.org/) (EJP RD) and, therefore, reflects the vision of this project. Information is grouped into six topics: administrative aspects of rare disease data, creating and collecting data, processing data, interpreting data, describing data, and giving access to data. +The rare disease (RD) domain brings some unique challenges when it comes to data management. Rare disease research is often scarce and scattered among many institutions in different countries – due to the, per definition, low prevalence of RDs. 
This makes rare diseases a prime example of a research area that can strongly profit from coordination on an international scale, including data management. RD research should be improved to overcome fragmentation, leading to efficacious use of data and resources, faster scientific progress and competitiveness, and most importantly to decrease unnecessary hardship and prolonged suffering of RD patients. Considering the introduction of omics into care practice and the structuration of RD care centers in {% tool "european-reference-networks" %}, data management is key to ensure data reuse and interpretation. The go-to guidelines for efficient data management are the FAIR Principles for research data management and stewardship. These principles provide guidance for making (meta)data more Findable, Accessible, Interoperable, and Reusable. +Research data on RDs can be found in patient registries, biobanks, genomics & multi-omics repositories, knowledge bases, resources (such as animal models and cell lines libraries), omics deposition & analysis platforms, and translational & clinical research supporting materials and services. This page provides an overview of what steps one should take to make data from those sources FAIR, with an emphasis on patient registries. It is written by people affiliated with the {% tool "european-joint-programme-on-rare-diseases" %} and, therefore, reflects the vision of this project. Information is grouped into six topics: administrative aspects of rare disease data, creating and collecting data, processing data, interpreting data, describing data, and giving access to data. ## Teams for managing rare disease data ### Description @@ -42,18 +42,18 @@ Recommended additional roles: This section covers ways of creating and collecting data in a FAIR way focusing on how to use your electronic data capture system to make your data FAIR as you collect it. 
### Considerations -Data collection for clinical research is often done through (electronic) Case Report Forms (CRFs) using an Electronic Data Capture (EDC) system. When collecting rare disease data, one should ensure that they collect the minimal set of data elements for rare disease registration. The Common Data Elements (CDEs) are a list of core data elements to be collected by rare disease registries, especially the ERN registries, to ensure a certain level of interoperability. The full list of CDEs for rare disease registries can be found [here](https://eu-rd-platform.jrc.ec.europa.eu/set-of-common-data-elements_en). +Data collection for clinical research is often done through (electronic) Case Report Forms (CRFs) using an Electronic Data Capture (EDC) system. When collecting rare disease data, one should ensure that they collect the minimal set of data elements for rare disease registration. The Common Data Elements (CDEs) are a list of core data elements to be collected by rare disease registries, especially the ERN registries, to ensure a certain level of interoperability. The full list of CDEs for rare disease registries can be found in the {% tool "set-of-common-data-elements-for-rare-diseases-registration" %}. When choosing an EDC system, it is important to check if the system is open to and supports an implementation of FAIR. Two aspects to take into consideration are, for example: -* Does the EDC system support the implementation of a [FAIR Data Point](https://www.fairdatapoint.org/)? +* Does the EDC system support the implementation of a {% tool "fair-data-point" %}? * A FAIR Data Point stores the metadata of the data that has been collected. It makes metadata findable and reusable for others on the internet and offers a uniform way for accessing the data for those who are authorized. * Does the EDC system support semantic data models? 
- * Mapping the eCRFs to the elements of a semantic data model helps making the data being collected interoperable and reusable. The EJP RD developed and published a [semantic data model based on the CDEs for rare disease registries](https://github.com/ejp-rd-vp/CDE-semantic-model). + * Mapping the eCRFs to the elements of a semantic data model helps making the data being collected interoperable and reusable. The EJP RD developed and published a {% tool "semantic-data-model-of-the-set-of-common-data-elements-for-rare-diseases-registration" %}. ### Solutions -* [Common Data Elements for rare disease registries](https://eu-rd-platform.jrc.ec.europa.eu/set-of-common-data-elements_en) -* [Semantic data model EJP RD](https://github.com/ejp-rd-vp/CDE-semantic-model) +* {% tool "set-of-common-data-elements-for-rare-diseases-registration" %} +* {% tool "semantic-data-model-of-the-set-of-common-data-elements-for-rare-diseases-registration" %} ## Processing rare disease data @@ -61,11 +61,11 @@ When choosing an EDC system, it is important to check if the system is open to a This section covers the processing of data as it is being collected. It covers the different pseudonymisation tools that could be used for registry data. ### Considerations -For data pseudonymization, it is recommended to use the [pseudonymization tool](https://eu-rd-platform.jrc.ec.europa.eu/erdri/pseudonymisation-tool_en) offered by the European Platform on Rare Disease Registration. +For data pseudonymization, it is recommended to use the {% tool "spider-pseudonymisation-tool" %} offered by the European Platform on Rare Disease Registration. When making data FAIR retrospectively, it is recommended to follow the [retrospective FAIRification workflow](https://doi.org/10.1162/dint_a_00028). 
On the other hand, when registry data must be FAIR right from when it is being collected by an EDC system, it is recommended to read two papers ([here](https://ojrd.biomedcentral.com/articles/10.1186/s13023-021-02004-y) and [here](https://pubmed.ncbi.nlm.nih.gov/34454078/)) to learn more about the de novo FAIRification process. ### Solutions -* [Pseudonymization tool EU RD platform](https://eu-rd-platform.jrc.ec.europa.eu/erdri/pseudonymisation-tool_en) +* {% tool "spider-pseudonymisation-tool" %} ## Interpreting rare disease data @@ -73,23 +73,23 @@ When making data FAIR retrospectively, it is recommended to follow the [retrospe This section deals with the modeling of your data, so it can be annotated with unambiguous terms and the different ways it can be queried. ### Considerations -EJP RD’s CDE semantic model comes with a data transformation tool called ‘CDE in a box’, which transforms data in CSV format to linked data according to the model. The '[CDE in a box](https://github.com/ejp-rd-vp/cde-in-box)' tool works independently from any EDC system. Additionally, the EJP RD will provide mappings to other data models such as the Observational Health Data Sciences and Informatics (OMOP) Common Data Model, the Clinical Data Interchange Standards Consortium (CDISC) Operational Data Model, and Health Level 7’s Fast Healthcare Interoperability Resources (FHIR). +EJP RD’s CDE semantic model comes with a data transformation tool called ‘CDE in a box’, which transforms data in CSV format to linked data according to the model. The {% tool "common-data-elements-in-a-box" %} tool works independently from any EDC system. Additionally, the EJP RD will provide mappings to other data models such as the Observational Health Data Sciences and Informatics (OMOP) Common Data Model, the Clinical Data Interchange Standards Consortium (CDISC) Operational Data Model, and Health Level 7’s Fast Healthcare Interoperability Resources (FHIR). 
To enable data discovery and querying, the EJP RD is developing a Virtual Platform for rare disease resources. This Virtual Platform is a federated ecosystem in which resources are enhanced to be amenable for rare disease research. Data stays at its source but can be queried remotely through an EJP RD query endpoint. As an ecosystem, multiple query endpoints will be present, allowing for sending interrogations from one resource to another. Thus, federated discovery, querying, and analysis are made possible. All while preserving patient privacy and respecting the access conditions of individual resources. ### Solutions -* [CDE in a box](https://github.com/ejp-rd-vp/cde-in-box) -* [EJP RD Virtual Platform](https://vp.ejprarediseases.org/) +* {% tool "common-data-elements-in-a-box" %} +* {% tool "european-joint-programme-on-rare-diseases-virtual-platform" %} ## Describing rare disease data ### Description This section deals with the information needed to properly describe your data, so users can reuse it. It covers the use of FAIR Data Points and database technologies to store data. ### Considerations -When describing rare disease data (i.e., describing the metadata), one could make use of the FAIR Data Point specification as mentioned earlier. This specification offers an extended metadata model based on the [Data Catalog Vocabulary (DCAT) version 2](https://www.w3.org/TR/vocab-dcat-2/), a [World Wide Web Consortium (W3C)](https://www.w3.org/) recommendation. Once the FAIR Data Point has been set up properly it should be visible in the list of active [FAIR Data Points](https://home.fairdatapoint.org/). Note: make sure that the registry’s Data Access Policy allows for sharing of metadata. +When describing rare disease data (i.e., describing the metadata), one could make use of the FAIR Data Point specification as mentioned earlier. 
This specification offers an extended metadata model based on the {% tool "data-catalog-vocabulary" %}, a [World Wide Web Consortium (W3C)](https://www.w3.org/) recommendation. Once the FAIR Data Point has been set up properly it should be visible in the list of active {% tool "fair-data-points" %}. Note: make sure that the registry’s Data Access Policy allows for sharing of metadata. ### Solutions -* [EJP RD metadata model](https://github.com/ejp-rd-vp/resource-metadata-schema) -* [FAIR Data Point](https://www.fairdatapoint.org/) +* {% tool "european-joint-programme-on-rare-diseases-metadata-model" %} +* {% tool "fair-data-points" %} ## Giving access to rare disease data @@ -97,8 +97,8 @@ When describing rare disease data (i.e., describing the metadata), one could mak This section deals with the information needed by people who will re-use your data, and with the access conditions they will need to follow. ### Considerations -Two main topics can be addressed when dealing with data access. First, the collection of informed consent through an informed consent form. Second, specifying who is allowed access to which data using an Authentication and Authorization Infrastructure (AAI). The informed consent form should use existing standards for informed consent. The EJP RD has developed a generic informed consent form for ERN registries that can be found [here](https://www.ejprarediseases.org/ern-registries-generic-icf/). +Two main topics can be addressed when dealing with data access. First, the collection of informed consent through an informed consent form. Second, specifying who is allowed access to which data using an Authentication and Authorization Infrastructure (AAI). The informed consent form should use existing standards for informed consent. The EJP RD has developed a {% tool "ern-registries-generic-informed-consent-forms" %}. 
### Solutions -* [EJP RD metadata model](https://github.com/ejp-rd-vp/resource-metadata-schema) -* [Informed consent form for ERN registries](https://www.ejprarediseases.org/ern-registries-generic-icf/) +* {% tool "european-joint-programme-on-rare-diseases-metadata-model" %} +* {% tool "ern-registries-generic-informed-consent-forms" %} diff --git a/pages/your_domain/structural_bioinformatics.md b/pages/your_domain/structural_bioinformatics.md index 15c29b5d5..fe0c61e69 100644 --- a/pages/your_domain/structural_bioinformatics.md +++ b/pages/your_domain/structural_bioinformatics.md @@ -35,16 +35,16 @@ Researchers in the field should be able to find predictions of macromolecular st ### Solutions * There are three main options to make your models available: - * Deposit in [ModelArchive](https://www.modelarchive.org) for theoretical models of macromolecular structures. Models deposited in the ModelArchive are made available under the CC BY-SA 4.0 licence (see [here for details](https://modelarchive.org/terms-of-use)). - * Deposit in [PDB-Dev](https://pdb-dev.wwpdb.org) for models using integrative or hybrid modelling. Models deposited in PDB-Dev are made available under the CC0 1.0 licence (see [here for details](https://www.wwpdb.org/about/usage-policies)). If theoretical models were used as part of the modelling, they can either be included in the PDB-Dev deposition or, if they are expected to be useful by themselves, deposited in ModelArchive and referenced to. - * Make available using a dedicated web service for large-scale modelling efforts which are updated on a regular basis using automated prediction methods. Unified access to such services can be provided with the [3D-Beacons network](https://3d-beacons.org) which is being developed by the [ELIXIR 3D-BioInfo Community](https://elixir-europe.org/communities/3d-bioinfo). 
The data providers currently connected in the network are listed in [the 3D-Beacons documentation](https://www.ebi.ac.uk/pdbe/pdbe-kb/3dbeacons/docs#partners). An appropriate licence must be associated with the models (check the [RDMkit licensing page](licensing) for guidance on this) and must be compatible with CC-BY 4.0 if the models are to be distributed in the 3D-Beacons network. -* Model coordinates are preferably stored in the standard PDB archive format [PDBx/mmCIF](https://mmcif.wwpdb.org/). While, for many purposes, the legacy PDB format may suffice to store model coordinates and is still widely used, the format is no longer being modified or extended. -* Model quality estimates can be computed globally, per-residue, and per-residue-pair. The estimates should be computed using a relatively recent and well benchmarked tool or by the structure prediction method itself. Please check [CAMEO](https://cameo3d.org), [CASP](https://predictioncenter.org), and [CAPRI](https://www.ebi.ac.uk/pdbe/complex-pred/capri/) to find suitable quality estimators. The [3D-BioInfo Community](https://elixir-europe.org/communities/3d-bioinfo) is also currently working to further improve benchmarking for protein complexes, protein-ligand interactions, and nucleic acid structures. By convention, the main per-residue quality estimates are stored in place of B-factors in model coordinate files. In mmCIF files any number of quality estimates can be properly described and stored in the ma_qa_metric category of the PDBx/mmCIF ModelArchive Extension Dictionary described below. -* Metadata for theoretical models of macromolecular structures should preferably be stored using the [PDBx/mmCIF ModelCIF Extension Dictionary](https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index) independently of the deposition process. The extension is being developed by the [ModelCIF working group](https://wwpdb.org/task/modelcif) with input from the community. 
Feedback and change requests are welcome and can be given on [github](https://github.com/ihmwg/ModelCIF). The same information can also be provided manually during the deposition in ModelArchive and there is [additional documentation](https://modelarchive.org/help) on how to provide metadata and minimal requirements for it. Generally, the metadata must include: + * Deposit in {% tool "modelarchive" %} for theoretical models of macromolecular structures. Models deposited in the ModelArchive are made available under the CC BY-SA 4.0 licence (see [here for details](https://modelarchive.org/terms-of-use)). + * Deposit in {% tool "pdb-dev" %} for models using integrative or hybrid modelling. Models deposited in PDB-Dev are made available under the CC0 1.0 licence (see [here for details](https://www.wwpdb.org/about/usage-policies)). If theoretical models were used as part of the modelling, they can either be included in the PDB-Dev deposition or, if they are expected to be useful by themselves, deposited in ModelArchive and referenced to. + * Make available using a dedicated web service for large-scale modelling efforts which are updated on a regular basis using automated prediction methods. Unified access to such services can be provided with the {% tool "3d-beacons" %} which is being developed by the [ELIXIR 3D-BioInfo Community](https://elixir-europe.org/communities/3d-bioinfo). The data providers currently connected in the network are listed in [the 3D-Beacons documentation](https://www.ebi.ac.uk/pdbe/pdbe-kb/3dbeacons/docs#partners). An appropriate licence must be associated with the models (check the [RDMkit licensing page](licensing) for guidance on this) and must be compatible with CC-BY 4.0 if the models are to be distributed in the 3D-Beacons network. +* Model coordinates are preferably stored in the standard PDB archive format {% tool "pdbx-mmcif-format-and-tools" %}. 
While, for many purposes, the legacy PDB format may suffice to store model coordinates and is still widely used, the format is no longer being modified or extended. +* Model quality estimates can be computed globally, per-residue, and per-residue-pair. The estimates should be computed using a relatively recent and well benchmarked tool or by the structure prediction method itself. Please check {% tool "cameo" %}, {% tool "casp" %}, and {% tool "capri" %} to find suitable quality estimators. The [3D-BioInfo Community](https://elixir-europe.org/communities/3d-bioinfo) is also currently working to further improve benchmarking for protein complexes, protein-ligand interactions, and nucleic acid structures. By convention, the main per-residue quality estimates are stored in place of B-factors in model coordinate files. In mmCIF files any number of quality estimates can be properly described and stored in the ma_qa_metric category of the PDBx/mmCIF ModelArchive Extension Dictionary described below. +* Metadata for theoretical models of macromolecular structures should preferably be stored using the {% tool "pdbx-mmcif-modelcif-extension-dictionary" %} independently of the deposition process. The extension is being developed by the [ModelCIF working group](https://wwpdb.org/task/modelcif) with input from the community. Feedback and change requests are welcome and can be given on [github](https://github.com/ihmwg/ModelCIF). The same information can also be provided manually during the deposition in ModelArchive and there is [additional documentation](https://modelarchive.org/help) on how to provide metadata and minimal requirements for it. 
Generally, the metadata must include: * a short description of the study for which the model was generated; * if available, a citation to the manuscript referring to the models; - * the source for the sequences of modelled proteins with references to databases such as [UniProtKB](https://www.uniprot.org); + * the source for the sequences of modelled proteins with references to databases such as {% tool "uniprot" %}; * modelling steps with references to available software or web services used and to manuscripts describing the method; - * input data needed for the modelling steps. For instance in homology modelling this could include the [PDB](https://www.wwpdb.org/) identifiers for the template structures used for modelling and their alignments to the target protein; + * input data needed for the modelling steps. For instance in homology modelling this could include the {% tool "pdb" %} identifiers for the template structures used for modelling and their alignments to the target protein; * model quality estimates. * If necessary, accompanying data can be provided in separate files using different file formats. The files can be added to ModelArchive depositions and referred to in the PDBx/mmCIF ModelArchive extension format. diff --git a/pages/your_domain/toxicology_data.md b/pages/your_domain/toxicology_data.md index e93d57d62..c944202a3 100644 --- a/pages/your_domain/toxicology_data.md +++ b/pages/your_domain/toxicology_data.md @@ -38,10 +38,10 @@ Results of in vitro assays are typically collected as dose-response curves. Thes ### Solutions -- [ToxCast](https://cran.r-project.org/web/packages/tcpl/index.html) has published an R-package with the tools used to process the high throughput chemical screening data. -- Benchmark concentrations (and doses) can be computed with free software as [PROAST](https://www.rivm.nl/en/proast) and [BMDS](https://www.epa.gov/bmds). 
-- For experiments where gene expression has been measured in response to a toxicant, R packages such as [DESEq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) for RNA-Seq data, and [limma](https://bioconductor.org/packages/release/bioc/html/limma.html) for microarray data are used to find genes that are differentially expressed. -- In silico prediction models can be developed starting from a series of compounds annotated with the results on in vitro methods. The quality of the predictions provided by these methods are often comparable with those obtained by experimental methods, particularly when the models are used within their applicability domain. [Flame](https://github.com/phi-grib/flame) is an open-source modelling framework developed specifically for this purpose. +- {% tool "toxcast-data" %} has published an R-package with the tools used to process the high throughput chemical screening data. +- Benchmark concentrations (and doses) can be computed with free software such as {% tool "proast" %} and {% tool "bmds" %}. +- For experiments where gene expression has been measured in response to a toxicant, R packages such as {% tool "deseq2" %} for RNA-Seq data, and {% tool "limma" %} for microarray data are used to find genes that are differentially expressed. +- In silico prediction models can be developed starting from a series of compounds annotated with the results on in vitro methods. The quality of the predictions provided by these methods is often comparable with those obtained by experimental methods, particularly when the models are used within their applicability domain. {% tool "flame" %} is an open-source modelling framework developed specifically for this purpose. ## Data from animal assays - Existing data and vocabularies @@ -51,7 +51,7 @@ Assays are expensive. 
Most animal data come from compiling normative studies whi In spite of their inconveniences (high costs, time consumption, requirements of significant amounts of the substance being tested, limited translatability of the observed results), in many cases, there is no suitable replacement for *in vivo* tests. The replacement of *in vivo* data for alternative approaches (often called NAM, New Approach methodologies) is an active research field. -Two important toxicogenomics resources containing animal data are [TG-GATEs](https://pubmed.ncbi.nlm.nih.gov/25313160/), and [DrugMatrix](https://pubmed.ncbi.nlm.nih.gov/25058030/). These resources contain gene expression data in several rat tissues for a large number of compounds, in several doses and exposure times. They also include histopathology annotations and chemistry measurements. +Two important toxicogenomics resources containing animal data are {% tool "tg-gates" %}, and {% tool "drug-matrix" %}. These resources contain gene expression data in several rat tissues for a large number of compounds, in several doses and exposure times. They also include histopathology annotations and chemistry measurements. ### Considerations @@ -60,9 +60,9 @@ Data generated in normative studies were obtained under Good Laboratory Practice ### Solutions -- Use information about genes, and variants associated with human adverse effects, from platforms such as [DisGeNET](https://www.disgenet.org/), [CTD](http://ctdbase.org/), and [PharmGKB](https://www.pharmgkb.org/). -- Histopathology data requires the use of a controlled vocabulary like [CDISC/SEND](https://evs.nci.nih.gov/ftp1/CDISC/SEND/SEND%20Terminology.html). -- The extension and curation of ontologies like CDISC/SEND to specific domains is facilitated by tools like [ONTOBROWSER](https://opensource.nibr.com/projects/ontobrowser/). 
+- Use information about genes, and variants associated with human adverse effects, from platforms such as {% tool "disgenet" %}, {% tool "ctd" %}, and {% tool "pharmgkb" %}. +- Histopathology data requires the use of a controlled vocabulary like {% tool "cdisc-send" %}. +- The extension and curation of ontologies like CDISC/SEND to specific domains is facilitated by tools like {% tool "ontobrowser" %}. - In order to reduce the number of animals used in toxicological studies, it has been suggested to replace control groups with historically collected data from studies carried out in comparable conditions (so-called Virtual Control Groups). VCGs are being developed by [eTRANSAFE project](https://etransafe.eu/virtual-control-groups-one-step-forward-into-the-future-of-animal-testing-in-toxicology). ## Data from human assays - Existing data and vocabularies @@ -83,24 +83,24 @@ Data from human assays are highly heterogeneous and integration with in vitro an - Similarly, medication identifiers are not always consistent among different sources. This is a challenging issue as many medicinal products have different denominations and available commercial presentations depending on the country/region where the product is commercialized. - Usually, structured resources present metadata explaining how the data is organized, thus enabling an easy data transformation process. Conversely, non-structured resources are not easy to harmonize as data organization is not consistent among the available documents. -Databases containing clinical toxicological data of drugs can contain the results of clinical studies ([clinicaltrials.gov](https://clinicaltrials.gov/)), frequent adversities (Medline), or collect pharmacovigilance data ([FAERS](https://www.fda.gov/drugs/surveillance/questions-and-answers-fdas-adverse-event-reporting-system-faers)) depending on the data being incorporated, the interpretation is different. 
For example, in the case of spontaneous reporting systems, the frequency with which an adverse event is reported should be considered relative to the time the compound has been in the market and the frequency of these adverse events in the population treated. +Databases containing clinical toxicological data of drugs can contain the results of clinical studies {% tool "clinicaltrials-gov" %}, frequent adversities (Medline), or collect pharmacovigilance data {% tool "faers" %}. Depending on the data being incorporated, the interpretation is different. For example, in the case of spontaneous reporting systems, the frequency with which an adverse event is reported should be considered relative to the time the compound has been in the market and the frequency of these adverse events in the population treated. ### Solutions Examples of databases containing drug toxicological data: -- [clinicaltrials.gov](https://clinicaltrials.gov/) is a resource depending on the National Library of medicine which makes available private and public-funded clinical trials. -- The [FDA Adverse Event Reporting System (FAERS)](https://www.fda.gov/drugs/surveillance/questions-and-answers-fdas-adverse-event-reporting-system-faers) contains adverse event reports, medication error reports and product quality complaints submitted by healthcare professionals, consumers, and manufacturers. +- {% tool "clinicaltrials-gov" %} is a resource depending on the National Library of Medicine which makes available private and public-funded clinical trials. +- The FDA Adverse Event Reporting System {% tool "faers" %} contains adverse event reports, medication error reports and product quality complaints submitted by healthcare professionals, consumers, and manufacturers. Harmonization of terminologies can be achieved by using different resources: -- The [Unified Medical Language System (UMLS)](https://www.nlm.nih.gov/research/umls/index.html) provides mappings between different medical vocabularies. 
It includes common ontologies within the condition/diagnosis domain like SNOMED, ICD9CM, ICD10CM, and also the MedDRA ontology. -- The [OHDSI](https://www.ohdsi.org/analytic-tools/athena-standardized-vocabularies/) initiative for health data harmonization is an alternative solution for the mapping of vocabularies needed for the harmonization of different resources. This initiative maintains the ATHENA set of vocabularies which is in constant evolution and covers relevant domains in the realm of health care. The OHDSI community is paying special attention to the mappings between medication identifiers coming from national regulatory agencies of the countries of provenance of the institutions involved in the initiative, and the RxNorm identifier which is the standard vocabulary used by OHDSI. -- Resources in the context of environmental ([ITER](https://www.tera.org/iter/), [IRIS](https://www.epa.gov/iris)) or occupational ([Haz-Map](https://haz-map.com/)) toxicity using CAS Registry Number identifiers can be connected with those in the pharmaceutical field prone to use [ChEMBL](https://www.ebi.ac.uk/chembl/) identifiers via molecular identifiers available in both resources like the standard InChI or standard InChI Key representations. Services like EBI’s [UniChem](https://www.ebi.ac.uk/unichem/) can help to translate between different chemical identifiers. +- The Unified Medical Language System {% tool "umls" %} provides mappings between different medical vocabularies. It includes common ontologies within the condition/diagnosis domain like SNOMED, ICD9CM, ICD10CM, and also the MedDRA ontology. +- The {% tool "ohdsi" %} initiative for health data harmonization is an alternative solution for the mapping of vocabularies needed for the harmonization of different resources. This initiative maintains the ATHENA set of vocabularies which is in constant evolution and covers relevant domains in the realm of health care. 
The OHDSI community is paying special attention to the mappings between medication identifiers coming from national regulatory agencies of the countries of provenance of the institutions involved in the initiative, and the RxNorm identifier which is the standard vocabulary used by OHDSI. +- Resources in the context of environmental ({% tool "iter" %}, {% tool "iris" %}) or occupational ({% tool "haz-map" %}) toxicity using CAS Registry Number identifiers can be connected with those in the pharmaceutical field prone to use {% tool "chembl" %} identifiers via molecular identifiers available in both resources like the standard InChI or standard InChI Key representations. Services like EBI’s {% tool "unichem" %} can help to translate between different chemical identifiers. To import unstructured data sources into structured schemas is a really challenging task as it involves the application of natural language processing technologies. The development of these tools in the field of toxicology is still at the embryonic stage but several initiatives exist: -- The [LimTox](http://limtox.bioinfo.cnio.es/) system is a text mining approach devoted to the extraction of associations between chemical agents and hepatotoxicity. -- The [AOP4EUpest](http://www.biomedicale.parisdescartes.fr/aop4EUpest/home.php) webserver is a resource for the identification of annotated pesticides-biological events involved in Adverse Outcome Pathways (AOPs) via text mining approaches. +- The {% tool "limtox" %} system is a text mining approach devoted to the extraction of associations between chemical agents and hepatotoxicity. +- The {% tool "aop4eupest" %} webserver is a resource for the identification of annotated pesticides-biological events involved in Adverse Outcome Pathways (AOPs) via text mining approaches. 
## Ecotoxicology data - Existing data @@ -114,5 +114,5 @@ When considering the effect of a substance on the environment, in addition to it ### Solutions -- The [ECOTOXicology Knowledgebase (ECOTOX)](https://cfpub.epa.gov/ecotox/) is a comprehensive, publicly available Knowledgebase providing single chemical environmental toxicity data on aquatic life, terrestrial plants, and wildlife. -- The [CompTox Chemicals Dashboard](https://comptox.epa.gov/dashboard) provides toxicological information for over 800.000 chemical compounds, including experimental and predicted fate information. +- The ECOTOXicology Knowledgebase ({% tool "ecotox" %}) is a comprehensive, publicly available Knowledgebase providing single chemical environmental toxicity data on aquatic life, terrestrial plants, and wildlife. +- The {% tool "comptox" %} provides toxicological information for over 800.000 chemical compounds, including experimental and predicted fate information. diff --git a/pages/your_role.md b/pages/your_role.md index 747a63458..7ee5c3e54 100644 --- a/pages/your_role.md +++ b/pages/your_role.md @@ -11,5 +11,5 @@ In this section, information is organised based on the different roles a profess -{% include section-navigation-tiles.html type="your_role" search=true %} +{% include section-navigation-tiles.html type="your_role" search=true except="your_role.md" %} diff --git a/pages/your_tasks.md b/pages/your_tasks.md index 1a5c2f907..c3e676731 100644 --- a/pages/your_tasks.md +++ b/pages/your_tasks.md @@ -11,4 +11,4 @@ In this section, information is organised around regular research data managemen - A summary table of tools and resources relevant for the specific task and recommended by communities. 
-{% include section-navigation-tiles.html type="your_tasks" search=true %} +{% include section-navigation-tiles.html type="your_tasks" search=true except="your_tasks.md" %} diff --git a/pages/your_tasks/compliance_monitoring.md b/pages/your_tasks/compliance_monitoring.md index 22b8e2225..c27c1dfc3 100644 --- a/pages/your_tasks/compliance_monitoring.md +++ b/pages/your_tasks/compliance_monitoring.md @@ -36,13 +36,15 @@ By knowing their capabilities institutions can spot areas of improvement and dir * FAIR data * GO-FAIR Initiative provides a framework for designing [metrics for the evaluation of FAIRness](https://www.go-fair.org/2017/12/11/metrics-evaluation-fairness/). * RDA developed a first set of [guidelines and a checklist](https://zenodo.org/record/3909563#.YKZV3i0RpN1) related to the implementation of the FAIR indicators. -* The [FAIRplus project](https://fairplus-project.eu) with its [FAIR Cookbook](https://fairplus.github.io/the-fair-cookbook/content/recipes/assessing-fairness.html#) provides services, tools, and indicators necessary for the assessment or the evaluation of data against the FAIR Principles. - * [FAIR Evaluators](https://fairsharing.github.io/FAIR-Evaluator-FrontEnd/#!/#%2F!) are an automated approach to evaluate FAIRness of data services. - * [FAIRassist.org](https://fairassist.org/#!/) aims to collect and describe existing resources for the assessment and/or evaluation of digital objects against the FAIR principles. +* The [FAIRplus project](https://fairplus-project.eu) with its {% tool "fair-cookbook" %} provides services, tools, and indicators necessary for the assessment or the evaluation of data against the FAIR Principles. + * {% tool "fair-evaluation-services" %} are an automated approach to evaluate FAIRness of data services. + * {% tool "fairassist-org" %} aims to collect and describe existing resources for the assessment and/or evaluation of digital objects against the FAIR principles. 
+* The {% tool "fair-wizard" %} utilizes FAIRification resources developed by the FAIRplus project and other platforms, suggests FAIRification materials based on the FAIRification requirements, and designs FAIRification solutions for data owners, data stewards, and other people involved in FAIRification. +* The {% tool "fairshake" %} evaluates the FAIRness of Digital Objects. * Information Security, Data Protection, Accountability * [21 CFR part 11](https://www.fda.gov/regulatory-information/search-fda-guidance-documents/part-11-electronic-records-electronic-signatures-scope-and-application) is a standard, which outlines criteria for electronic records in an IT system to be as valid as signed paper records. It is widely adopted in lab information systems and applications used in clinical trials and medical research. - * [ISO 27001](https://www.iso.org/isoiec-27001-information-security.html) is an international standard for the management of information security. It is adopted by some universities and research institutes to certify their data centres. + * {% tool "iso-iec-27001" %} is an international standard for the management of information security. It is adopted by some universities and research institutes to certify their data centres. * [ISO/IEC 27018](https://www.iso.org/standard/76559.html) is a standard aimed to be a code of practice for protection of personally identifiable information (PII) in public clouds. ## How can you ethically access genetic resources of another country? diff --git a/pages/your_tasks/costs_data_management.md b/pages/your_tasks/costs_data_management.md index 8c6005ca9..81d01a97d 100644 --- a/pages/your_tasks/costs_data_management.md +++ b/pages/your_tasks/costs_data_management.md @@ -22,7 +22,7 @@ The processes of data management will incur costs. 
The expenses may consist of p ### Solutions * To get an overview of possible costs in your research project, you can go through different research [life cycle phases](data_life_cycle) and activities specific for your project. -* Some organisations have created tools, for their users, to help formulate and budget data management costs; such as [Storage Costs Evaluator by Data Stewardship Wizard](https://storage-costs-evaluator.ds-wizard.org/), the [Data Management costing Tool](https://ukdataservice.ac.uk/learning-hub/research-data-management/plan-to-share/costing/) developed by the UK Data Service, and the [TU Delft data management costing tool](https://www.tudelft.nl/en/library/research-data-management/r/plan/data-management-costs). These tools can help to budget for personnel costs and/or additional costs that are needed to preserve and share research data beyond a research project. +* Some organisations have created tools, for their users, to help formulate and budget data management costs; such as {% tool "data-stewardship-wizard-storage-costs-evaluator" %}, the {% tool "uk-data-service-data-management-costing-tool" %} developed by the UK Data Service, and the {% tool "tu-delft-data-management-costing-tool" %}. These tools can help to budget for personnel costs and/or additional costs that are needed to preserve and share research data beyond a research project. #### Costs for data stewards * Personnel costs for data stewards is an eligible cost in many projects although with limitations on the number of full time employee (FTE). Check if this cost is eligible in your grant. diff --git a/pages/your_tasks/data_analysis.md b/pages/your_tasks/data_analysis.md index b5ba2ef7b..46e176a93 100644 --- a/pages/your_tasks/data_analysis.md +++ b/pages/your_tasks/data_analysis.md @@ -4,7 +4,7 @@ contributors: [Olivier Collin, Stian Soiland-Reyes, Michael R. Crusoe, Sven Twar description: How to make data analysis FAIR. 
page_id: data_analysis related_pages: - tool_assembly: [nels, xnat-pic, transmed, ome, galaxy] + tool_assembly: [nels, xnat_pic, transmed, ome, galaxy] training: - name: Training in TeSS registry: TeSS @@ -44,18 +44,18 @@ There are many ways that will bring reproducibility to your data analysis. You c ### Solutions * Make your code available. If you have to develop a software for your data analysis, it is always a good idea to publish your code. The git versioning system offers both a way to release your code but offers also a versioning system. You can also use Git to interact with your software users. Be sure to specify a license for your code (see the [licensing section](licensing)). -* Use package and environment management system. By using package and environment management systems like [Conda](https://anaconda.org/) and its bioinformatics specialized channel [Bioconda](https://bioconda.github.io/), researchers that have got access to your code will be able to easily install specific versions of tools, even older ones, in an isolated environment. They will be able to compile/run your code in an equivalent computational environment, including any dependencies such as the correct version of R or particular libraries and command-line tools your code use. You can also share and preserve your setup by specifying in a [environment file](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) which tools you installed. -* Use container environments. As an alternative to package management systems you can consider _container environments_ like [Docker](https://www.docker.com/) or [Singularity](https://sylabs.io/docs/). +* Use package and environment management system. 
By using package and environment management systems like {% tool "conda" %} and its bioinformatics specialized channel {% tool "bioconda" %}, researchers that have got access to your code will be able to easily install specific versions of tools, even older ones, in an isolated environment. They will be able to compile/run your code in an equivalent computational environment, including any dependencies such as the correct version of R or particular libraries and command-line tools your code use. You can also share and preserve your setup by specifying in a [environment file](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) which tools you installed. +* Use container environments. As an alternative to package management systems you can consider _container environments_ like {% tool "docker" %} or {% tool "singularity" %}. * Use workflow management systems. [Scientific Workflow management systems](https://en.wikipedia.org/wiki/Scientific_workflow_system) will help you organize and automate how computational tools are to be executed. Compared to composing tools using a standalone script, workflow systems also help document the different computational analyses applied to your data, and can help with scalability, such as cloud execution. Reproducibility is also enhanced by the use of workflows, as they typically have bindings for specifying software packages or containers for the tools you use from the workflow, allowing others to re-run your workflow without needing to pre-install every piece of software it needs. It is a flourishing field and [many other workflow management systems](https://s.apache.org/existing-workflow-systems) are available, some of which are general-purpose (e.g. any command line tool), while others are domain-specific and have tighter tool integration. 
Among the many workflow management systems available, one can mention - * Workflow platforms that manage your data and provide an interface (web, GUI, APIs) to run complex pipelines and review their results. For instance: [Galaxy]( https://galaxyproject.org/) and [Arvados]( https://arvados.org) ([CWL-based]( https://www.commonwl.org), open source). - * Workflow runners that take a workflow written in a proprietary or standardized format (such as the [CWL standard]( https://www.commonwl.org)) and execute it locally or on a remote compute infrastructure. For instance, [toil-cwl-runner](https://toil.readthedocs.io/en/latest/running/cwl.html), the reference CWL runner ([cwltool](https://pypi.org/project/cwltool/)), [Nextflow]( https://www.nextflow.io/), [Snakemake]( https://snakemake.readthedocs.io/), Cromwell. -* Use notebooks. Using notebooks, you will be able to create reproducible documents mixing text and code; which can help explain your analysis choices; but also be used as an exploratory method to examine data in detail. Notebooks can be used in conjunction with the other solutions mentioned above, as typically the notebook can be converted to a script. Some of the most well-known notebooks systems are: [Jupyter](https://jupyter.org/), with built-in support for code in Python, R and Julia, and many other [kernels](https://github.com/jupyter/jupyter/wiki/Jupyter-kernels); [RStudio](https://rstudio.com/products/rstudio/#rstudio-desktop) based on R. See the table below for additional tools. + * Workflow platforms that manage your data and provide an interface (web, GUI, APIs) to run complex pipelines and review their results. For instance: {% tool "galaxy" %} and {% tool "arvados" %} ({% tool "common-workflow-language" %}-based), open source. + * Workflow runners that take a workflow written in a proprietary or standardized format (such as the {% tool "common-workflow-language" %}) and execute it locally or on a remote compute infrastructure. 
For instance, {% tool "cwl-in-toil" %}, the reference CWL runner ({% tool "cwltool" %}), {% tool "nextflow" %}, {% tool "snakemake" %}, {% tool "cromwell" %}. +* Use notebooks. Using notebooks, you will be able to create reproducible documents mixing text and code; which can help explain your analysis choices; but also be used as an exploratory method to examine data in detail. Notebooks can be used in conjunction with the other solutions mentioned above, as typically the notebook can be converted to a script. Some of the most well-known notebooks systems are: {% tool "jupyter" %}, with built-in support for code in Python, R and Julia, and many other {% tool "jupyter-kernels" %}; {% tool "rstudio" %} based on R. See the table below for additional tools. ## How can you use package and environment management systems? ### Description -By using package and environment management systems like [Conda](https://anaconda.org/) and its bioinformatics specialized channel [Bioconda](https://bioconda.github.io/), you will be able to easily install specific versions of tools, even older ones, in an isolated environment. You can also share and preserve your setup by specifying in a [environment file](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) which tools you installed. +By using package and environment management systems like {% tool "conda" %} and its bioinformatics specialized channel {% tool "bioconda" %}, you will be able to easily install specific versions of tools, even older ones, in an isolated environment. You can also share and preserve your setup by specifying in a [environment file](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) which tools you installed. 
### Considerations @@ -68,8 +68,8 @@ Conda works by making a nested folder containing the traditional UNIX directory ### Solutions -* MacOS-specific package management systems: [Homebrew](https://brew.sh/), [Macports](https://www.macports.org/). -* Windows-specific package management systems: [Chocolatey](https://chocolatey.org/) and [Windows Package Manager](https://docs.microsoft.com/en-us/windows/package-manager/) `winget`. +* MacOS-specific package management systems: {% tool "homebrew" %}, {% tool "macports" %}. +* Windows-specific package management systems: {% tool "chocolatey" %} and {% tool "windows-package-manager" %} `winget`. * Linux distributions also have their own package management systems (`rpm`/`yum`/`dnf`, `deb`/`apt`) that have a wide variety of tools available, but at the cost of less flexibility in terms of the tool versions, to ensure they exist co-installed. * Language-specific virtual environments and repositories including: [rvm](https://rvm.io/) and [RubyGems](https://rubygems.org/) for Ruby, [pip](https://docs.python.org/3/installing/index.html) and [venv](https://docs.python.org/3/tutorial/venv.html) for Python, [npm](https://www.npmjs.com/) for NodeJS/Javascript, [renv](https://rstudio.github.io/renv/) and [CRAN](https://cran.r-project.org/) for R, [Apache Maven](https://maven.apache.org/) or [Gradle](https://gradle.org/) for Java. * Tips and tricks to navigate the landscape of software package management solutions: @@ -81,7 +81,7 @@ Conda works by making a nested folder containing the traditional UNIX directory ### Description -Container environments like [Docker](https://www.docker.com/) or [Singularity](https://sylabs.io/docs/) allow you to easily install specific versions of tools, even older ones, in an isolated environment. +Container environments like {% tool "docker" %} or {% tool "singularity" %} allow you to easily install specific versions of tools, even older ones, in an isolated environment. 
### Considerations @@ -94,13 +94,14 @@ In short containers works almost like a virtual machine (VMs), in that it re-cre ### Solutions -* [Docker](https://www.docker.com/) is the most well-known container runtime, followed by [Singularity](https://sylabs.io/docs/). These require (and could be used to access) system administrator privileges to be set up. -* [uDocker](https://indigo-dc.gitbook.io/udocker/) and [Podman](https://podman.io/) are also _user space_ alternatives that have compatible command line usage. -* Large registries of community-provided container images are [Docker Hub](https://hub.docker.com/) and [RedHat Quay.io](https://quay.io/search). These are often ready-to-go, not requiring any additional configuration or installations, allowing your application to quickly have access to open source server solutions. -* [Biocontainers](https://biocontainers.pro/) have a large selection of bioinformatics tools. -* To customize a Docker image, it is possible to use techniques such as [volumes](https://docs.docker.com/storage/volumes/) to store data and [Dockerfile](https://docs.docker.com/engine/reference/builder/). This is useful for installing your own application inside a new container image, based on a suitable _base image_ where you can do your `apt install` and software setup in a reproducible fashion - and share your own application as an image on Docker Hub. -* Container linkage can be done by _container composition_ using tools like [Docker Compose](https://docs.docker.com/compose/). -* More advanced container deployment solutions like [Kubernetes](https://kubernetes.io/) and Computational Workflow Management systems can also manage cloud instances and handle analytical usage. +* {% tool "docker" %} is the most well-known container runtime, followed by {% tool "singularity" %}. These require (and could be used to access) system administrator privileges to be set up. 
+* {% tool "udocker" %} and {% tool "podman" %} are also _user space_ alternatives that have compatible command line usage. +* Large registries of community-provided container images are {% tool "podman" %} and [RedHat Quay.io](https://quay.io/search). These are often ready-to-go, not requiring any additional configuration or installations, allowing your application to quickly have access to open source server solutions. +* {% tool "biocontainers" %} have a large selection of bioinformatics tools. +* To customize a Docker image, it is possible to use techniques such as {% tool "volumes" %} to store data and {% tool "dockerfile-reference" %}. This is useful for installing your own application inside a new container image, based on a suitable _base image_ where you can do your `apt install` and software setup in a reproducible fashion - and share your own application as an image on Docker Hub. +* Container linkage can be done by _container composition_ using tools like {% tool "docker-compose-overview" %}. +* More advanced container deployment solutions like {% tool "kubernetes" %} and Computational Workflow Management systems can also manage cloud instances and handle analytical usage. +* {% tool "openstack" %} is an open-source platform that uses pooled virtual resources to build and manage private and public clouds. It provides a stable base for deploying and managing containers, allowing for faster application deployment and simplified management. * Tips and tricks to navigate the landscape of container solutions: * If you just need to run a database server, describe how to run it as a Docker/Singularity container. * If you need several servers running, connected together, set up containers in Docker Compose. 
@@ -133,9 +134,9 @@ Creating an analysis workflow involves several steps that require careful consid ### Solutions - Most workflow management systems provide detailed tutorials and documentation for creating workflows and including containerization technologies. Here are documentations for [Nextflow](https://www.nextflow.io/docs/latest/docker.html), [Snakemake](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html), [Cromwell](https://cromwell.readthedocs.io/en/stable/tutorials/Containers/), [CWL](https://www.commonwl.org/user_guide/topics/using-containers.html). -- The [Biocontainer](https://biocontainers.pro/) project provides a platform for storing and sharing containers that can used in your workflow. -- The [bio.tools](https://bio.tools/) repository lists state of the art tools and databases from the field of bioinformatics ordered by collections and communities. -- [OpenEBench](https://openebench.bsc.es/) is a framework for monitoring and benchmarking analysis tools and workflows. -- [WorkflowHub](https://workflowhub.eu/) and [Dockstore](https://dockstore.org/) are two popular services for sharing and re-using workflows. -- [LifeMonitor](https://crs4.github.io/life_monitor/) is a service designed to facilitate the long-term viability and reusability of published computational workflows. +- The {% tool "biocontainers" %} project provides a platform for storing and sharing containers that can be used in your workflow. +- The {% tool "bio-tools" %} repository lists state of the art tools and databases from the field of bioinformatics ordered by collections and communities. +- {% tool "openebench" %} is a framework for monitoring and benchmarking analysis tools and workflows. +- {% tool "workflowhub" %} and {% tool "dockstore" %} are two popular services for sharing and re-using workflows. +- {% tool "life-monitor" %} is a service designed to facilitate the long-term viability and reusability of published computational workflows.
- The [ELIXIR Cloud and AAI project](https://elixir-cloud-aai.github.io/) supports a framework for executing workflows in the cloud via the standards developed by the [GA4GH](https://www.ga4gh.org/) community. diff --git a/pages/your_tasks/data_brokering.md b/pages/your_tasks/data_brokering.md index 9dc80aa22..00f65a274 100644 --- a/pages/your_tasks/data_brokering.md +++ b/pages/your_tasks/data_brokering.md @@ -81,4 +81,5 @@ Once relevant repositories are identified for data submission and sharing, being * As a data broker, you generally wish to submit large amounts of data continuously. Hence, having access to a submission command-line-interface (CLI) or API is generally preferred over a user interface. ### Solutions -* For example, ENA offers a submission CLI and API as well as an official data broker role. For more information on data submission as a broker, please visit: [https://ena-docs.readthedocs.io/en/latest/faq/data_brokering.html?highlight=broker](https://ena-docs.readthedocs.io/en/latest/faq/data_brokering.html?highlight=broker) +* For example, {% tool "european-nucleotide-archive" %} offers a submission CLI and API as well as an official data broker role. For more information on data submission as a broker, please visit: [https://ena-docs.readthedocs.io/en/latest/faq/data_brokering.html?highlight=broker](https://ena-docs.readthedocs.io/en/latest/faq/data_brokering.html?highlight=broker) +* {% tool "ena-upload-tool" %}, a collaboratively developed and compiled Galaxy tools and workflows necessary to clean, assemble and submit sequences to the {% tool "european-nucleotide-archive" %}. 
diff --git a/pages/your_tasks/data_management_plan.md b/pages/your_tasks/data_management_plan.md index 3922f54bb..d9cb4b3c5 100644 --- a/pages/your_tasks/data_management_plan.md +++ b/pages/your_tasks/data_management_plan.md @@ -48,12 +48,13 @@ However, a number of web-based DMP tools are currently available that greatly fa ### Solutions * Use the tool suggested by your funding agency or institution. * Choose one of the following online DMP tools (ordered alphabetically). - * [Data Stewardship Wizard (DSW)](https://ds-wizard.org): publicly available open-source tool to collaboratively compose data management plans through smart and customisable questionnaires with FAIRness evaluation. - * [DAMAP](https://damap.org/): tool for machine actionable Data Management Plans. - * [DMP Canvas Generator](https://dmp.vital-it.ch): this tool, mainly for researchers in Switzerland, is based on a questionnaire following the structure of the SNSF (Swiss National Science Foundation) instructions for DMP submission. Each Swiss High School can develop a specific template/canvas. - * [DMPonline](https://dmponline.dcc.ac.uk): tool widely used in Europe and many universities or institutes provide a DMPonline instance to researchers. - * [DMPTool](https://dmptool.org): widely used tool and many universities or institutes provide a DMPTool instance to researchers. - * [EasyDMP](https://easydmp.no): tool provided by the pan-European network EUDAT. + * {% tool "data-stewardship-wizard" %}: publicly available open-source tool to collaboratively compose data management plans through smart and customisable questionnaires with FAIRness evaluation.
+ * {% tool "damap" %}: tool for machine actionable Data Management Plans. + * {% tool "dmp-canvas-generator" %}: this tool, mainly for researchers in Switzerland, is based on a questionnaire following the structure of the SNSF (Swiss National Science Foundation) instructions for DMP submission. Each Swiss High School can develop a specific template/canvas. + * {% tool "dmponline" %}: tool widely used in Europe and many universities or institutes provide a DMPonline instance to researchers. + * {% tool "dmptool" %}: widely used tool and many universities or institutes provide a DMPTool instance to researchers. + * {% tool "dmproadmap" %}: DMP Roadmap is a Data Management Planning tool. Management and development of DMP Roadmap is jointly provided by the Digital Curation Centre (DCC), http://www.dcc.ac.uk/, and the University of California Curation Center (UC3), http://www.cdlib.org/services/uc3/. The DMPTool and DMPonline sites are both now running from the joint DMPRoadmap codebase. + * {% tool "easy-dmp" %}: tool provided by the pan-European network EUDAT. * Additional tools for creating a DMP are listed in the table below. diff --git a/pages/your_tasks/data_organisation.md b/pages/your_tasks/data_organisation.md index e11ef0449..b2ccb076f 100644 --- a/pages/your_tasks/data_organisation.md +++ b/pages/your_tasks/data_organisation.md @@ -4,7 +4,7 @@ contributors: [Siiri Fuchs, Minna Ahokas, Yvonne Kallberg, Ivan Mičetić, Marin description: Best practices to name and organise research data. page_id: data_organisation related_pages: - tool_assembly: [ome, transmed, xnat-pic] + tool_assembly: [ome, transmed, xnat_pic] dsw: - name: How will you do file naming and file organization? 
uuid: 8e886b55-3287-48e7-b353-daf6ab40f7d8 @@ -43,7 +43,7 @@ Brief and descriptive file names are important in keeping your data files organi * Include a unique identifier (see: [Identifiers](identifiers)) * Include a version number if appropriate: minimum two digits (V02) and extend it, if needed for minor corrections (V02-03). The leading zeros, will ensure the files are sorted correctly. * Write your file naming convention down and explain abbreviations in your data documentation. -* If you need to rename a lot of files in order to organize your project data and manage your files better, it is possible to use applications like [Bulk Rename Utility](https://www.bulkrenameutility.co.uk/) (Windows, free) and [Renamer4Mac](https://renamer.com/) (Mac). +* If you need to rename a lot of files in order to organize your project data and manage your files better, it is possible to use applications like {% tool "bulk-rename-utility" %} (Windows, free) and {% tool "renamer4mac" %} (Mac). #### Example elements to include in the file name * Date of creation @@ -108,7 +108,7 @@ File versioning is a way to keep track of changes made to files and datasets. Wh ### Solutions * Smaller demands of versioning can be managed manually e.g. by keeping a log where the changes for each respective file is documented, version by version. -* For automatic management of versioning, conflict resolution and back-tracing capabilities, use a proper version control software such as [Git](https://git-scm.com/), hosted by e.g. [GitHub](https://github.com/) and [BitBucket](https://bitbucket.org/). +* For automatic management of versioning, conflict resolution and back-tracing capabilities, use a proper version control software such as {% tool "git" %}, hosted by e.g. {% tool "github" %} and {% tool "bitbucket" %}. * Use a Cloud Storage service (see [Data storage](storage#what-features-do-you-need-in-a-storage-solution-when-collecting-data) page) that provides automatic file versioning. 
It can be very handy for spreadsheets, text files and slides. diff --git a/pages/your_tasks/data_protection.md b/pages/your_tasks/data_protection.md index 6c78b9deb..874bca7ec 100644 --- a/pages/your_tasks/data_protection.md +++ b/pages/your_tasks/data_protection.md @@ -75,7 +75,7 @@ To protect your research data, code, and other information assets you should est ### Description -Where scientific research involves the processing of data concerning people in the European Union (EU), it is subject to the General Data Protection Regulation (GDPR). The GDPR applies a ["special regime"](https://edps.europa.eu/sites/edp/files/publication/20-01-06_opinion_research_en.pdf) to research, providing +Where scientific research involves the processing of data concerning people in the European Union (EU), it is subject to the {% tool "eu-general-data-protection-regulation" %} (GDPR). The GDPR applies a ["special regime"](https://edps.europa.eu/sites/edp/files/publication/20-01-06_opinion_research_en.pdf) to research, providing derogations from some obligations given appropriate criteria are met and safeguards are in place. The criteria is to follow standards in research method and ethics, as well as to aim societal benefit rather than serving private interests in research. The safeguards are a multitude and include: * data collection with informed consent under ethical oversight and accountability; @@ -117,10 +117,10 @@ Record your data processing. To meet GDPR's accountability requirement you shou ### Solution - * [EU General Data Protection Regulation](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679&from=EN). 
 * [European Data Protection Supervisor's "Preliminary opinion on Data Protection and Scientific Research"](https://edps.europa.eu/sites/edp/files/publication/20-01-06_opinion_research_en.pdf) - * [BBMRI-ERIC's Ethical Legal Societal Issues (ELSI) Knowledge Base](https://www.bbmri-eric.eu/elsi/knowledge-base/) contains a glossary, agreement templates and guidance. - * [Data Information System DAISY](https://daisy-demo.elixir-luxembourg.org/) is software tool from ELIXIR that allows the record keeping of data processing activities in research projects. - * [DAWID](https://dawid.elixir-luxembourg.org) is a software tool from ELIXIR that allows generation of tailor-made data sharing agreements - * [Tryggve ELSI Checklist](https://scilifelab-data-guidelines.readthedocs.io/en/latest/docs/general/sensitive_data.html) is a list of Ethical, Legal, and Societal Implications (ELSI) to consider for research projects on human subjects. + * {% tool "bbmri-eric-s-elsi-knowledge-base" %} contains a glossary, agreement templates and guidance. + * {% tool "daisy" %} and {% tool "erpa" %} are software tools from ELIXIR that allow the record keeping of data processing activities in research projects. + * {% tool "dawid" %} is a software tool from ELIXIR that allows generation of tailor-made data sharing agreements + * {% tool "dpia-knowledge-model" %} is designed to leverage {% tool "data-stewardship-wizard" %} to perform DPIA. + * {% tool "tryggve-elsi-checklist" %} is a list of Ethical, Legal, and Societal Implications (ELSI) to consider for research projects on human subjects.
diff --git a/pages/your_tasks/data_provenance.md b/pages/your_tasks/data_provenance.md index 245fecaa9..3e0babe69 100644 --- a/pages/your_tasks/data_provenance.md +++ b/pages/your_tasks/data_provenance.md @@ -30,7 +30,7 @@ However, knowing what’s the best way to document provenance can be challenging - Provence information can be recorded: - as free text and unstructured information (mainly readable for humans, not for machines/software), describing data collection and processing method. - according to metadata schemas or standards, that can be generic (e.g. Dublin Core) or discipline specific such as [ISO19115-2](https://www.iso.org/standard/67039.html). - - according to Provenance Data Model ([PROV-DM](https://www.w3.org/TR/prov-dm/)) and ontology ([PROV-O](https://www.w3.org/TR/prov-o/)). + - according to Provenance Data Model ({% tool "prov-dm-the-prov-data-model" %}) and ontology ([PROV-O](https://www.w3.org/TR/prov-o/)). - As for documentation and metadata, the medium to capture provenance information can also varies. Provenance trails can be captured - in text files or spreadsheets - in registries or databases @@ -41,10 +41,10 @@ However, knowing what’s the best way to document provenance can be challenging ### Solutions -- Record provenance according to schemas or defined profiles. These can be generic or domain-specific, and can be found in [RDA Metadata Standards Catalog](https://rdamsc.bath.ac.uk) or [FAIRsharing](https://fairsharing.org/search?fairsharingRegistry=Standard). Use metadata schemas containing provenance information in your README file and in any kind of data documentation and metadata file. Best practices for documentation and metadata, and data organisation should be applied for provenance file as well. +- Record provenance according to schemas or defined profiles. These can be generic or domain-specific, and can be found in {% tool "rda-standards" %} or {% tool "fairsharing" %}. 
Use metadata schemas containing provenance information in your README file and in any kind of data documentation and metadata file. Best practices for documentation and metadata, and data organisation should be applied for provenance file as well. - Implement serialisation specification of the [PROV-MODEL](https://www.w3.org/TR/prov-overview/) in your data management tools to record provenance in machine-actionable format (RDF, Linked data, owl, xml, etc.). - Use [RO-Crate specifications](https://www.researchobject.org/ro-crate/1.1/provenance.html) and/or specific profiles for provenance (e.g., [RO-Crate profiles to capture the provenance of workflow runs](https://www.researchobject.org/workflow-run-crate/)). - Make use of tools and software that help you record provenance in a manual or an automated way. Use: - Electronic Data Capture (EDC) systems, Laboratory Information Management Systems (LIMS) or similar tools. - - Workflow management systems (such as Kepler, Galaxy, Taverna, VisTrails); provenance information embedded in such software or tools are usually available to users of the same tool or can be exported as separated file in several formats, such as [RO-Crate](https://www.researchobject.org/ro-crate/). - - Registries such as [WorkflowHub](https://workflowhub.eu). + - Workflow management systems (such as Kepler, {% tool "galaxy" %}, Taverna, VisTrails); provenance information embedded in such software or tools are usually available to users of the same tool or can be exported as separated file in several formats, such as {% tool "research-object-crate" %}. + - Registries such as {% tool "workflowhub" %}. 
diff --git a/pages/your_tasks/data_publication.md b/pages/your_tasks/data_publication.md index a242b0da3..febf2bde4 100644 --- a/pages/your_tasks/data_publication.md +++ b/pages/your_tasks/data_publication.md @@ -31,7 +31,7 @@ Sometimes it is difficult to determine if publishing data you have at hand is th * Registered access or with authentication procedure. * Controlled access or via Data Access Committees (DACs). * Decide what [licence](licensing) should be applied to your metadata and data. -* Certain repositories offer solutions for depositing data that need to be under restricted access. This allows for data to be findable even when it can not be published openly. One example is the [The European Genome-phenome Archive (EGA)](https://ega-archive.org/) that can be used to deposit potentially identifiable genetic and phenotypic human data. +* Certain repositories offer solutions for depositing data that need to be under restricted access. This allows for data to be findable even when it can not be published openly. One example is the {% tool "the-european-genome-phenome-archive" %} that can be used to deposit potentially identifiable genetic and phenotypic human data. * Many repositories provide the option to put an embargo on a deposited dataset. This might be useful if you prefer to use the data in a publication before making it available for others to use. * Establish an agreement outlining the controllership of the data and each collaborators' rights and responsibilities. * Even if the data cannot be published, it is good practice to publish the metadata of your datasets. @@ -58,13 +58,13 @@ The most suitable repository will depend on the data type and your discipline. * The [licence](licensing) that should be applied to your metadata and data. * Check if/what discipline-specific repositories can apply the necessary access conditions and licences to your (meta)data. 
* Discipline-specific repositories: if a discipline-specific repository, recognised by the community, exists this should be your first choice since discipline-specific repositories often increases the FAIRness of the data. - * The [EMBL-EBI's data submission wizard](https://www.ebi.ac.uk/submission/) can help you choose a suitable repository based on your data type. + * The {% tool "embl-ebi-s-data-submission-wizard" %} can help you choose a suitable repository based on your data type. * There are lists of discipline-specific, community-recognised repositories e.g.: - * [ELIXIR Deposition Databases](https://elixir-europe.org/platforms/data/elixir-deposition-databases) + * {% tool "elixir-deposition-databases-for-biomolecular-data" %} * [Scientific Data journal's recommended repositories](https://www.nature.com/sdata/policies/repositories) * General-purpose and institutional repositories: For other cases, a repository that accepts data of different types and disciplines should be considered. It could be a [general-purpose repository](https://www.nature.com/sdata/policies/repositories#general) or a centralised repository provided by your institution or university. -* [re3data.org](https://www.re3data.org) or [Repository Finder](https://repositoryfinder.datacite.org) gather information about existing repositories and allows you to filter them based on access and licence types. -* [re3data.org](https://www.re3data.org) and [FAIRsharing](https://fairsharing.org) websites gather features of repositories, which you can filter by discipline, data type, taxonomy and many other features. +* {% tool "re3data" %} or {% tool "repository-finder" %} gather information about existing repositories and allows you to filter them based on access and licence types. +* {% tool "re3data" %} and {% tool "fairsharing" %} websites gather features of repositories, which you can filter by discipline, data type, taxonomy and many other features. 
## How do you prepare your data for publication in data repositories? @@ -88,9 +88,9 @@ Once you have decided where to publish your data, you will have to make your (me * Costs for sharing and storing data * Repositories generally have information about data formats, metadata requirements and how data can be uploaded under a section called "submit", "submit data", "for submitters" or something similar. Read this section in detail. * To ascertain re-usability data should be released with a clear and accessible data usage [licence](licensing). We suggest making your data available under licences that permit free reuse of data, e.g. a Creative Commons licence, such as CC0 or CC-BY. - * Note that every repository can have one default licence for all datasets. For instance, sequence data submitted to for example European Nucleotide Archive [ENA](https://www.ebi.ac.uk/ena/browser/home) are implicitly free to reuse by others as specified in the [INSDC standards and policies](https://www.ebi.ac.uk/ena/browser/about/policies). + * Note that every repository can have one default licence for all datasets. For instance, sequence data submitted to for example {% tool "european-nucleotide-archive" %} are implicitly free to reuse by others as specified in the {% tool "international-nucleotide-sequence-database-collaboration" %}. * See the corresponding pages for more detailed information about [metadata](metadata_management), [licences](licensing) and [data transfer](data_transfer). - * There are many tools available to remove human reads from your non-human data, e.g. [Metagen-FastQC](https://github.com/alakob/Metagen-FastQC-Docker) + * There are many tools available to remove human reads from your non-human data, e.g. {% tool "metagen-fastqc" %} ## How do you update or delete a published entry from a data repository? 
@@ -103,4 +103,4 @@ You will sometimes need to update or delete some entries that were incomplete or * Does the repository have a test-server where data can be submitted for testing purpose? ### Solutions -Solutions are very much repository-dependent. For example, on the [ENA](https://www.ebi.ac.uk/ena/browser/home), entries can be easily updated using a CLI. However, the updated information is not automatically redistributed to other registries linked to ENA. Upon email request, entries may also be suppressed from public view. Note that ENA also has a test server to make test submissions before submitting to the actual production server, which can be very useful when sending large batches of data to test for any systematic errors. Please check these points with your repository of choice. +Solutions are very much repository-dependent. For example, on the {% tool "european-nucleotide-archive" %}, entries can be easily updated using a CLI. However, the updated information is not automatically redistributed to other registries linked to ENA. Upon email request, entries may also be suppressed from public view. Note that ENA also has a test server to make test submissions before submitting to the actual production server, which can be very useful when sending large batches of data to test for any systematic errors. Please check these points with your repository of choice. diff --git a/pages/your_tasks/data_quality.md b/pages/your_tasks/data_quality.md index cb82cf73c..2f303dc75 100644 --- a/pages/your_tasks/data_quality.md +++ b/pages/your_tasks/data_quality.md @@ -47,16 +47,16 @@ There are many frameworks proposed in the literature to define and evaluate over * verification (focusing on the intrinsic consistency, such as adherence to a format or specified value range); * validation (focusing on the alignment of values with respect to external benchmarks). 
-For health data, a nice example of working out what data quality means can be found in the [OHDSI](https://www.ohdsi.org/) community. The context in this case is observational healthcare data represented in the [OMOP Common Data Model](https://ohdsi.github.io/CommonDataModel/). +For health data, a nice example of working out what data quality means can be found in the {% tool "ohdsi" %} community. The context in this case is observational healthcare data represented in the {% tool "omop-cdm" %}. ### Solutions -* Electronic data capturing system: [REDCap](https://www.project-redcap.org) allows you to design electronic data capture forms and allows you to monitor the quality of data collected via those forms. +* Electronic data capturing system: {% tool "redcap" %} allows you to design electronic data capture forms and allows you to monitor the quality of data collected via those forms. * An example of [data dictionary](https://webdav-r3lab.uni.lu/public/elixir/templates/Data_dictionary_example.xlsx) illustrating the elements and factors that should be defined for the variable needed by data collection. * The World Bank provides [quality assurance guidance](https://dimewiki.worldbank.org/wiki/Data_Quality_Assurance_Plan) for survey design and execution. * The U.S. National Institute's of Health's provides [introductory training material](https://oir.nih.gov/sites/default/files/uploads/sourcebook/documents/ethical_conduct/data_quality_management-2015_05_15.pdf) on data quality. * Bio.tools' listing for [computational tools and pipelines for data quality control in life sciences](https://bio.tools/t?page=1&q=quality&sort=score). * Data integration tools that include pre-defined building blocks to monitor and check data quality, such as [Pentaho Community Edition (CE)](https://wiki.pentaho.com/display/COM/Community+Edition+Downloads?desktop=true¯oName=ul), [Talend Open Studio](https://sourceforge.net/projects/talend-studio/). 
-* Data curation tools such as [OpenRefine](https://openrefine.org/) that help you to identify quality issues, correct (curate) them, carry out transformations in the collected data with easy-to-use graphic interface and visualisation. It also documents all the steps during the curation for reproducibility and backtracking. +* Data curation tools such as {% tool "openrefine" %} that help you to identify quality issues, correct (curate) them, carry out transformations in the collected data with easy-to-use graphic interface and visualisation. It also documents all the steps during the curation for reproducibility and backtracking. -* For heath data, the [Book of OHDSI](http://book.ohdsi.org) has several [chapters](https://ohdsi.github.io/TheBookOfOhdsi/EvidenceQuality.html) on methods for assessing the data quality of observational health datasets, split out by data quality, clinical validity, software validity and method validity. Frameworks proposed in the literature, to define and evaluate overall data quality, could be used to create computational representations of the data quality of a dataset. [OHDSI DataQualityDashboard](https://github.com/OHDSI/DataQualityDashboard), which leverages the Kahn framework referenced above (adapted from [original thehyve.nl blogpost](https://www.thehyve.nl/articles/fair-data-for-machine-learning)), is a software framework for assessing the quality and suitability of routinely generated healthcare data that is represented in the [OMOP Common Data Model](https://ohdsi.github.io/CommonDataModel/). +* For health data, the [Book of OHDSI](http://book.ohdsi.org) has several [chapters](https://ohdsi.github.io/TheBookOfOhdsi/EvidenceQuality.html) on methods for assessing the data quality of observational health datasets, split out by data quality, clinical validity, software validity and method validity. 
Frameworks proposed in the literature, to define and evaluate overall data quality, could be used to create computational representations of the data quality of a dataset. [OHDSI DataQualityDashboard](https://github.com/OHDSI/DataQualityDashboard), which leverages the Kahn framework referenced above (adapted from [original thehyve.nl blogpost](https://www.thehyve.nl/articles/fair-data-for-machine-learning)), is a software framework for assessing the quality and suitability of routinely generated healthcare data that is represented in the {% tool "omop-cdm" %}. diff --git a/pages/your_tasks/data_transfer.md b/pages/your_tasks/data_transfer.md index 94452c111..f9c2e749e 100644 --- a/pages/your_tasks/data_transfer.md +++ b/pages/your_tasks/data_transfer.md @@ -60,30 +60,30 @@ Preferable transfer channel depends on the volume of your data and number of fil * gzip: since tar does not compress the archive created, a compression tool such as gzip is often used to reduce the size of the archive. * Ask the IT team of your institution or organisation about available services for data transfer. Usually, for **small data volume or limited number of files** universities and professional organisations can provide: - * Secure server- or cloud-based applications where you should store work-related data files, synchronize files from different computers and share files by sending a link for access or download. This solution is ideal in case of a small number of files, since files need to be downloaded one by one and this can be inconvenient. Examples of these kinds of applications are NextCloud, Box, ownCloud (see [Data storage page](storage)). - * Access to Office 365 (Software as a Service, or SaaS) that includes cloud storage on OneDrive, and SharePoint for collaborations and files sharing - you can “transfer” your data with these services by generating and sending a link for access or download of specific files. 
+ * Secure server- or cloud-based applications where you should store work-related data files, synchronize files from different computers and share files by sending a link for access or download. This solution is ideal in case of a small number of files, since files need to be downloaded one by one and this can be inconvenient. Examples of these kinds of applications are NextCloud, {% tool "box" %}, {% tool "owncloud" %} (see [Data storage page](storage)). + * Access to Office 365 (Software as a Service, or SaaS) that includes cloud storage on {% tool "microsoft-onedrive" %}, and SharePoint for collaborations and files sharing - you can “transfer” your data with these services by generating and sending a link for access or download of specific files. -* Usually, universities and institutions strongly **discourage** the use of personal accounts on Google Drive, Amazon Drive, Dropbox and similar, to share and transfer work related data, and especially sensitive or personal data. Moreover, it is not allowed to store human data in clouds which are not hosted in the EU. +* Usually, universities and institutions strongly **discourage** the use of personal accounts on {% tool "google-drive" %}, Amazon Drive, {% tool "dropbox" %} and similar, to share and transfer work related data, and especially sensitive or personal data. Moreover, it is not allowed to store human data in clouds which are not hosted in the EU. -* Institutions and professional organisations could also make use of Infrastructure as a Service (IaaS), such as Microsoft Windows Azure, Amazon Web Services (Amazon Simple Storage Service or S3), Oracle Cloud Infrastructure or Google Cloud Platform. +* Institutions and professional organisations could also make use of Infrastructure as a Service (IaaS), such as {% tool "microsoft-azure" %}, {% tool "amazon-web-services" %} (Amazon Simple Storage Service or S3), Oracle Cloud Infrastructure or Google Cloud Platform. 
* A useful [comparison of cloud-computing software and providers](https://en.wikipedia.org/wiki/Cloud-computing_comparison ) is on Wikipedia. Cloud-computing infrastructures, services and platforms offer a variety of file hosting services; a [comparison of file hosting services](https://en.wikipedia.org/wiki/Comparison_of_file_hosting_services ) is available on Wikipedia. -* If you are considering transferring data from or to cloud-based services (Microsoft Azure or Amazon S3) by shipping hard disks through carrier services, it is useful to know that services such as Amazon Snowball and Azure Data Box Disk will help you with the shipping of hard disks or appliances through carrier services. +* If you are considering transferring data from or to cloud-based services ({% tool "microsoft-azure" %} or Amazon S3) by shipping hard disks through carrier services, it is useful to know that services such as Amazon Snowball and Azure Data Box Disk will help you with the shipping of hard disks or appliances through carrier services. * Countries could provide national file sender services (browser based or other) which could be useful for one time transfer of data files, limited in number and volume (for instance, up to 100 GB or 250 GB), from person to person. Importantly, an academic account is usually needed to use these kinds of services, therefore contact the IT team in your institute for more information. * If you have the technical skills and the knowledge, you can use the most common data transfer protocols. These protocols are useful for data volume **larger than 50GB or for hundreds of data files**. 
- * Applications suitable for small to mid size data available on any operating system and that can be used either through command-line (directly or with tools like [cURL](https://curl.se)) or through a graphical interface, are: + * Applications suitable for small to mid size data available on any operating system and that can be used either through command-line (directly or with tools like {% tool "curl" %}) or through a graphical interface, are: * FTP (File Transfer Protocol) will transfer files between a client and an FTP server, which will require an account in order to transfer the files. - * Be sure to use a **secure** version of this protocol, such as FTPS or SFTP (SSH File Transfer Protocol). A possible tool with graphical interface is [FileZilla](https://filezilla-project.org). + * Be sure to use a **secure** version of this protocol, such as FTPS or SFTP (SSH File Transfer Protocol). A possible tool with graphical interface is {% tool "filezilla" %}. * HTTP (HyperText Transfer Protocol). * Rsync (remote synchronization) can be used to transfer files between two computers and to keep the files synchronized between these two computers. - * SCP (secure copy protocol) will securely transfer files between a client and a server. It will require an account on the server and can use SSH key based authentication. A possible tool with graphical interface is [WinSCP](https://winscp.net/eng/index.php). + * SCP (secure copy protocol) will securely transfer files between a client and a server. It will require an account on the server and can use SSH key based authentication. A possible tool with graphical interface is {% tool "winscp" %}. * For massive amounts of data, additional protocols have been developed, parallelizing the flow of data. These transfer solutions require specific tools and as such they are available mostly on large computational centres. - * FASP protocol implemented in [IBM Aspera tool](https://www.ibm.com/products/aspera). 
- * GridFTP protocol used by [Globus tool](https://www.globus.org). + * FASP protocol implemented in {% tool "ibm-aspera" %}. + * GridFTP protocol used by {% tool "globus" %}. * Several algorithms can be used for checksum calculation. * MD5 checksums can be generated and verified in command line of all operational systems or throught tools with a graphical interface, e.g. [MD5Summer](http://www.md5summer.org/) for Windows. diff --git a/pages/your_tasks/dm_coordination.md b/pages/your_tasks/dm_coordination.md index d7d27aaa5..2d9492a3c 100644 --- a/pages/your_tasks/dm_coordination.md +++ b/pages/your_tasks/dm_coordination.md @@ -40,6 +40,7 @@ Here, we provide some advice and methods to help consortia with data management * If applicable, each partner (actually, its representative) should consult with personal data legislation experts (e.g. GDPR or equivalent), Data Protection Officers (DPOs) and the legal office of the institution (e.g. for technology transfer) to reach a consensus at the consortium level about data protection, availability and open science. * It is recommended to discuss as early as possible the licensing and the intellectual property (IP) rights of project outcomes (datasets, software, tools, etc.), in order to comply with open science requirements and to avoid legal issues later on. * Discuss a common plan for the sharing of data, [documentation and metadata](metadata_management) between partners. +* Tools such as {% tool "fair-implementation-profile" %} and {% tool "fip-wizard" %} could be used to explicitly declare FAIR Implementation Profiles. ## How to execute data management in collaborative projects? @@ -50,7 +51,7 @@ Once the project is awarded, the data management plan needs to be executed throu * Some data management challenges may not have been foreseen at the pre-award stage. * Possible difficulties during the execution of the DMP by individual partners may arise. Not all partners have the same skills and resources. 
* Possible problems with data exchange between partners can resurface during the project. -* Consider contacting and establishing collaborations for depositing data with key repositories, e.g. setting up an [ENA Compare Data Hub](https://github.com/nadimm-rahman/ena-datahub-setup)for depositing sequence data at the European Nucleotide Archive. +* Consider contacting and establishing collaborations for depositing data with key repositories, e.g. setting up an {% tool "ena-compare-data-hubs" %} for depositing sequence data at the {% tool "european-nucleotide-archive" %}. ### Solutions * The DMWG should have regular meetings to find appropriate solutions to arising data management issues. diff --git a/pages/your_tasks/existing_data.md b/pages/your_tasks/existing_data.md index a142eca6f..493316e68 100644 --- a/pages/your_tasks/existing_data.md +++ b/pages/your_tasks/existing_data.md @@ -33,14 +33,16 @@ Many datasets could exist that you can reuse for your project. Even if you know * Search for research communities in the field, and find out whether they have policies for data submission that mention data repositories. For instance, [ELIXIR communities in Life Sciences](https://elixir-europe.org/communities). * Locate the primary journals in the field, and find out what data repositories they endorse. - * Journal websites will have a “Submitter Guide”, where you’ll find lists of recommended deposition databases per discipline, or generalist repositories. For instance, [Scientific Data's Recommended Repositories]( https://www.nature.com/sdata/policies/repositories). - * You can also find the databases supported by a journal through the policy interface of [FAIRsharing](https://fairsharing.org/policies/). + * Journal websites will have a “Submitter Guide”, where you’ll find lists of recommended deposition databases per discipline, or generalist repositories. For instance, {% tool "scientific-data-s-recommended-repositories" %}. 
+ * You can also find the databases supported by a journal through the policy interface of {% tool "fairsharing" %}. * Search registries for suitable data repositories. - * [FAIRsharing](https://fairsharing.org) is an ELIXIR resource listing repositories. - * [Re3data](https://www.re3data.org) lists repositories from all fields of science. - * [Google Dataset Search](https://datasetsearch.research.google.com) or [DataCite](https://search.datacite.org) for localization of datasets. - * The [Omics Discovery Index (OmicsDI)](https://www.omicsdi.org) provides a knowledge discovery framework across heterogeneous omics data (genomics, proteomics, transcriptomics and metabolomics). + * {% tool "fairsharing" %} is an ELIXIR resource listing repositories. + * {% tool "re3data" %} lists repositories from all fields of science. + * {% tool "google-dataset-search" %} or {% tool "datacite" %} for localization of datasets. + * The {% tool "omicsdi" %} provides a knowledge discovery framework across heterogeneous omics data (genomics, proteomics, transcriptomics and metabolomics). + * The {% tool "elixir-core-data-resources" %} list of knowledge resources recommended by ELIXIR. + * {% tool "openaire-explore" %} provides linked open research datasets. * Search through all repositories you found to identify what you could use. Give priority to curated repositories. @@ -70,7 +72,7 @@ When you find data of interest, you should first check if the quality is good an * Verify the quality of the data. Some repositories have quality indicators, such as: * Star system indicating level of curation, e.g. for manually curated/non-curated entries. - * [Evidence & Conclusion Ontology](https://evidenceontology.org). + * {% tool "evidence-and-conclusion-ontology" %}. * Detailed quality assessment methods. For instance, PDB has several [structure quality assessment metrics](https://validate.wwpdb.org/). * If metadata is available, check the quality of metadata. 
For instance, information about experimental setup, sample preparation, data analysis/processing can be necessary to reuse the data and reproduce the experiments. diff --git a/pages/your_tasks/identifiers.md b/pages/your_tasks/identifiers.md index 9c19bb9c1..928e203e0 100644 --- a/pages/your_tasks/identifiers.md +++ b/pages/your_tasks/identifiers.md @@ -32,14 +32,14 @@ If the research institute or group has a centralised and structured system (such ### Considerations -* At the beginning of your research project, check if your institute or research group has a centralised database where data must be entered during data collection. Usually, large and international research projects, industries, research institutes or hospitals have a centralised electronic database, an Electronic Data Capture (EDC) system, a Laboratory Information Management System (LIMS) or an Electronic Lab Notebook (ELN) with an user interface for data entry. More details about using ELNs are given by e.g. [University of Cambridge - Electronic Research Notebook Products](https://www.data.cam.ac.uk/data-management-guide/electronic-research-notebooks) and [Harvard Medical School - Electronic Lab Notebooks](https://datamanagement.hms.harvard.edu/analyze/electronic-lab-notebooks). +* At the beginning of your research project, check if your institute or research group has a centralised database where data must be entered during data collection. Usually, large and international research projects, industries, research institutes or hospitals have a centralised electronic database, an Electronic Data Capture (EDC) system, a Laboratory Information Management System (LIMS) or an Electronic Lab Notebook (ELN) with a user interface for data entry. More details about using ELNs are given by e.g. {% tool "university-of-cambridge-electronic-research-notebook-products" %} and {% tool "harvard-medical-school-electronic-lab-notebooks" %}. 
* If you can choose how to manage your data entry system, consider what’s the level of exposure of the identifier for each record or observation in the dataset. Define the context in which the identifier should be used and is unique. This is a key aspect to define what kind of identifier for each individual record is appropriate in your case. * Should the identifier of a record or observation be unique within your spreadsheet, your entire research project files or across the whole institute? What’s the reference system (or “target audience") of your identifier? * Will your reference system change in due time? If it will be opened up later, assigning globally unique identifiers from the beginning may be saving time. * Will the identifiers for individual records or observations be made openly accessible on the internet, during data collection? * If the identifier of an individual record or observation should be unique only within your research group (within an intranet), and it will not be available on the internet, it can be considered an “internal or [local identifier](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.2001414#pbio-2001414-g001)”. A local identifier is unique only in a specific local context (e.g. single collection or dataset). * Local identifiers can be applied not only for individual records or observations in a tabular dataset but also for each variable or even value ([columns and cells in a tabular dataset](https://datacarpentry.org/spreadsheet-ecology-lesson/01-format-data/index.html), respectively). -* Identifiers for an individual record, variable and value in a dataset can be assigned by using ontology terms (see [metadata page](metadata_management#how-do-you-find-appropriate-vocabularies-or-ontologies)) or accession numbers provided by public databases such as, [EBI](https://www.ebi.ac.uk/services/all) and [NCBI](https://www.ncbi.nlm.nih.gov/guide/all/) repositories. 
Here there are few examples for tabular (meta)data, but the same type of identifiers can be applied independently of the (meta)data structure and format. +* Identifiers for an individual record, variable and value in a dataset can be assigned by using ontology terms (see [metadata page](metadata_management#how-do-you-find-appropriate-vocabularies-or-ontologies)) or accession numbers provided by public databases such as, [EBI](https://www.ebi.ac.uk/services/all) and {% tool "national-center-for-biotechnology-information" %} repositories. Here there are few examples for tabular (meta)data, but the same type of identifiers can be applied independently of the (meta)data structure and format. * The patient ID is in its own row, a column header is the variable “[disease](http://www.ebi.ac.uk/efo/EFO_0000408)” from the EFO ontology (ID EFO:0000408), and the value in the cell is the child term “[chronic fatigue syndrome](http://www.ebi.ac.uk/efo/EFO_0004540)” (ID EFO:0004540) of “disease”. * The specimen ID is in its own row, a column header is the variable “Ensembl gene ID” from the Ensembl genome browser and the value in the cell is the identifier for [BRCA1](https://www.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000012048;r=17:43044295-43170245) gene ENSG00000012048. @@ -50,15 +50,15 @@ If the research institute or group has a centralised and structured system (such * Avoid embedding meaning into your local identifier. If you need to convey meaning in a short name implement a “label” for human readability only ([Lesson 4. Avoid embedding meaning or relying on it for uniqueness](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.2001414#pbio-2001414-g001)). * Do not use problematic characters and patterns into your local identifier ([Lesson 5. Avoid embedding meaning or relying on it for uniqueness](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.2001414#pbio-2001414-g001)). 
Problematic strings can be misinterpreted by some software. In this case it is better to fix the bugs or explicitly declare this possible issue in documentation. * Ontology terms or accession numbers provided by public databases, such as EBI and NCBI repositories, can be applied to uniquely identify genes, proteins, chemical compounds, diseases, species, etc. Choose exactly one for each type in order to be the most interoperable with yourself. Identifiers for molecules, assigned by EBI and NCBI repositories, keep track of relations between identifiers (for instance, different versions of a molecule). You can also submit your newly identified molecules to EBI or NCBI repositories to get a unique identifier. -* Applying ontologies to variables keeps clear structure and relations between variables (i.e.,"compound & dose", "variable & unit") . Software that allow you to integrate ontology terms into a spreadsheet are: [RightField](https://rightfield.org.uk) and [OntoMaton](https://github.com/ISA-tools/OntoMaton#readme). +* Applying ontologies to variables keeps clear structure and relations between variables (i.e.,"compound & dose", "variable & unit") . Software that allow you to integrate ontology terms into a spreadsheet are: {% tool "rightfield" %} and {% tool "onotomaton" %}. * If you keep track of each record in a tabular format that gets new rows every day, use a versioning system to track the changes. Many cloud [storage](storage#what-features-do-you-need-in-a-storage-solution-when-collecting-data) services offer automatic versioning, or keep a versioning log (see [data organisation page](data_organisation#how-do-you-manage-file-versioning)). Some parts of the tabular (meta)data file must be stable to be useful: do not delete nor duplicate essential columns. Generate documentation about your tabular (meta)data file (README file, Codebook, etc..). 
* If you collect data from a database that is frequently updated (dynamic or evolving database), it is recommended to keep track not only of the database ID, but also of the used version (by timestamp, or by recording date and time of data collection) and of the exact queries that you performed. In this way, the exact queries can be re-executed against the timestamped data store ([Data citation of evolving data](https://zenodo.org/record/1406002#.YHXAVS0Rrs1)). * If you reuse an existing dataset, keep the provided identifier for provenance and give a new identifier according to your system, but preserve the relation with the original identifier to be able to trace back to the source. Use a spreadsheet or create a mapping file to keep the relation between provenance and internal identifier. * To set up a centralised machine readable database, an EDC, a LIMS or an ELN for large research projects or institutes (available on intranet), highly specialised technical skills in databases, programming and computer science might be needed. We encourage you to talk to the IT team or experts in the field to find software and tools to implement such a system. * Software to make a machine-readable system for databases and data collection are available. Their interfaces are quite user friendly but command-line skills might be needed depending on the kind of use that you need. - * [MOLGENIS](https://molgenis.gitbooks.io/molgenis/content/) is a modular web application for scientific data. MOLGENIS was born from molecular genetics research but has grown to be used in many scientific areas such as biobanking, rare disease research, patient registries and even energy research. MOLGENIS provides researchers with user friendly and scalable software infrastructures to capture, exchange, and exploit the large amounts of data that is being produced by scientific organisations all around the world. - * [Castor](https://www.castoredc.com) is an EDC system for researchers and institutions. 
With Castor, you can create and customize your own database in no time. Without any prior technical knowledge, you can build a study in just a few clicks using an intuitive Form Builder. Simply define your data points and start collecting high quality data, all you need is a web browser. - * [REDCap](https://projectredcap.org) is a secure web application for building and managing online surveys and databases. While REDCap can be used to collect virtually any type of data in any environment, it is specifically geared to support online and offline data capture for research studies and operations. + * {% tool "molgenis" %} is a modular web application for scientific data. MOLGENIS was born from molecular genetics research but has grown to be used in many scientific areas such as biobanking, rare disease research, patient registries and even energy research. MOLGENIS provides researchers with user friendly and scalable software infrastructures to capture, exchange, and exploit the large amounts of data that is being produced by scientific organisations all around the world. + * {% tool "castor" %} is an EDC system for researchers and institutions. With Castor, you can create and customize your own database in no time. Without any prior technical knowledge, you can build a study in just a few clicks using an intuitive Form Builder. Simply define your data points and start collecting high quality data, all you need is a web browser. + * {% tool "redcap" %} is a secure web application for building and managing online surveys and databases. While REDCap can be used to collect virtually any type of data in any environment, it is specifically geared to support online and offline data capture for research studies and operations. * We don’t encourage setting up a centralised electronic database that will be exposed to the internet, unless really necessary. We encourage you to use existing and professional deposition databases to publish and share your datasets (see below). 
@@ -79,6 +79,6 @@ There are different ways to obtain a globally unique persistent identifier, and ### Solutions -* If you want to publish your data into an existing public repository, please first see our [data publication page](data_publication). The repository will provide globally unique persistent identifiers for your data. Check their guidelines if you need to edit or update your dataset after publication. Generic repositories (such as [Zenodo](https://zenodo.org/) and [Figshare](https://figshare.com/)) use versioning DOI to update a public dataset or document. -* If you want to publish your data in an institutional public repository, ask the institution to obtain a namespace at [identifiers.org](http://identifiers.org) in order to obtain globally unique persistent identifiers for your data. -* If you have the resources and skills to open up your database to the public, obtain a namespace at [identifiers.org](http://identifiers.org) in order to acquire globally unique persistent identifiers for your data. +* If you want to publish your data into an existing public repository, please first see our [data publication page](data_publication). The repository will provide globally unique persistent identifiers for your data. Check their guidelines if you need to edit or update your dataset after publication. Generic repositories (such as {% tool "zenodo" %} and {% tool "figshare" %}) use versioning DOI to update a public dataset or document. +* If you want to publish your data in an institutional public repository, ask the institution to obtain a namespace at {% tool "identifiers-org" %} in order to obtain globally unique persistent identifiers for your data. +* If you have the resources and skills to open up your database to the public, obtain a namespace at {% tool "identifiers-org" %} in order to acquire globally unique persistent identifiers for your data. 
diff --git a/pages/your_tasks/licensing.md b/pages/your_tasks/licensing.md index 4257585a1..6fc4ef3c6 100644 --- a/pages/your_tasks/licensing.md +++ b/pages/your_tasks/licensing.md @@ -56,18 +56,18 @@ What licence you should apply to your research data depends on what rights prote * If possible, choose and apply the least restrictive licence to ensure the widest possible reuse. * Remember that if you publish your data in a data repository of your choice, a licence agreement will be applied to your data. - * Repositories can be selected based on data licence and sharing policy by using [re3data.org](https://www.re3data.org/). + * Repositories can be selected based on data licence and sharing policy by using {% tool "re3data" %}. * ELIXIR data resources ideally have terms of use or a licence that enables the reuse and remixing of data. * Remember that the rights granted in a licence cannot be revoked once it has been applied. ### Solutions * Apply to your data one of the [recommended licenses conformant to the Open Definition](https://opendefinition.org/licenses/), so that your data can be shared and reused. The [Open Definition](https://opendefinition.org/) sets out principles that define the meaning of "open" in relation to data and content. * [Creative Commons licenses](https://creativecommons.org/licenses/) are the best known open data licences and are available in human-readable and machine-readable forms, with different levels of permissions. - * [Creative Commons License Chooser](https://creativecommons.org/choose/) helps you choose the right Creative Commons licence for your needs. + * {% tool "creative-commons-license-chooser" %} helps you choose the right Creative Commons licence for your needs. * The [video tutorial from Kingsborough E-Learning](https://www.youtube.com/watch?v=5QxkuuiZwRU) shows how to add a Creative Commons licence to your work in practice. * The following tools helps you find the right licence for your software and data. 
- * [EUDAT licence selector wizard](https://ufal.github.io/public-license-selector/). - * [Choose a license](https://choosealicense.com) is an online guide provided by GitHub to help you choose a license for open-source projects. - * [data.world](https://help.data.world/hc/en-us/articles/115006114287-Common-license-types-for-datasets) provides list of common license types for datasets. + * {% tool "eudat-licence-selector-wizard" %}. + * {% tool "choose-a-license" %} is an online guide provided by GitHub to help you choose a license for open-source projects. + * {% tool "data-world-data-license-list" %} provides a list of common license types for datasets. * If your research data is a database or a dataset, consider putting it in the public domain by using the [Creative Commons CC0](https://creativecommons.org/share-your-work/public-domain/cc0) tool. CC0 let you waive all your rights to the work ("No Rights Reserved"). diff --git a/pages/your_tasks/machine_actionability.md b/pages/your_tasks/machine_actionability.md index 0150e272a..6932c76cc 100644 --- a/pages/your_tasks/machine_actionability.md +++ b/pages/your_tasks/machine_actionability.md @@ -88,7 +88,7 @@ Similarly, funders and institutions ask researchers to make their (meta)data FAI * Look through numerous (shared) folders to find the documentation about a specific experiment done by previous colleagues that generated the dataset you are interested in. * Read all publications about a topic and check if there is a dataset linked to it and/or available upon request. -* Integration of multiple datasets can be straightforward only if each dataset can be easily queried, processed and formatted via software/programmes that can properly handle structured and big (meta)data files, such as [OpenRefine](https://openrefine.org/#) and programming languages such as Python or R. Otherwise, manual data integration and processing can be very slow and error-prone. 
+* Integration of multiple datasets can be straightforward only if each dataset can be easily queried, processed and formatted via software/programmes that can properly handle structured and big (meta)data files, such as {% tool "openrefine" %} and programming languages such as Python or R. Otherwise, manual data integration and processing can be very slow and error-prone. ### Advantages @@ -100,11 +100,11 @@ The advantages of having machine-actionable data and metadata are numerous for a By providing structured metadata and data to a database that follows standards (metadata schemas, ontologies, file formats, programmatic access, etc.), at the level of each recorded value or observation, researchers: * Could more easily query and filter (meta)data based on specific variables, experimental conditions, biological sources and many other parameters, based on the capabilities of the used ELN or data management software. * Can more easily find and reproduce experiments performed in the past by others in literature or in databases e.g. by using [Europe PMC](https://europepmc.org/) and [EBI Search](https://www.ebi.ac.uk/ebisearch/overview.ebi/about). -* Can easily integrate data from multiple datasets and studies, sharing the same experimental conditions or variables. Datasets integration and manipulation are easier to achieve, more reproducible and can be automated by using common programmes/software such as R and [OpenRefine](https://openrefine.org/#). -* Can make use of visualization and exploration tools, provided by some repositories, to browse and explore the data of multiple datasets at once. For instance, you can use [Expression Atlas](https://www.ebi.ac.uk/gxa/home) to easily make a query about the expression of a gene in specific conditions, even without knowledge of any data analysis software. As another example, [GISAID](https://www.gisaid.org) allows you to visualise the spreading of viral variants. 
See the pages in the “Your Domain” section to find domain-specific databases, atlas or portals. -* Can import, export and exchange (meta)data between tools/systems/platforms without data loss. Exchanging and integrating (meta)data between two software or platforms is possible only if the format in which the information is contained can be read and interpreted by both. For instance, (meta)data from both [UniProt](https://www.uniprot.org) and [PDBe-KB](https://www.ebi.ac.uk/pdbe/pdbe-kb) can be accessed in [3DBioNotes](http://3dbionotes-ws.cnb.csic.es/) to enrich the structural analysis with sequence features. -* Can explore and visualise biological knowledge graphs by using software such as [KnetMiner](https://knetminer.com) and [AgroLD](http://agrold.southgreen.fr/agrold/). -* Can perform complex queries, from a single entry point, across multiple distributed databases and across domains via [APIs](https://en.wikipedia.org/wiki/API) or via SPARQL Query Language. For instance: “Retrieve the number of UniProtKB/Swiss-Prot human enzymes that metabolize cholesterol or cholesterol derivatives and that are involved in diseases?" in the [Integrated Database of Small Molecules](https://idsm.elixir-czech.cz/). +* Can easily integrate data from multiple datasets and studies, sharing the same experimental conditions or variables. Datasets integration and manipulation are easier to achieve, more reproducible and can be automated by using common programmes/software such as R and {% tool "openrefine" %}. +* Can make use of visualization and exploration tools, provided by some repositories, to browse and explore the data of multiple datasets at once. For instance, you can use {% tool "expression-atlas" %} to easily make a query about the expression of a gene in specific conditions, even without knowledge of any data analysis software. As another example, {% tool "gisaid" %} allows you to visualise the spreading of viral variants. 
See the pages in the [Your Domain](https://rdmkit.elixir-europe.org/your_domain) section to find domain-specific databases, atlas or portals. +* Can import, export and exchange (meta)data between tools/systems/platforms without data loss. Exchanging and integrating (meta)data between two software or platforms is possible only if the format in which the information is contained can be read and interpreted by both. For instance, (meta)data from both {% tool "uniprot" %} and {% tool "pdbe-kb" %} can be accessed in {% tool "3dbionotes" %} to enrich the structural analysis with sequence features. +* Can explore and visualise biological knowledge graphs by using software such as {% tool "knetminer" %} and {% tool "agronomic-linked-data" %}. +* Can perform complex queries, from a single entry point, across multiple distributed databases and across domains via [APIs](https://en.wikipedia.org/wiki/API) or via SPARQL Query Language. For instance: “Retrieve the number of UniProtKB/Swiss-Prot human enzymes that metabolize cholesterol or cholesterol derivatives and that are involved in diseases?" in the {% tool "integrated-database-of-small-molecules" %}. * Can more easily find reference data and existing data in general, since machine-actionable (meta)data could be found by search engines and domain specific or generic data catalogs and portals. @@ -114,10 +114,10 @@ By providing structured metadata and data to a database that follows standards ( * Applying RDF syntax to the database can make the (meta)data available for knowledge graphs and semantic web applications. * If [Application Programming Interface (API)](https://en.wikipedia.org/wiki/API) is available, other software/applications could make complex queries, access the database programmatically and always get up-to-date data. 
* If the metadata of your database or repository is exposed according to specific standards, it could function as data provider or data source, and be harvested and indexed by - * Data catalogues or data portals, such as [OmicsDI](http://blog.omicsdi.org/post/omicsdi-spec/) and [COVID-19 Data Portal](https://www.covid19dataportal.org). + * Data catalogues or data portals, such as {% tool "omicsdi" %} [data format specification](http://blog.omicsdi.org/post/omicsdi-spec/) and [COVID-19 Data Portal](https://www.covid19dataportal.org). * The [OpenAIRE aggregator](https://www.openaire.eu/aggregation-and-content-provision-workflows) that collects metadata records via OAI-PMH in the majority of cases. - * Other instances of your data repository software, such as [Dataverse](https://guides.dataverse.org/en/latest/admin/dashboard.html#harvesting) and [EUDAT B2FIND](http://b2find.eudat.eu/guidelines/harvesting.html), which use OAI-PMH for metadata harvest. - * Search engines such as [Google Dataset Search](https://datasetsearch.research.google.com/help), which relies on [sitemaps.org](https://www.sitemaps.org), [schema.org](https://schema.org), [DCAT](https://www.w3.org/TR/vocab-dcat/) and other approaches to datasets discovery. + * Other instances of your data repository software which use OAI-PMH for metadata harvest, such as {% tool "dataverse" %} [harvesting](https://guides.dataverse.org/en/latest/admin/dashboard.html#harvesting). + * Search engines such as [Google Dataset Search](https://datasetsearch.research.google.com/help), which relies on [sitemaps.org](https://www.sitemaps.org), {% tool "schema-org" %}, {% tool "data-catalog-vocabulary" %} and other approaches to datasets discovery. * Machine actionable metadata facilitates the automatization of data handling and validation, allowing for easier development of new tools and analysis strategies (e.g. data visualization tools, machine learning and artificial intelligence applications). 
#### For the authors of a machine-actionable public dataset @@ -146,7 +146,7 @@ The theoretically most machine-actionable format is in practice not achieved or For machine-actionability and interoperability, you should consider: 1. File formats that are data exchange formats (e.g. JSON, XML). -2. (Meta)Data schemas recognised and accepted by communities as standards (e.g. ISA model, OME data model). The (meta)data schema describes the relations, such as hierarchy, of the elements that constitute the (meta)data model or structure. +2. (Meta)Data schemas recognised and accepted by communities as standards (e.g. [ISA model](https://isa-specs.readthedocs.io/en/latest/isamodel.html), {% tool "ome-data-model-and-file-formats" %}). The (meta)data schema describes the relations, such as hierarchy, of the elements that constitute the (meta)data model or structure. 3. Sets of metadata attributes or metadata checklists recognised and accepted by communities (e.g. MIAPPE, ENA Samples checklists), that capture reporting best practice in the field. 4. Controlled vocabularies and ontologies recognised and accepted by communities to convey meaning or semantics (e.g. EFO, OBI). @@ -169,11 +169,11 @@ For machine-actionability and interoperability, you should consider: * Some of the (meta)data schemas considered standard in Life Sciences define the relations between elements of the model in a more implicit way (e.g. ISA-TAB, MAGE-TAB). * Some data repositories develop customised (meta)data schemas. * Different metadata schemas are preferred for different purposes. Some examples are listed below. - * [Schema.org](https://schema.org) and [Bioschemas.org](https://bioschemas.org/index.html) markup are mostly used to describe web resources and make them findable by Web search engines. - * [Data Catalog Vocabulary (DCAT)](https://www.w3.org/TR/vocab-dcat-2/) is an RDF vocabulary designed to facilitate interoperability between data catalogs published on the Web. 
+ * {% tool "schema-org" %} and {% tool "bioschemas" %} markup are mostly used to describe web resources and make them findable by Web search engines. + * {% tool "data-catalog-vocabulary" %} is an RDF vocabulary designed to facilitate interoperability between data catalogs published on the Web. * [Investigation-Study-Assay (ISA) model](https://isa-tools.org/isa-api/content/isamodel.html#) was originally designed for describing multi-omics experiments in Life Sciences. * The [DAta Tag Suite (DATS)](https://github.com/datatagsuite) is a data description model designed and produced to describe datasets and associated metadata in a number of data deposition repositories. - * The [OME Data Model](https://docs.openmicroscopy.org/ome-model/latest/) is a specification for storing and exchanging data on biological imaging. + * The {% tool "ome-data-model-and-file-formats" %} is a specification for storing and exchanging data on biological imaging. * The [W3C](https://www.w3.org/) consortium has formalised a universal abstract data model to potentially establish relationships among any resource available on the web (people, places, web pages, events, abstract concepts, etc.) called [Resource Description Framework (RDF)](https://www.w3.org/TR/rdf-concepts/#section-Introduction). This universal abstract data model allows us to describe relationships between multiple resources encoded in different formats, following different standards and stored in different locations/servers on the internet. @@ -214,7 +214,7 @@ Vocabularies and ontologies are meant for describing concepts and relationships * Examples of standard (meta)data schemas, in different formats, in Life Sciences: * [ISA-JSON (.json) and ISA-TAB (.txt)](https://isa-specs.readthedocs.io/en/latest/) - generic metadata framework originally created to describe information about multi-omics experiments. * [MAGE-TAB](https://www.ebi.ac.uk/arrayexpress/help/magetab_spec.html) (.txt) - MicroArray Gene Expression Tabular. 
The format has been developed and adopted by the functional genomics community. - * [OME data model](https://docs.openmicroscopy.org/ome-model/latest/) (.tiff or .xml) for a wide range of biological imaging modalities. Ontologies to uniquely identify terms can be included. + * {% tool "ome-data-model-and-file-formats" %} (.tiff or .xml) for a wide range of biological imaging modalities. Ontologies to uniquely identify terms can be included. See also Hammer, M., Huisman, M., Rigano, A. et al. Towards community-driven metadata standards for light microscopy: tiered specifications extending the OME model. Nat Methods 18, 1427–1440 (2021). https://doi.org/10.1038/s41592-021-01327-9. * For more information about metadata schemas and ontologies, see [Documentation and Metadata](metadata_management) page. diff --git a/pages/your_tasks/metadata_management.md b/pages/your_tasks/metadata_management.md index 802ea4181..456ae2216 100644 --- a/pages/your_tasks/metadata_management.md +++ b/pages/your_tasks/metadata_management.md @@ -60,8 +60,8 @@ Due to the large variety of experiments, techniques and collaborative studies th * Online platforms for collaborative research and file sharing services, which integrate with several data management tools, could also be used for data documentation during the project. For instance, OSF.io has integrations with Mendeley, Dropbox, GitHub, Figshare, etc. * There is a major area of overlap between the aforementioned tools for data documentation, so it is better to choose the tool(s) that best address your specific need. Some tools can be used at the same time to address different needs and they can be complementary. 
Comparative lists can help with the choice: - * [Harvard Medical School – ELN Comparison Grid.](https://datamanagement.hms.harvard.edu/analyze/electronic-lab-notebooks) - * [University of Cambridge - Electronic Research Notebook Products.](https://www.data.cam.ac.uk/data-management-guide/electronic-research-notebooks/electronic-research-notebook-products) + * {% tool "harvard-medical-school-electronic-lab-notebooks" %} + * {% tool "university-of-cambridge-electronic-research-notebook-products" %} * Independently of the tools, you should agree on and establish a [data organisation](data_organisation) system for files (or tables in a database) together with your team or [Data Management Working Group](data_quality#how-do-you-ensure-the-quality-of-research-data): * Folder structure @@ -72,7 +72,7 @@ Due to the large variety of experiments, techniques and collaborative studies th * [Study-level](https://ukdataservice.ac.uk/learning-hub/research-data-management/document-your-data/study-level-documentation/) and [data-level](https://ukdataservice.ac.uk/learning-hub/research-data-management/document-your-data/data-level/) documentation can be provided as * README file - * [Codebook](https://ddialliance.org/training/getting-started-new-content/create-a-codebook) + * {% tool "create-a-codebook" %} * Data dictionary ([see an example](https://webdav-r3lab.uni.lu/public/elixir/templates/Data_dictionary_example.xlsx)) * Data list @@ -89,6 +89,8 @@ Due to the large variety of experiments, techniques and collaborative studies th * We recommend familiarising yourself with the requirements of the repositories that could be appropriate for publishing your data already at the beginning of the project, so that you can start documenting and formatting your data according to their requirements as early as possible. 
+* Platforms for management of metadata and data used by some scientific communities: {% tool "cedar" %}, {% tool "semares" %}, {% tool "fairdom-seek" %}, {% tool "fairdomhub" %}, {% tool "copo" %}. + @@ -101,7 +103,7 @@ There are multiple standards for different types of data, ranging from generic d ### Considerations -* Decide at the beginning of the project what are the [recommended repositories](https://elixir-europe.org/platforms/data/elixir-deposition-databases) for your data types. +* Decide at the beginning of the project what are the {% tool "elixir-deposition-databases-for-biomolecular-data" %} for your data types. * Note that you can use several repositories if you have different data types. * Distinguish between generic (e.g. Zenodo) and data type (technique) specific repositories (e.g. EBI repositories). @@ -114,9 +116,9 @@ There are multiple standards for different types of data, ranging from generic d * Be aware that data type specific repositories usually have check-lists for metadata. For example, the European Nucleotide Archive provides [sample checklists](https://www.ebi.ac.uk/ena/browser/checklists) that can also be downloaded as a spreadsheet after log in. * If you do not know yet what repository you will use, look for what is the recommended minimal information (i.e. “Minimum Information ...your topic”, e.g. 
[MIAME](https://www.fged.org/projects/miame) or [MINSEQE](https://www.fged.org/projects/minseqe) or [MIAPPE](https://www.miappe.org)) required for your type of data in your community, or other metadata, at the following resources: - * [Research Data Alliance (RDA): Metadata Dictionary: Standards](https://rd-alliance.github.io/metadata-directory/standards/) - * [FAIRsharing.org](https://fairsharing.org) at “Standards” and “Collections” - * [The Digital Curation Centre (DCC): List of Metadata Standards](https://www.dcc.ac.uk/guidance/standards/metadata/list) + * {% tool "rda-standards" %} + * {% tool "fairsharing" %} at “Standards” and “Collections” + * {% tool "data-curation-centre-metadata-list" %} ## How do you find appropriate vocabularies or ontologies? @@ -141,11 +143,13 @@ There are many vocabularies and ontologies available on the web. Finding a suita * Define a list of terms that you want to find ontologies for. Include in the list also any alternative term names that you are aware of. * Search for your listed terms on dedicated web portals. These are a few: - * [Linked Open Vocabularies (LOV)](https://lov.linkeddata.es/dataset/lov/) - * [EMBL-EBI Ontology Lookup Service](https://www.ebi.ac.uk/ols/index) - * [Ontobee](http://www.ontobee.org) - * [BioPortal](https://bioportal.bioontology.org) - * [AgroPortal](https://agroportal.lirmm.fr) + * {% tool "linked-open-vocabularies" %} + * {% tool "ontology-lookup-service" %} + * {% tool "ontobee" %} + * {% tool "bioportal" %} + * {% tool "agroportal" %} + * {% tool "the-open-biological-and-biomedical-ontology-foundry" %} + * {% tool "evidence-and-conclusion-ontology" %} ## What do you write in a README file? @@ -209,5 +213,3 @@ Below you will find examples of README files for study-level and data-level. 
For - License (if any): - Use restrictions (if any): - Recommended citation for the data (if any): - - diff --git a/pages/your_tasks/sensitive_data.md b/pages/your_tasks/sensitive_data.md index 60c70003e..1b7e739d2 100644 --- a/pages/your_tasks/sensitive_data.md +++ b/pages/your_tasks/sensitive_data.md @@ -45,7 +45,7 @@ In general, data can be categorised into two types i.e. sensitive data and non-s ### Considerations -* If you deal with any information about individuals from the EU, you are bound by the [General Data Protection Regulation (GDPR)](https://gdpr.eu/what-is-gdpr/). In GDPR, such data is called "personal data". +* If you deal with any information about individuals from the EU, you are bound by the {% tool "eu-general-data-protection-regulation" %}. In GDPR, such data is called "personal data". * In the context of GDPR "special category data" is a subclass of "personal data" that is potentially even more harmful, and GDPR prescribes very strict rules for dealing with this data. Article 9 of GDPR defines the special categories as data consisting of racial or ethnic origin, political opinions, religious or philosophical beliefs, or trade union membership, genetic data, biometric data, data concerning health or data concerning a natural person's sex life or sexual orientation. Confusingly, these special categories are sometimes colloquially called "sensitive data". Note that this page is concerned with the broader definition of "sensitive data". * Information in Life Science projects are for the most part categorised under health and genetic data and are considered special category data under the GDPR. * You need to assess whether or not your dataset contains attributes that can lead to the identification of a person. Note that combinations of attributes that are themselves not identifiable can be identifiable together. See the definitions described in the [How can you de-identify your data](#how-can-you-de-identify-your-data) section. 
diff --git a/pages/your_tasks/storage.md b/pages/your_tasks/storage.md index ec15b2e32..35b8d9d20 100644 --- a/pages/your_tasks/storage.md +++ b/pages/your_tasks/storage.md @@ -4,7 +4,7 @@ contributors: [Ulrike Wittig, Elin Kronander, Munazah Andrabi, Flora D'Anna, Fla description: How to find appropriate storage solutions. page_id: storage related_pages: - tool_assembly: [nels, tsd, ome, transmed, xnat-pic] + tool_assembly: [nels, tsd, ome, transmed, xnat_pic] training: - name: Training in TeSS registry: TeSS @@ -61,9 +61,9 @@ When looking for solutions to store your data during the collection or generatio * Ask for a tiered storage solution that gives you easy and fast access to the data for processing and analysis. Explain to the IT support what machine or infrastructure you need to access the data from and if other researchers should have access as well (in case of collaborative projects). * Ask if the storage solution includes an automatic management of versioning, conflict resolution and back-tracing capabilities (see also our Data Organisation page). * Ask the IT support in your institute if they offer technical solutions to keep a copy of your (raw)data secure and untouched (snapshot, read-only access, backup…). You could also keep a copy of the original data file in a separate folder as “read-only”. -* For small data files and private or collaborative projects within your institute, commonly accessible Cloud Storage is usually provided by the institute, such as Nextcloud (on-premises), Microsoft OneDrive, DropBox, Box, etc. Do not use personal accounts on similar services for this purpose, adhere to the policies of your institute. +* For small data files and private or collaborative projects within your institute, commonly accessible Cloud Storage is usually provided by the institute, such as {% tool "nextcloud" %} (on-premises), {% tool "microsoft-onedrive" %}, {% tool "dropbox" %}, {% tool "box" %}, etc. 
Do not use personal accounts on similar services for this purpose, adhere to the policies of your institute. * It is a requirement from the funders or universities to store raw data and data analysis workflows (for reproducible results) for a certain amount of time after the end of the project (see our Preserve page). This is usually a requirement. Check the data policy for your project or institute to know if a copy of the data should be also stored at your institute for a specific time after the project. This helps you budget for storage costs and helps your IT support with estimation of storage resources needed. -* Make sure to generate good documentation (i.e., README file) and metadata together with the data. Follow best practices for folder structure, file naming and versioning systems (see our Data Organisation page). Check if your institute provides a (meta)data management system, such as iRODS, DataVerse, FAIRDOM-SEEK or OSF. See All tools and resources table below for additional tools. +* Make sure to generate good documentation (i.e., README file) and metadata together with the data. Follow best practices for folder structure, file naming and versioning systems (see our Data Organisation page). Check if your institute provides a (meta)data management system, such as {% tool "irods" %}, {% tool "dataverse" %}, {% tool "fairdom-seek" %} or {% tool "osf" %}. See All tools and resources table below for additional tools. ## How do you estimate computational resources for data processing and analysis? @@ -91,8 +91,8 @@ Below, you can find some aspects that you need to consider to be able to estimat * Communicate your expectations about speed and the reliability of connection between storage and compute to the IT team. This could depend on the communication protocols that the compute and storage systems use. * It is recommended to ask about the time span for analysis to colleagues or bioinformatic support that have done similar work before. 
This could save you money and time. * If you need some reference datasets (e.g the references genomes such as human genome.), ask IT if they provide it or consult bioinformaticians that can set up automated public reference dataset retrieval. -* For small data files and private projects, using the computational resources of your own laptop might be fine, but make sure to preserve the reproducibility of your work by using data analysis software such as Galaxy or R Markdown. -* For small data volume and small collaborative projects, a commonly accessible cloud storage, such as Nextcloud (on-premises) or ownCloud might be fine. Adhere to the policies of your institute. +* For small data files and private projects, using the computational resources of your own laptop might be fine, but make sure to preserve the reproducibility of your work by using data analysis software such as {% tool "galaxy" %} or {% tool "r-markdown" %}. +* For small data volume and small collaborative projects, a commonly accessible cloud storage, such as {% tool "nextcloud" %} (on-premises) or {% tool "owncloud" %} might be fine. Adhere to the policies of your institute. * For large data volume and bigger collaborative projects, you need a large storage volume on fast hardware that is closely tied to a computational resource accessible to multiple users. ## Where should you store the data after the end of the project? 
diff --git a/var/Issue_creator.py b/var/Issue_creator.py new file mode 100644 index 000000000..711813881 --- /dev/null +++ b/var/Issue_creator.py @@ -0,0 +1,117 @@ +import csv +import os +import yaml +import frontmatter +import requests + +def read_csv_file(filename): + tools = {} + + with open(filename, 'r') as csvfile: + reader = csv.DictReader(csvfile) + + for row in reader: + tool_name = row['Orphan tool'] + tags = row['Related_pages'].split(',') + description = row['Orphan tool description'] + url = row['tool link'] + for tag in tags: + tag = tag.strip() + if tag not in tools: + tools[tag] = [] + tools[tag].append({'name' : tool_name, 'description' :description, 'url' : url }) + + return tools + +def render_body(tool_list, contributors, text): + output = f"### Page: [{contributors['title']}]({contributors['file']})\n\n" + output += f"{text}\n\n**Tools that are not mentioned in the text:**\n" + for tool in tool_list: + output += f'- [ ] **[{tool["name"]}]({tool["url"]})** - {tool["description"]}\n' + output += f"\n\nWould you kindly review these tools and determine if they should be mentioned (as a link) in the text? If necessary, please propose changes to the text through a pull request on this temporary [instance of the RDMkit](https://bedroesb.github.io/rdmkit) by clicking on the pencil icon next to the title and following the instructions in this [guide](https://bedroesb.github.io/rdmkit/tool_resource_update). 
{', '.join(contributors['contributors'])}\n" + return output + + +def lookup_git_id(contributor, contributors_file): + if contributor in contributors_file and 'git' in contributors_file[contributor] and contributors_file[contributor]['git']: + return contributors_file[contributor]['git'] + else: + return False + + +def get_contributors(markdown_dir, contributors_file): + with open(contributors_file, 'r') as yaml_file: + contributors_data = yaml.safe_load(yaml_file) + + page_contributors = {} + + for root, dirs, files in os.walk(markdown_dir): + for file in files: + if file.endswith('.md'): + markdown_file = os.path.join(root, file) + with open(markdown_file, 'r') as file_content: + post = frontmatter.load(file_content) + front_matter = post.metadata + if 'page_id' in front_matter and 'contributors' in front_matter: + contributors = front_matter['contributors'] + page_id = front_matter['page_id'] + title = front_matter['title'] + file_name = file.removesuffix(".md") + contr_ids = [] + for contributor in contributors: + git_id = lookup_git_id(contributor, contributors_data) + if git_id: + contr_ids.append(f"@{git_id}") + page_contributors[page_id] = {} + page_contributors[page_id]['contributors'] = contr_ids + page_contributors[page_id]['file'] = f"https://rdmkit.elixir-europe.org/{file_name}" + page_contributors[page_id]['title'] = title + return page_contributors + + +def create_github_issue(repo_owner, repo_name, title, body, token, labels): + url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/issues" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github.v3+json" + } + data = { + "title": title, + "body": body, + "labels": labels + } + response = requests.post(url, headers=headers, json=data) + if response.status_code == 201: + issue_data = response.json() + issue_number = issue_data["number"] + print(f"Successfully created GitHub issue #{issue_number}") + else: + print("Failed to create GitHub issue") + 
print(f"Response: {response.status_code} - {response.text}") + + +# Usage +filename = '/home/bedro/Documents/rdmkit/_data/tool_and_resource_list.csv' +markdown_dir = '/home/bedro/Documents/rdmkit/pages' +contributors_file = '/home/bedro/Documents/rdmkit/_data/CONTRIBUTORS.yaml' + +# Github info +repo_owner = "elixir-europe" +repo_name = "rdmkit" +body = "In pull request #1249, we are implementing changes to how we handle tools, resulting in different rules for rendering the tools table at the bottom of pages. Going forward, only tools referenced in the text will be included in the bottom table of the page. Some of the tools have been tagged with the page_id of this page, and we are currently exploring how to incorporate these tools into the text." +tags = ["tool-text-discrepancy"] +token = "" +whitelist = ["data_publication"] + + +tools = read_csv_file(filename) +page_contributors = get_contributors(markdown_dir, contributors_file) + + +for tag, tool_list in tools.items(): + if tag in page_contributors and tag not in whitelist: + output = render_body(tool_list, page_contributors[tag], body) + title = f"{page_contributors[tag]['title']}: tools not mentioned in in text" + create_github_issue(repo_owner, repo_name, title, output, token, tags) + else: + print(f"{tag} could not be found") diff --git a/var/issue_commentor.py b/var/issue_commentor.py new file mode 100644 index 000000000..42f591476 --- /dev/null +++ b/var/issue_commentor.py @@ -0,0 +1,40 @@ +import requests + +# GitHub API endpoint for creating a comment +COMMENT_URL = "https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/comments" + +# GitHub personal access token +TOKEN = "" + +# GitHub repository information +owner = "elixir-europe" +repo = "rdmkit" + +def add_comment_to_issue(issue_id, comment): + # URL for creating a comment on a specific issue + url = COMMENT_URL.format(owner=owner, repo=repo, issue_number=issue_id) + + # Headers with authentication token + headers = { + "Authorization": 
f"Bearer {TOKEN}", + "Accept": "application/vnd.github.v3+json" + } + + # Comment payload + payload = { + "body": comment + } + + # Send POST request to create the comment + response = requests.post(url, headers=headers, json=payload) + + if response.status_code == 201: + print(f"Comment added to issue #{issue_id}") + else: + print(f"Failed to add comment to issue #{issue_id}") + print(response.json()) + +# Iterate over the list of issue IDs and add a comment to each issue +for issue_id in range(1296, 1327 + 1): + comment_text = "We will move to the new system in the week of the 24th of Juli, so please make sure changes are comments are made before this date. Thanks a lot in advance!" + add_comment_to_issue(issue_id, comment_text) \ No newline at end of file diff --git a/var/issue_milestone.py b/var/issue_milestone.py new file mode 100644 index 000000000..b2d8d00a5 --- /dev/null +++ b/var/issue_milestone.py @@ -0,0 +1,39 @@ +import requests + +# GitHub API endpoint for creating a Milestone +MILESTONE_URL = "https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}" + +# GitHub personal access token +TOKEN = "" + +# GitHub repository information +owner = "elixir-europe" +repo = "rdmkit" + +def add_milestone_to_issue(issue_id, milestone): + # URL for creating a Milestone on a specific issue + url = MILESTONE_URL.format(owner=owner, repo=repo, issue_number=issue_id) + + # Headers with authentication token + headers = { + "Authorization": f"Bearer {TOKEN}", + "Accept": "application/vnd.github.v3+json" + } + + # Milestone payload + payload = { + "milestone": milestone + } + + # Send POST request to create the Milestone + response = requests.post(url, headers=headers, json=payload) + + if response.status_code == 201: + print(f"Milestone added to issue #{issue_id}") + else: + print(f"Failed to add Milestone to issue #{issue_id}") + print(response.json()) + +# Iterate over the list of issue IDs and add a milestone to each issue +for issue_id in range(1321, 1327 + 
1): + add_milestone_to_issue(issue_id, 1) \ No newline at end of file diff --git a/var/make_headings_capitalized.md b/var/make_headings_capitalized.py similarity index 100% rename from var/make_headings_capitalized.md rename to var/make_headings_capitalized.py diff --git a/var/conversions.py b/var/tools_validator.py similarity index 58% rename from var/conversions.py rename to var/tools_validator.py index 08edb02be..3a5cab882 100644 --- a/var/conversions.py +++ b/var/tools_validator.py @@ -2,21 +2,29 @@ import argparse import os import re -from csv import reader -import yaml +from ruamel.yaml import YAML import re -import unicodedata import requests from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry import frontmatter +from urllib.parse import urlparse +class NullRepresenter: + def __init__(self): + self.count = 0 + + def __call__(self, repr, data): + ret_val = repr.represent_scalar(u'tag:yaml.org,2002:null', u'') + self.count += 1 + return ret_val + def process_args(): '''parse command-line arguments ''' - parser = argparse.ArgumentParser(prog='Conversions', + parser = argparse.ArgumentParser(prog='Tools Validator', description='This script will convert the tool and resources table to a yaml file while injecting bio.tools and FAIRsharing IDs where needed.',) parser.add_argument('--username', help='Specify the FAIRsharing username') @@ -131,11 +139,10 @@ def remove_prefix(s, prefix): # --------- Variables --------- - -table_path = "_data/main_tool_and_resource_list.csv" -output_path = "_data/tool_and_resource_list.yml" +yaml_path = "_data/tool_and_resource_list.yml" rootdir = 'pages/' allowed_registries = ['biotools', 'fairsharing', 'tess', 'fairsharing-coll'] +my_represent_none = NullRepresenter() # --------- Reading out page_ids from pages --------- @@ -162,74 +169,81 @@ def remove_prefix(s, prefix): print(f"----> Allowed related_pages: {', '.join(pages_metadata.keys())}.") -# --------- Converting the table --------- - -print(f"----> 
Converting table {table_path} to {output_path} started.") args = process_args() main_list = [] + if args.reg: fairsharing_token = get_fairsharing_token(args.username, args.password) -with open(table_path, 'r') as read_obj: - csv_reader = reader(read_obj) - header = next(csv_reader) - # Check file as empty - if header != None: - # Looping over rows and adding its contents to the main dict - for row_index, row in enumerate(csv_reader): - tool = {} - tool_name = row[0] - for col_index, cell in enumerate(row): - # Only include keys if there are values: - if header[col_index] == 'related_pages' and cell: - output = re.split(', |,', cell) - for tag in output: - if tag not in pages_metadata.keys(): - print( - f'ERROR: The table contains the tag "{tag}" in row {row_index} which is not allowed.\n-> Check if the tag you are using is declared in the metadata of one of the pages using the "page_id" attribute.') - sys.exit( - f'The table contains the tag "{tag}" in row {row_index} which is not allowed.\n-> Check if the tag you are using is declared in the metadata of one of the pages using the "page_id" attribute.') - elif header[col_index] == 'registry': - output = {} - if cell: # Only include keys if there are values - for registry in re.split(', |,', cell): - reg, identifier = re.split(':|: ', registry) - if reg in allowed_registries: - output[reg] = identifier - else: - print( - f'ERROR: The table contains the registry "{reg}" in row {row_index} which is not allowed.\n' + f"Allowed registries are {', '.join(allowed_registries)}.\n") - sys.exit( - f'The table contains the registry "{reg}" in row {row_index} which is not allowed.\n' + f"Allowed registries are {', '.join(allowed_registries)}.\n") - if args.reg: - if "tess" not in output: - check_tess = tess_available(tool_name) - if check_tess: - output["tess"] = check_tess - elif output["tess"] == "NA": - del output["tess"] - if "biotools" not in output: - check_biotools = biotools_available(tool_name) - if check_biotools: - 
output["biotools"] = check_biotools - elif output["biotools"] == "NA": - del output["biotools"] - if "fairsharing" not in output: - if len(tool_name) > 4: - check_fairsharing = fairsharing_available( - tool_name, fairsharing_token) - if check_fairsharing: - output["fairsharing"] = check_fairsharing - elif output["fairsharing"] == "NA": - del output["fairsharing"] - else: - # Return the normal form for the Unicode string - output = unicodedata.normalize("NFKD", cell).strip() - if output: - tool[header[col_index]] = output - main_list.append(tool) - print(f"{row_index}. {tool['name']} is parsed.") - -with open(output_path, 'w') as yaml_file: - documents = yaml.dump(main_list, yaml_file) +with open(yaml_path, 'r') as read_obj: + yaml=YAML(typ='safe') + yaml.default_flow_style = False + yaml.representer.add_representer(type(None), my_represent_none) + yaml.width = 1000 + all_tools = yaml.load(read_obj) + + # Looping over tools + for i, tool in enumerate(all_tools): + if 'id' not in tool.keys() or tool['id'] != re.sub('[^0-9a-zA-Z]+', ' ', re.sub("[\(\[].*?[\)\]]", "", tool['id'])).strip().replace(" ", "-").lower(): + sys.exit(f"{tool['name']} has an no or incorrect ID. 
Make sure the ID is kebab-case and only contains alphanumerical characters.") + tool_name = tool['name'] + # Only include keys if there are values: + if 'related_pages' in tool and tool['related_pages']: + print( f'ERROR: The tool "{tool_name}" contains `related_pages` as metadata field, which is not supported.\n') + sys.exit() + if 'registry' in tool and tool['registry']: + for registry, identifier in tool['registry'].items(): + if registry not in allowed_registries: + print( + f'ERROR: The table contains the registry "{registry}" in row which is not allowed.\n' + f"Allowed registries are {', '.join(allowed_registries)}.\n") + sys.exit( + f'The table contains the registry "{registry}" in row which is not allowed.\n' + f"Allowed registries are {', '.join(allowed_registries)}.\n") + if 'url' in tool and tool['url']: + validation = urlparse(tool['url']) + if not validation.scheme and not validation.netloc: + print( f'ERROR: The tool "{tool_name}" contains has an invalid url: {tool["url"]}\n') + sys.exit() + + +# --------- Pulling from FAIRsharing, TeSS and Bio.tools --------- + if args.reg: + registry = {} + if 'registry' in tool: + registry = tool['registry'] + # TeSS Lookup + check_tess = tess_available(tool_name) + if check_tess: + registry['tess'] = check_tess + else: + if 'tess' in registry.keys(): + del registry['tess'] + # Bio.tools Lookup + if 'biotools' not in registry.keys() or not registry['biotools'] : + check_biotools = biotools_available(tool_name) + if check_biotools: + registry['biotools'] = check_biotools + if 'biotools' in registry.keys() and not registry['biotools']: + del registry['biotools'] + + # FAIRsharing Lookup + if 'fairsharing' not in registry.keys() or not registry['fairsharing']: + if len(tool_name) > 4: + check_fairsharing = fairsharing_available( + tool_name, fairsharing_token) + if check_fairsharing: + registry['fairsharing'] = check_fairsharing + if 'fairsharing' in registry.keys() and not registry['fairsharing']: + del 
registry['fairsharing'] + + # Add populated registry dict to the main list + if registry: + tool['registry'] = registry + # Delete empty dict + if 'registry' in tool and not tool['registry']: + del tool['registry'] + main_list.append(tool) + print(f"{i}. {tool['name']} is parsed.") + +with open(yaml_path, 'w') as yaml_file: + yaml.dump(main_list, yaml_file) print("----> YAML is dumped successfully")