diff --git a/cellphone_db/scripts/.ipynb_checkpoints/make_ligand_receptor_interactionDB-checkpoint.ipynb b/cellphone_db/scripts/.ipynb_checkpoints/make_ligand_receptor_interactionDB-checkpoint.ipynb deleted file mode 100644 index 1c80e69..0000000 --- a/cellphone_db/scripts/.ipynb_checkpoints/make_ligand_receptor_interactionDB-checkpoint.ipynb +++ /dev/null @@ -1,899 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "id": "demanding-teaching", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO: [2021-10-13 15:17:12] indra.ontology.bio.ontology - Loading INDRA bio ontology from cache at /Users/sbunga/.indra/bio_ontology/1.12/bio_ontology.pkl\n", - "INFO: [2021-10-13 15:17:21] receptor_ligand_interactions - Loading INDRA DB dataframe\n", - "INFO: [2021-10-13 15:17:26] receptor_ligand_interactions - Loaded 6191787 rows from /Users/sbunga/gitHub/panacea_indra/cellphone_db/input/db_dump_df.pkl\n" - ] - } - ], - "source": [ - "import os\n", - "import re\n", - "import sys\n", - "import csv\n", - "import json\n", - "import tqdm\n", - "import pyobo\n", - "import obonet\n", - "import random\n", - "import pickle\n", - "import logging\n", - "import graphviz\n", - "import datetime\n", - "import openpyxl\n", - "import networkx\n", - "import itertools\n", - "import numpy as np\n", - "import pandas as pd\n", - "import enzyme_client\n", - "from pathlib import Path\n", - "from matplotlib import rc\n", - "from bioinfokit import visuz\n", - "from graphviz import Digraph\n", - "from indra.sources import tas\n", - "import matplotlib.pyplot as plt\n", - "from indra.util import batch_iter\n", - "from collections import OrderedDict\n", - "from collections import defaultdict\n", - "import matplotlib.colors as mcolors\n", - "from indra.statements import Complex\n", - "from scipy.stats import fisher_exact\n", - "from indra.sources import indra_db_rest\n", - "import indra.tools.assemble_corpus as ac\n", - "from indra.literature import pubmed_client\n", - "from indra.assemblers.cx import hub_layout\n", - "from indra.ontology.bio import bio_ontology\n", - "from indra.databases.uniprot_client import um\n", - "from indra.assemblers.html import HtmlAssembler\n", - "from indra.statements.agent import default_ns_order\n", - "from indra.sources.omnipath import process_from_web\n", - "from indra.assemblers.cx.assembler import CxAssembler\n", - "from indra.databases import uniprot_client, hgnc_client\n", - "from indra_db.client.principal.curation import get_curations\n", - "from indra.databases.hgnc_client import get_hgnc_from_mouse, get_hgnc_name\n", - "\n", - "logger = logging.getLogger('receptor_ligand_interactions')\n", - "\n", - "mouse_gene_name_to_mgi = {v: um.uniprot_mgi.get(k)\n", - " for k, v in um.uniprot_gene_name.items()\n", - " if k in um.uniprot_mgi}\n", - "db_curations = get_curations()\n", - "\n", - "\n", - "\n", - "__file__ = \"/Users/sbunga/gitHub/panacea_indra/panacea_indra/nextflow/scripts/interactome_notebook.ipynb\"\n", - "HERE = os.path.abspath(\"/Users/sbunga/gitHub/panacea_indra/cellphone_db/\")\n", - "INPUT = os.path.join(HERE, 'input')\n", - "OUTPUT = os.path.join(HERE, 'output')\n", - "INDRA_DB_PKL = os.path.join(INPUT, 'db_dump_df.pkl')\n", - "DATA_SPREADSHEET = os.path.join(INPUT, 'Neuroimmune gene list .xlsx')\n", - "LIGAND_RECEPTOR_SPREADSHEET = os.path.join(INPUT, 'ncomms8866_lg_rg.xlsx')\n", - "GO_ANNOTATIONS = os.path.join(INPUT, 'goa_human.gaf')\n", - "DRUG_BANK_PKL = os.path.join(INPUT, 'drugbank_5.1.pkl')\n", - "ION_CHANNELS = os.path.join(INPUT, 'ion_channels.txt')\n", - "SURFACE_PROTEINS_WB = os.path.join(INPUT, 'Surface Proteins.xlsx')\n", - "\n", - "\n", - "def _load_goa_gaf():\n", - " \"\"\"Load the gene/GO annotations as a pandas data frame.\"\"\"\n", - " # goa_ec = {'EXP', 'IDA', 'IPI', 'IMP', 'IGI', 'IEP', 'HTP', 'HDA', 'HMP',\n", - " # 'HGI', 'HEP', 'IBA', 'IBD'}\n", - " goa = pd.read_csv(GO_ANNOTATIONS, sep='\\t',\n", - " skiprows=23, dtype=str,\n", - " header=None,\n", - " names=['DB',\n", - " 'DB_ID',\n", - " 'DB_Symbol',\n", - " 'Qualifier',\n", - " 'GO_ID',\n", - " 'DB_Reference',\n", - " 'Evidence_Code',\n", - " 'With_From',\n", - " 'Aspect',\n", - " 'DB_Object_Name',\n", - " 'DB_Object_Synonym',\n", - " 'DB_Object_Type',\n", - " 'Taxon',\n", - " 'Date',\n", - " 'Assigned',\n", - " 'Annotation_Extension',\n", - " 'Gene_Product_Form_ID'])\n", - " goa = goa.sort_values(by=['DB_ID', 'GO_ID'])\n", - " # Filter out all \"NOT\" negative evidences\n", - " goa['Qualifier'].fillna('', inplace=True)\n", - " goa = goa[~goa['Qualifier'].str.startswith('NOT')]\n", - " # Filter to rows with evidence code corresponding to experimental\n", - " # evidence\n", - " # goa = goa[goa['Evidence_Code'].isin(goa_ec)]\n", - " return goa\n", - "\n", - "\n", - "goa = _load_goa_gaf()\n", - "\n", - "\n", - "def get_pain_mol():\n", - " PAIN_SIGNAL_MOL = {\n", - " \"Prostaglandins\": \"CHEBI:26333\",\n", - " \"Brandykinin\": \"CHEBI:3165\"\n", - " }\n", - "\n", - " CHEBI_LIST = {}\n", - " CHEBI_NAMES = {}\n", - " for compounds, chebi_id in PAIN_SIGNAL_MOL.items():\n", - " CHEBI_LIST[compounds] = \\\n", - " [children[1] for children in\n", - " bio_ontology.get_children('CHEBI',\n", - " chebi_id)]\n", - "\n", - " CHEBI_NAMES[compounds] = \\\n", - " [bio_ontology.get_name('CHEBI', ids)\n", - " for ids in CHEBI_LIST[compounds]]\n", - "\n", - " return CHEBI_NAMES\n", - "\n", - "\n", - "PAIN_MOL_NAMES = get_pain_mol()\n", - "\n", - "\n", - "def load_indra_df(fname):\n", - " \"\"\"Return an INDRA Statement data frame from a pickle file.\"\"\"\n", - " logger.info('Loading INDRA DB dataframe')\n", - " with open(fname, 'rb') as fh:\n", - " df = pickle.load(fh)\n", - " logger.info('Loaded %d rows from %s' % (len(df), fname))\n", - " return df\n", - "\n", - "# Load the INDRA DB DF\n", - "indra_df = load_indra_df(INDRA_DB_PKL)\n", - "\n", - "\n", - "def get_hashes_by_gene_pair(df, ligand_genes, receptor_genes):\n", - " hashes_by_gene_pair = defaultdict(set)\n", - " l_genes = ligand_genes\n", - "\n", - " for a, b, hs in zip(df.agA_name, df.agB_name, df.stmt_hash):\n", - " if a in l_genes and b in receptor_genes:\n", - " hashes_by_gene_pair[(a, b)].add(hs)\n", - " return hashes_by_gene_pair\n", - "\n", - "\n", - "def download_statements(hashes):\n", - " \"\"\"Download the INDRA Statements corresponding to a set of hashes.\n", - " \"\"\"\n", - " stmts_by_hash = {}\n", - " for group in tqdm.tqdm(batch_iter(hashes, 200), total=int(len(hashes) / 200)):\n", - " idbp = indra_db_rest.get_statements_by_hash(list(group),\n", - " ev_limit=10)\n", - " for stmt in idbp.statements:\n", - " stmts_by_hash[stmt.get_hash()] = stmt\n", - " return stmts_by_hash\n", - "\n", - "\n", - "def get_genes_for_go_ids(go_ids):\n", - " \"\"\"Return genes that are annotated with a given go ID or its children.\"\"\"\n", - " all_go_ids = set()\n", - " for go_id in go_ids:\n", - " children_go_ids = {ch[1] for ch in bio_ontology.get_children('GO', go_id)}\n", - " all_go_ids.add(go_id)\n", - " all_go_ids |= children_go_ids\n", - " df = goa[goa['GO_ID'].isin(all_go_ids)]\n", - " up_ids = sorted(list(set(df['DB_ID'])))\n", - " gene_names = [uniprot_client.get_gene_name(up_id) for up_id in up_ids]\n", - " gene_names = {g for g in gene_names if g}\n", - " return gene_names\n", - "\n", - "\n", - "def fix_dates(gene_names):\n", - " replacements = {\n", - " datetime.datetime(2020, 3, 7, 0, 0): 'March7',\n", - " datetime.datetime(2020, 3, 2, 0, 0): 'March2',\n", - " datetime.datetime(2020, 3, 4, 0, 0): 'March4',\n", - " datetime.datetime(2020, 3, 5, 0, 0): 'March5',\n", - " datetime.datetime(2020, 3, 6, 0, 0): 'March6',\n", - " datetime.datetime(2020, 3, 9, 0, 0): 'March9',\n", - " datetime.datetime(2020, 3, 8, 0, 0): 'March8',\n", - " datetime.datetime(2020, 3, 11, 0, 0): 'Mar11',\n", - " datetime.datetime(2020, 9, 1, 0, 0): 'Sept1',\n", - " datetime.datetime(2020, 9, 2, 0, 0): 'Sept2',\n", - " datetime.datetime(2020, 9, 3, 0, 0): 'Sept3',\n", - " datetime.datetime(2020, 9, 4, 0, 0): 'Sept4',\n", - " datetime.datetime(2020, 9, 5, 0, 0): 'Sept5',\n", - " datetime.datetime(2020, 9, 6, 0, 0): 'Sept6',\n", - " datetime.datetime(2020, 9, 7, 0, 0): 'Sept7',\n", - " datetime.datetime(2020, 9, 8, 0, 0): 'Sept8',\n", - " datetime.datetime(2020, 9, 9, 0, 0): 'Sept9',\n", - " datetime.datetime(2020, 9, 10, 0, 0): 'Sept10',\n", - " datetime.datetime(2020, 9, 11, 0, 0): 'Sept11',\n", - " datetime.datetime(2020, 9, 15, 0, 0): 'Sept15',\n", - " }\n", - " fixed_gene_names = set()\n", - " for gene_name in gene_names:\n", - " if isinstance(gene_name, datetime.datetime):\n", - " fixed_gene_names.add(replacements[gene_name])\n", - " else:\n", - " fixed_gene_names.add(gene_name)\n", - " return fixed_gene_names\n", - "\n", - "\n", - "def read_workbook(workbook):\n", - " \"\"\" This function takes Excel workbook as an input and\n", - " returns ligand and receptor gene list respectively.\n", - " Input: Excel workbook with single(2 columns) or two sheets\n", - " Condition: considers first column/sheet as ligand genes and second\n", - " column/shet as receptor genes\n", - " \"\"\"\n", - " ligands_sheet = 'updated list of ligands '\n", - " receptors_sheet = 'RPKM > 1.5 cfiber'\n", - " wb = openpyxl.load_workbook(workbook)\n", - " ligands = fix_dates(set([row[0].value for row in wb[ligands_sheet]][1:]))\n", - " receptors = fix_dates(set([row[0].value\n", - " for row in wb[receptors_sheet]][1:]))\n", - " return ligands, receptors\n", - "\n", - "\n", - "def _plot_de_genes(df):\n", - " os.chdir(output_dir)\n", - " visuz.gene_exp.volcano(df=df,\n", - " lfc='avg_logFC', pv='p_val',\n", - " plotlegend=True, legendpos='upper right',\n", - " legendanchor=(1.46, 1), geneid=\"Genes\",\n", - " genenames=\"deg\", gstyle=2)\n", - "\n", - "\n", - "def read_gene_list(infile, mode):\n", - " gene_list = []\n", - " try:\n", - " with open(infile, mode) as FH:\n", - " for eachGene in FH:\n", - " gene_list.append(eachGene.strip(\"\\n\"))\n", - " return gene_list\n", - "\n", - " except FileNotFoundError:\n", - " sys.exit(\"Given file doesn't exist\")\n", - "\n", - "\n", - "def filter_nuclear_receptors(receptors_go, go_term):\n", - " # Filtering out the nuclear receptors from the receptor list\n", - " nuclear_receptors = get_genes_for_go_ids([go_term])\n", - " # Add any others that don't have the right annotation\n", - " nuclear_receptors |= {'NR2C2'}\n", - " filtered_receptors_go = receptors_go - nuclear_receptors\n", - " return filtered_receptors_go\n", - "\n", - "\n", - "def filter_complex_statements(stmts, ligands, receptors):\n", - " for stmt in stmts:\n", - " if isinstance(stmt, Complex):\n", - " # Statement updated by reference here\n", - " _filter_complex(stmt, ligands, receptors)\n", - " return stmts\n", - "\n", - "\n", - "def _filter_complex(stmt, lg, rg):\n", - " \"\"\"Filter out the genes from Complex statements which\n", - " are not present in the given ligand/receptor list\"\"\"\n", - " stmt.members = [agent for agent in stmt.members\n", - " if agent.name in lg or agent.name in rg]\n", - " return stmt\n", - "\n", - "\n", - "def filter_op_stmts(op_stmts, lg, rg):\n", - " \"\"\" Filter out the statements which are not ligand and receptor \"\"\"\n", - " logger.info(f'Filtering {len(op_stmts)} to ligand-receptor interactions')\n", - " filtered_stmts = [stmt for stmt in op_stmts if\n", - " (any(a.name in lg for a in stmt.agent_list())\n", - " and any(a.name in rg for a in stmt.agent_list()))]\n", - " logger.info(f'{len(filtered_stmts)} left after filter')\n", - " return filtered_stmts\n", - "\n", - "\n", - "def html_assembler(indra_stmts, fname):\n", - " \"\"\"Assemble INDRA statements into a HTML report\"\"\"\n", - " html_assembler = HtmlAssembler(indra_stmts,\n", - " db_rest_url='https://db.indra.bio')\n", - " assembled_html_report = html_assembler.make_model(no_redundancy=True)\n", - " html_assembler.save_model(fname)\n", - " return assembled_html_report\n", - "\n", - "\n", - "def cx_assembler(indra_stmts, fname):\n", - " \"\"\"Assemble INDRA statements into a CX report\"\"\"\n", - " cx_assembler = CxAssembler(indra_stmts)\n", - " assembled_cx_report = cx_assembler.make_model()\n", - " cx_assembler.save_model(fname)\n", - " ndex_network_id = cx_assembler.upload_model(ndex_cred=None,\n", - " private=True, style='default')\n", - " return assembled_cx_report, ndex_network_id\n", - "\n", - "\n", - "def get_small_mol_report(targets_by_drug, potential_targets, fname):\n", - " df = []\n", - " for drug, targets in targets_by_drug.items():\n", - " targets_in_data = targets & potential_targets\n", - " if not targets_in_data:\n", - " continue\n", - " df.append(\n", - " {\n", - " \"Drug\": drug[0],\n", - " \"ID\": '%s:%s' % (drug[1]),\n", - " \"Named\": 0 if drug[0].startswith('CHEMBL') else 1,\n", - " \"Score\": \"{:.3f}\".format(len(targets_in_data) / len(targets)),\n", - " \"Number of targets in data\": len(targets_in_data),\n", - " \"Targets in data\": \", \".join(sorted(targets_in_data)),\n", - " \"Other targets\": \", \".join(sorted(targets - targets_in_data)),\n", - " }\n", - " )\n", - " df = pd.DataFrame(df).sort_values(by=['Score', 'Number of targets in data',\n", - " 'Named'],\n", - " ascending=False)\n", - " df.to_csv(fname, sep=\"\\t\", header=True, index=False)\n", - " return df\n", - "\n", - "\n", - "\"\"\"\n", - "def get_ligands_by_receptor(receptors_in_data, ligands_in_data, stmts):\n", - " ligands_by_receptor = defaultdict(set)\n", - " logFC = list(ligands_in_data.keys())\n", - " lg = list(ligands_in_data.values())\n", - "\n", - " for stmt in stmts:\n", - " agent_names = {agent.name for agent in stmt.agent_list()}\n", - " receptors = agent_names & receptors_in_data\n", - " ligands = agent_names & ligands_in_data\n", - " for receptor in receptors:\n", - " ligands_by_receptor[receptor] |= ligands\n", - " return dict(ligands_by_receptor)\n", - "\"\"\"\n", - "\n", - "def get_receptor_by_ligands(receptors_in_data, ligands_in_data, stmts):\n", - " receptor_by_ligands = defaultdict(set)\n", - " for stmt in stmts:\n", - " agent_names = {agent.name for agent in stmt.agent_list()}\n", - " receptors = agent_names & receptors_in_data\n", - " ligands = agent_names & ligands_in_data\n", - " for receptor in receptors:\n", - " receptor_by_ligands[receptor] |= ligands\n", - " return dict(receptor_by_ligands)\n", - "\n", - "\n", - "def filter_out_medscan(stmts):\n", - " logger.info('Filtering out medscan evidence on %d statements' % len(stmts))\n", - " new_stmts = []\n", - " for stmt in stmts:\n", - " new_evidence = [e for e in stmt.evidence if e.source_api != 'medscan']\n", - " if not new_evidence:\n", - " continue\n", - " stmt.evidence = new_evidence\n", - " if not stmt.evidence:\n", - " continue\n", - " new_stmts.append(stmt)\n", - " logger.info('%d statements after filter' % len(new_stmts))\n", - " return new_stmts\n", - "\n", - "\n", - "def filter_db_only(stmts):\n", - " new_stmts = []\n", - " for stmt in stmts:\n", - " sources = {ev.source_api for ev in stmt.evidence}\n", - " if sources <= {'reach', 'sparser', 'trips', 'rlimsp', 'medscan', 'eidos'}:\n", - " continue\n", - " new_stmts.append(stmt)\n", - " return new_stmts\n", - "\n", - "\n", - "def get_cell_type_stats(stmts, ligands, receptors):\n", - " interactome = set()\n", - " ligand_interactions = defaultdict(set)\n", - " for stmt in stmts:\n", - " stmt_ligands = {a.name for a in stmt.agent_list() if\n", - " a.name in ligands}\n", - " stmt_receptors = {a.name for a in stmt.agent_list() if\n", - " a.name in receptors}\n", - " for ligand, receptor in itertools.product(stmt_ligands,\n", - " stmt_receptors):\n", - " interactome.add((ligand, receptor))\n", - " ligand_interactions[ligand].add(receptor)\n", - " return len(interactome), ligand_interactions\n", - "\n", - "\n", - "def plot_interaction_potential(num_interactions_by_cell_type, fname):\n", - " labels = {\n", - " 'DCs': 'Dendritic cells',\n", - " 'Dermal Macs': 'Dermal macrophages',\n", - " 'M2a': 'Reparative macrophages (2a)',\n", - " 'M2b': 'Reparative macrophages (2b)',\n", - " 'Monocytes': 'Monocytes',\n", - " 'Resident Mac': 'Resident macrophages',\n", - " 'Mast cells': 'Mast cells'\n", - " }\n", - " G = networkx.DiGraph()\n", - " for cell_type, num_int in num_interactions_by_cell_type.items():\n", - " G.add_node(cell_type, label=labels[cell_type])\n", - " G.add_edge(cell_type, 'Neurons', label=num_int)\n", - " ag = networkx.nx_agraph.to_agraph(G)\n", - " ag.draw(fname, prog='dot')\n", - "\n", - "\n", - "def get_all_enzymes():\n", - " HOME = str(Path.home())\n", - " ec_code_path = '.obo/ec-code/ec-code.obo'\n", - " if not os.path.exists(os.path.join(HOME, ec_code_path)):\n", - " _ = pyobo.get_id_name_mapping('ec-code')\n", - " obo = obonet.read_obo(os.path.join(HOME, ec_code_path))\n", - " else:\n", - " obo = obonet.read_obo(os.path.join(HOME, ec_code_path))\n", - " up_nodes = set()\n", - " for node in obo.nodes:\n", - " if node.startswith('uniprot'):\n", - " up_nodes.add(node[8:])\n", - " human_ups = {u for u in up_nodes if uniprot_client.is_human(u)}\n", - " enzymes = {uniprot_client.get_gene_name(u) for u in human_ups}\n", - " enzymes = {g for g in enzymes if not hgnc_client.is_kinase(g)}\n", - " enzymes = {g for g in enzymes if not hgnc_client.is_phosphatase(g)}\n", - " logger.info(f'Filtered {len(enzymes)} enzymes in total')\n", - " return enzymes\n", - "\n", - "\n", - "def process_seurat_csv(infile, fc):\n", - " \"\"\" Process Seurat dataframe and only filter in\n", - " genes with the given Fold change \"\"\"\n", - " l_df = pd.read_csv(infile, header=0, sep=\",\")\n", - " l_df.columns = l_df.columns.str.replace('Unnamed: 0', 'Genes')\n", - " filtered_df = l_df[l_df['avg_logFC'] > 0.25][['Genes', 'avg_logFC']]\n", - " filtered_df = filtered_df.sort_values(by='avg_logFC', ascending=False)\n", - " filtered_dict = {}\n", - " for r, c in filtered_df.iterrows():\n", - " filtered_dict[c[1]] = c[0]\n", - " # Volcano plot of DE genes\n", - " _plot_de_genes(l_df)\n", - " # return set(filtered_markers)\n", - " return filtered_dict\n", - "\n", - "\n", - "def get_de_product_list(de_enzyme_product_list,\n", - " de_enzyme_stmts):\n", - " if len(de_enzyme_product_list) > 1:\n", - " de_enzyme_product_list = pd.merge(de_enzyme_stmts, de_enzyme_product_list,\n", - " on=['Enzyme', 'Interaction', 'product', 'logFC'],\n", - " how=\"outer\").fillna('')\n", - " return de_enzyme_product_list.sort_values(by='logFC', ascending=False)\n", - "\n", - " elif len(de_enzyme_product_list) < 1:\n", - " de_enzyme_product_list = de_enzyme_stmts\n", - " return de_enzyme_product_list\n", - "\n", - "\n", - "def get_enzyme_product_interactions(df, de_en_df, receptors_in_data):\n", - " hashes_by_gene_pair = defaultdict(set)\n", - " seen_product = set()\n", - " product_and_fc = defaultdict(set)\n", - " for r, c in de_en_df.iterrows():\n", - " if c[2] not in seen_product:\n", - " product_and_fc[c[2]].add((c[0], c[3]))\n", - " seen_product.add(c[2])\n", - "\n", - " for a, b, hs in zip(df.agA_name, df.agB_name, df.stmt_hash):\n", - " if a in product_and_fc and b in receptors_in_data:\n", - " enzyme_logFC = [e for v in product_and_fc[a]\n", - " for e in v]\n", - " enzyme, logFC = enzyme_logFC[0], enzyme_logFC[1]\n", - " hashes_by_gene_pair[(a, b, enzyme, logFC)].add(hs)\n", - " return hashes_by_gene_pair\n", - "\n", - "\n", - "def get_pain_phenotype(lg, pain_db):\n", - " r_phenotype = defaultdict(set)\n", - " for r, c in pain_db.iterrows():\n", - " if isinstance(pain_db.iloc[r]['gene_symbols'], str):\n", - " l = set(pain_db.iloc[r]['gene_symbols'].split(\",\"))\n", - " pheno = pain_db.iloc[r]['phenotype_description']\n", - " for g in l:\n", - " if g in lg:\n", - " r_phenotype[(g)].add(pheno)\n", - " return r_phenotype\n", - "\n", - "\n", - "def make_rnk(infile):\n", - " df = pd.read_csv(infile, header=0, sep=\",\")\n", - " df.columns = df.columns.str.replace('Unnamed: 0', 'Genes')\n", - " df = df.loc[0:, ['Genes', 'p_val']]\n", - " return df\n", - "\n", - "\n", - "def make_pheno_file(l_phenotype):\n", - " pheno_df = []\n", - " for keys, values in l_phenotype.items():\n", - " pheno_df.append(\n", - " {\n", - " \"Receptor\": keys,\n", - " \"Phenotype_description\": \", \".join(values)\n", - " }\n", - " )\n", - " return pd.DataFrame(pheno_df)\n", - "\n", - "\n", - "def filter_incorrect_curations(stmts):\n", - " # Filter incorrect curations\n", - " indra_op_filtered = ac.filter_by_curation(stmts,\n", - " curations=db_curations)\n", - " return indra_op_filtered\n", - "\n", - "\n", - "def ligand_mgi_to_hgnc_name(seurat_ligand_genes):\n", - " filtered_mgi = defaultdict(set)\n", - " for logfc, gene in seurat_ligand_genes.items():\n", - " if gene in mouse_gene_name_to_mgi:\n", - " filtered_mgi[(gene, logfc)].add(mouse_gene_name_to_mgi[gene])\n", - "\n", - " hgnc_gene_dict = defaultdict(set)\n", - " seen_genes = set()\n", - " for key, value in filtered_mgi.items():\n", - " mgi_id = next(iter(value))\n", - " hgnc_id = get_hgnc_from_mouse(mgi_id)\n", - " hgnc_symbol = get_hgnc_name(hgnc_id)\n", - " if hgnc_symbol not in seen_genes:\n", - " hgnc_gene_dict[(key[1])].add(hgnc_symbol)\n", - " else:\n", - " pass\n", - " seen_genes.add(hgnc_symbol)\n", - " return hgnc_gene_dict\n", - "\n", - "\n", - "def mgi_to_hgnc_name(gene_list):\n", - " \"\"\"Convert given mouse gene symbols to HGNC equivalent symbols\"\"\"\n", - " filtered_mgi = {mouse_gene_name_to_mgi[gene] for gene in gene_list\n", - " if gene in mouse_gene_name_to_mgi}\n", - " hgnc_gene_set = set()\n", - " for mgi_id in filtered_mgi:\n", - " hgnc_id = get_hgnc_from_mouse(mgi_id)\n", - " hgnc_gene_set.add(get_hgnc_name(hgnc_id))\n", - " return hgnc_gene_set\n", - "\n", - "\n", - "def make_interaction_df(interaction_dict):\n", - " interaction_list = [\n", - " {\n", - " 'Agent_A': [stmt[0].agent_list()][0][0].name,\n", - " 'Agent_B': [stmt[0].agent_list()][0][1].name,\n", - " 'Interaction type': re.match(\"\\w+\", str(stmt[0])).group(),\n", - " 'Enzyme': stmt[1],\n", - " 'logFC': fc\n", - " }\n", - " for fc, stmts in interaction_dict.items()\n", - " for stmt in stmts\n", - " if len(stmt[0].agent_list()) > 1\n", - "\n", - " ]\n", - " df = pd.DataFrame(interaction_list)\n", - " df = df.sort_values(by=['logFC'],\n", - " ascending=False)\n", - " return df\n", - "\n", - "\n", - "def create_interaction_digraph(ligand_receptors,\n", - " sorted_enzyme_FC,\n", - " fname):\n", - " '''\n", - " This function takes two dictionaries as input,\n", - " ligand receptors and enzyme fold change and creates\n", - " a interaction Digraph of ligands, enzymes and receptors.\n", - "\n", - " Parameters\n", - " ----------\n", - " celtype_stmts : Optional[list[indra.statements.Statement]]\n", - " A list of INDRA Statements to be assembled.\n", - " network_name : Optional[str]\n", - " The name of the network to be assembled. Default: indra_assembled\n", - "\n", - " Attributes\n", - " ----------\n", - " ligands_dict : dict\n", - " Dict of foldchange and ligands as keys and receptors as values\n", - " enzyme dict : dict\n", - " Dict of foldchange as keys and enzymes as values\n", - " fname : str\n", - " output file name\n", - " '''\n", - "\n", - " ligand_receptors = dict(sorted(ligand_receptors.items(),\n", - " reverse=True))\n", - " G = networkx.DiGraph()\n", - "\n", - " top_lg_rc = dict(sorted(itertools.islice(ligand_receptors.items(), 10)))\n", - " top_en = dict(itertools.islice(sorted_enzyme_FC.items(), 10))\n", - "\n", - " for FC_lg, rcs in top_lg_rc.items():\n", - " for rc in rcs:\n", - " G.add_node(FC_lg[1], color='green')\n", - " G.add_edge(FC_lg[1], rc, label=\"{:.2f}\".format(FC_lg[0]))\n", - " for en_FC, en in top_en.items():\n", - " for chem in enzyme_product_dict[en]:\n", - " for rcs in products_receptors[chem]:\n", - " G.add_node(en, color='red')\n", - " G.add_edge(en, chem, label=\"{:.2f}\".format(en_FC))\n", - " G.add_edge(chem, rcs)\n", - "\n", - " G.graph.setdefault('graph', {})['rankdir'] = 'LR'\n", - " ag = networkx.nx_agraph.to_agraph(G)\n", - " fname = os.path.join(OUTPUT, fname + \"interactions_digraph.pdf\")\n", - " ag.draw(fname, prog='dot')\n", - " \n", - " \n", - "def process_df(workbook):\n", - " wb = openpyxl.load_workbook(workbook)\n", - " df = {\n", - " 'ligands': [row[1].value for row in wb['All.Pairs']][1:], \n", - " 'receptors': [row[3].value for row in wb['All.Pairs']][1:]\n", - " }\n", - " lg_rg = pd.DataFrame(df)\n", - " return lg_rg" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "forbidden-brooklyn", - "metadata": {}, - "outputs": [], - "source": [ - "??ac.filter_direct" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "essential-celtic", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO: [2021-10-13 15:45:58] receptor_ligand_interactions - Got 994 surface proteins from spreadsheet\n", - "WARNING: [2021-10-13 15:46:09] indra.sources.omnipath.processor - 1873 entries without references were skipped\n", - "WARNING: [2021-10-13 15:46:09] indra.sources.omnipath.processor - 137 references with bad pmids were skipped\n", - "WARNING: [2021-10-13 15:46:09] indra.sources.omnipath.processor - 12 entries with conflicting regulation were skipped\n", - "INFO: [2021-10-13 15:46:09] indra.tools.assemble_corpus - Filtering 19812 statements to direct ones...\n", - "INFO: [2021-10-13 15:46:09] indra.tools.assemble_corpus - 19812 statements after filter...\n", - "INFO: [2021-10-13 15:46:09] receptor_ligand_interactions - Filtering 19812 to ligand-receptor interactions\n", - "INFO: [2021-10-13 15:46:09] receptor_ligand_interactions - 1900 left after filter\n", - "INFO: [2021-10-13 15:46:09] indra.tools.assemble_corpus - Filtering 1900 statements to direct ones...\n", - "INFO: [2021-10-13 15:46:09] indra.tools.assemble_corpus - 1900 statements after filter...\n", - "INFO: [2021-10-13 15:46:09] indra.tools.assemble_corpus - Filtering 1900 statements with any incorrect curations...\n", - "INFO: [2021-10-13 15:46:09] indra.tools.assemble_corpus - 1896 statements after filter...\n", - "/opt/miniconda3/envs/INDRA/lib/python3.8/site-packages/openpyxl/worksheet/_reader.py:300: UserWarning: Unknown extension is not supported and will be removed\n", - " warn(msg)\n" - ] - } - ], - "source": [ - "wd = '/Users/sbunga/gitHub/panacea_indra/cellphone_db/'\n", - "\n", - "# Read and extract cell surface proteins from CSPA DB\n", - "wb = openpyxl.load_workbook(SURFACE_PROTEINS_WB)\n", - "surface_protein_set = set(row[4].value for row in wb['Sheet 1']\n", - " if row[6].value == 'yes')\n", - "logger.info('Got %d surface proteins from spreadsheet' %\n", - " len(surface_protein_set))\n", - "ligand_terms = ['cytokine activity', 'hormone activity',\n", - " 'growth factor activity']\n", - "receptor_terms = ['signaling receptor activity']\n", - "\n", - "# Getting GO id's for ligands and receptors by using\n", - "# GO terms\n", - "ligand_go_ids = [bio_ontology.get_id_from_name('GO', term)[1]\n", - " for term in ligand_terms]\n", - "receptor_go_ids = [bio_ontology.get_id_from_name('GO', term)[1]\n", - " for term in receptor_terms]\n", - "\n", - "# Converting GO id's to gene symbols\n", - "ligand_genes_go = get_genes_for_go_ids(ligand_go_ids)\n", - "receptor_genes_go = get_genes_for_go_ids(receptor_go_ids)\n", - "#manual_ligands = {'THBS1'}\n", - "manual_ligands = set()\n", - "\n", - "\n", - "# remove all the receptors from the surface_protein_set\n", - "full_ligand_set = \\\n", - " (surface_protein_set - receptor_genes_go) | ligand_genes_go | \\\n", - " manual_ligands\n", - "\n", - "# Filtering out the nuclear receptors from the receptor list\n", - "receptor_genes_go = filter_nuclear_receptors(receptor_genes_go,\n", - " 'GO:0004879')\n", - "\n", - "\n", - "# Add ION channels to the receptor list\n", - "ion_channels = set()\n", - "with open(ION_CHANNELS, 'r') as fh:\n", - " for line in fh:\n", - " ion_channels.add(line.strip())\n", - "receptor_genes_go |= ion_channels\n", - "\n", - "\n", - "# Collect lists of receptors based on GO annotations and\n", - "# by reading the data\n", - "# Read list of neuro immune genes from the spread sheet\n", - "_, raw_receptor_genes = read_workbook(DATA_SPREADSHEET)\n", - "receptor_genes = mgi_to_hgnc_name(raw_receptor_genes)\n", - "receptors_in_data = receptor_genes & receptor_genes_go\n", - "\n", - "\n", - "# Fetch omnipath database biomolecular interactions and\n", - "# process them into INDRA statements\n", - "op = process_from_web()\n", - "filtered_op_stmts = ac.filter_direct(op.statements)\n", - "\n", - "# Filter statements which are not ligands/receptors from \n", - "# OmniPath database\n", - "op_filtered = filter_op_stmts(op.statements, full_ligand_set,\n", - " receptors_in_data)\n", - "op_filtered = ac.filter_direct(op_filtered)\n", - "\n", - "\n", - "indra_op_filtered = ac.filter_by_curation(op_filtered,\n", - " curations=db_curations)\n", - "\n", - "# Filter complex statements\n", - "indra_op_filtered = filter_complex_statements(indra_op_filtered,\n", - " full_ligand_set,\n", - " receptors_in_data)\n", - "\n", - "receptor_by_ligands = get_receptor_by_ligands(receptors_in_data, \n", - " full_ligand_set, \n", - " indra_op_filtered)\n", - "\n", - "lg_rg = process_df(LIGAND_RECEPTOR_SPREADSHEET)\n", - "\n", - "nature_interactome = defaultdict(set)\n", - "for r,c in lg_rg.iterrows():\n", - " nature_interactome[(c[0])].add(c[1])\n", - " \n", - "hashes = defaultdict(set)\n", - "for a, b, hs in zip(indra_df.agA_name, \n", - " indra_df.agB_name,\n", - " indra_df.stmt_hash):\n", - " if a in nature_interactome and b in nature_interactome[a]:\n", - " hashes[(a, b)].add(hs)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "million-arabic", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO: [2021-10-13 15:59:11] receptor_ligand_interactions - total OP interactions: 1091\n", - "INFO: [2021-10-13 15:59:11] receptor_ligand_interactions - total nature 2015 interactions: 2557\n", - "INFO: [2021-10-13 15:59:11] receptor_ligand_interactions - total unique interactions: 3108\n" - ] - } - ], - "source": [ - "nature_interactions = []\n", - "for r,c in lg_rg.iterrows():\n", - " nature_interactions.append(\n", - " {\n", - " 'ligands': c[0],\n", - " 'receptors': c[1],\n", - " 'interactions': c[0]+'_'+c[1]\n", - "\n", - " }\n", - ")\n", - "nature_df = pd.DataFrame(nature_interactions)\n", - "\n", - "op_interactions = []\n", - "for receptors, ligands in receptor_by_ligands.items():\n", - " for lg in ligands:\n", - " op_interactions.append(\n", - " {\n", - " 'ligands':lg,\n", - " 'receptors': receptors,\n", - " 'interactions': lg+'_'+receptors\n", - " }\n", - " )\n", - "op_df = pd.DataFrame(op_interactions)\n", - "\n", - "\n", - "unique_interactions = set(op_df.interactions) | set(nature_df.interactions)\n", - "\n", - "logger.info('total OP interactions: %d' % (len(set(op_df.interactions))))\n", - "logger.info('total nature 2015 interactions: %d' % len(set(nature_df.interactions)))\n", - "logger.info('total unique interactions: %d' % len(unique_interactions))\n", - "\n", - "op_nature = [{'partner_a':i.split('_')[0],\n", - " 'partner_b':i.split('_')[1]} \n", - " for i in unique_interactions]\n", - "\n", - "op_nature = pd.DataFrame(op_nature)\n", - "op_nature.to_csv(os.path.join(wd, 'output/op_nature_interactions.csv'), \n", - " sep=\",\", index=0)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "id": "pending-watershed", - "metadata": {}, - "outputs": [], - "source": [ - "common_op_nature_interaction = set(op_df.interactions) & set(nature_df.interactions)\n", - "common_op_nature_interaction = pd.DataFrame([{'interactions':i} for i in common_op_nature_interaction])\n", - "common_op_nature_interaction.to_csv(os.path.join(wd, 'output/common_op_nature_interaction.csv'), \n", - " sep=\",\", index=0)\n", - "\n", - "op_specific = set(op_df.interactions) - set(nature_df.interactions)\n", - "op_specific = pd.DataFrame([{'interactions':i} for i in op_specific])\n", - "op_specific.to_csv(os.path.join(wd, 'output/op_specific.csv'), \n", - " sep=\",\", index=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "great-sharing", - "metadata": {}, - "outputs": [], - "source": [ - "up_hgnc = {v: k\n", - " for k, v in um.uniprot_gene_name.items()\n", - " if k in um.uniprot_hgnc}\n", - "\n", - "\n", - "dataframe = []\n", - "count=0\n", - "\n", - "for r,c in op_nature.iterrows():\n", - " count+=1\n", - " if c[0] in up_hgnc and c[1] in up_hgnc:\n", - " dataframe.append(\n", - " {\n", - " 'id_cp_interaction':'Woolf-'+str(count),\n", - " 'partner_a': up_hgnc[c[0]],\n", - " 'partner_b': up_hgnc[c[1]],\n", - " 'source':'NEURO_IMMUNE_PROJECT'\n", - " }\n", - " )\n", - "\n", - "df = pd.DataFrame(dataframe)\n", - "df.to_csv(os.path.join(wd, 'output/op_nature_uniprot.csv'), \n", - " sep=\",\", index=0)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pain_model/output/gene_pct_drg_clusters.csv b/pain_model/output/gene_pct_drg_clusters.csv index f12912c..8de8663 100644 --- a/pain_model/output/gene_pct_drg_clusters.csv +++ b/pain_model/output/gene_pct_drg_clusters.csv @@ -118,6 +118,7 @@ "S100a10","S100 calcium binding protein A10","Cytoplasm","other",41.07,70.43,37.15,38.48,43.97,37.56,61.86,63.29,63.32,59.12,39,72.47,63.2,32.78,20.97,33.17,27.55,26.57,36.67,55.48 "S100a4","S100 calcium binding protein A4","Cytoplasm","other",6.17,1.45,3.49,2.88,23.51,2.44,1.34,1.47,1.05,2.91,1.03,4.24,17.59,4.56,13.27,36.57,9.42,12.52,17.33,4.09 "S100a6","S100 calcium binding protein A6","Cytoplasm","transporter",47.4,68.82,46.26,51.94,55.97,66.5,65.2,26.66,54.61,83.39,47.1,70.95,73.09,46.95,70.61,66.06,46.49,60.24,59.82,87.01 +"S100a7a","S100 calcium binding protein A7A","Cytoplasm","other",0.32,1.25,0.49,0.47,0.35,0.81,0.45,0.38,0.88,1.73,3.27,1.86,1.99,0.33,1,0.44,0.51,1.04,0.62,2.17 "S1pr3","sphingosine-1-phosphate receptor 3","Plasma Membrane","G-protein coupled receptor",6.82,3.51,9.24,6.6,7.77,9.92,15.58,5.3,27.73,3.35,2.63,40.63,3.72,12.14,7.28,20.61,39.69,3.89,37.89,9.56 "Scn10a","sodium voltage-gated channel alpha subunit 10","Plasma Membrane","ion channel",9.74,38.5,10.79,9.25,8.7,13.98,42.45,5.02,5.83,60.68,45.43,52.71,5.28,10.38,1.43,3.34,7.24,6.05,9.4,58.89 "Scn11a","sodium voltage-gated channel alpha subunit 11","Plasma Membrane","ion channel",7.47,34.61,10.02,8.31,8.25,7.32,18.15,5.13,5.97,58.02,23.03,42.18,5.32,8.9,0.57,5.21,6.25,5.72,8.89,47.37 diff --git a/pain_model/scripts/make_ranks.py b/pain_model/scripts/make_ranks.py index eb68260..a901958 100644 --- a/pain_model/scripts/make_ranks.py +++ b/pain_model/scripts/make_ranks.py @@ -21,6 +21,11 @@ proteomics_files = { 'protein_exp': os.path.join(OUTPUT, 'protein_exp.csv'), } + +# Read phospho data +phospho_files = { + 'phospho': os.path.join(OUTPUT, 'phospho.csv') +} # # Check if all the files exists all(True for k, v in transcript_files.items() if os.path.isfile(transcript_files[k])) @@ -69,6 +74,7 @@ proteomics_df.drop(['Unnamed: 0', 'Description', 'MOUSE_SYMBOL'], axis=1, inplace=True) proteomics_df = proteomics_df.loc[proteomics_df.index.drop_duplicates(keep=False), ] + # Create a rank column with 100 as base value rank_df[['score']] = 100.00 for genes in df_merged.index: diff --git a/pain_model/scripts/process_phospho.py b/pain_model/scripts/process_phospho.py index b7f9ec6..1e3db7a 100644 --- a/pain_model/scripts/process_phospho.py +++ b/pain_model/scripts/process_phospho.py @@ -6,7 +6,8 @@ HERE = os.getcwd() # Read phospho data -phospho_xl = os.path.join(HERE, os.pardir, 'data/Primary_mouse/Proteomics/phos_PANA_16plx_Oct2020_working_forSam.xlsx') +phospho_xl = os.path.join(HERE, os.pardir, + 'data/Primary_mouse/Proteomics/phos_PANA_16plx_Oct2020_working_forSam.xlsx') phospho_df = pd.read_excel(phospho_xl, sheet_name='P31_16plx_Panacea_singl&comp') # Read enriched genes @@ -22,7 +23,7 @@ df = dict() -for r,c in phospho_df.iterrows(): +for r, c in phospho_df.iterrows(): if c[2] not in df.keys(): df[c[2]] = { 'Site Position': [c[4]], diff --git a/panacea_indra/nextflow/scripts/compare_indra_cellphonedb.py b/panacea_indra/nextflow/scripts/compare_indra_cellphonedb.py index 8d207ca..9c5f8cc 100644 --- a/panacea_indra/nextflow/scripts/compare_indra_cellphonedb.py +++ b/panacea_indra/nextflow/scripts/compare_indra_cellphonedb.py @@ -8,21 +8,21 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger('Compare_interactions') - up_hgnc = {v: k for k, v in um.uniprot_gene_name.items() if k in um.uniprot_hgnc} -hgnc_up = {v:k for k,v in up_hgnc.items()} - +hgnc_up = {v: k for k, v in up_hgnc.items()} __file__ = '/Users/sbunga/gitHub/panacea_indra/panacea_indra/nextflow/scripts/compare_indra_cellphonedb.py' HERE = os.path.dirname(__file__) OUTPUT = os.path.join(HERE, os.pardir, 'output/') -indra_interactions = pd.read_csv(os.path.join(OUTPUT, 'cellphonedb_database', 'indra_op_nature', 'interaction_input.csv')) -cellphone_interactions = pd.read_csv(os.path.join(OUTPUT, 'cellphonedb_database', 'cellphonedb', 'interaction_input.csv')) +indra_interactions = pd.read_csv( + os.path.join(OUTPUT, 'cellphonedb_database', 'indra_op_nature', 'interaction_input.csv')) +cellphone_interactions = pd.read_csv( + os.path.join(OUTPUT, 'cellphonedb_database', 'cellphonedb', 'interaction_input.csv')) -indra_partner_interactions = {v[0]+'_'+v[1] for v in indra_interactions.values} -cellphone_partner_interactions = {v[0]+'_'+v[1] for v in cellphone_interactions.values} +indra_partner_interactions = {v[0] + '_' + v[1] for v in indra_interactions.values} +cellphone_partner_interactions = {v[0] + '_' + v[1] for v in cellphone_interactions.values} # Total indra interactions logger.info('Total interactions in INDRA, OP and nature paper: %d' % (len(indra_partner_interactions))) @@ -40,7 +40,6 @@ # Unique interactions to cellphonedb logger.info('Unique interactions to cellphonedb: %d' % len(cellphone_partner_interactions - indra_partner_interactions)) - unique_to_cdb = list(cellphone_partner_interactions - indra_partner_interactions) # Map back the interactions to HGNC @@ -48,29 +47,30 @@ for i in unique_to_cdb if len(i.split("_")) == 2 if i.split("_")[0] in hgnc_up and i.split("_")[1] in hgnc_up} - # Get all the receptors from indra interactome rg = get_receptors() +pd.DataFrame({'Receptors': list(rg)}).to_csv(os.path.join(OUTPUT, 'go_receptors.csv'), + index=False, header=True) # Get all ligands from the indra interactome lg = get_ligands() -# Load the protein data from cellphonedb +# Get cellphonedb receptors +cdb_receptor = get_cdb_receptors() protein_generated_cdb = \ pd.read_csv(os.path.join(OUTPUT, 'cellphonedb_database', 'cellphonedb', 'protein_generated.csv')) - -# Get receptors == True from protein_generated.csv table -cdb_receptor = protein_generated_cdb['uniprot'][protein_generated_cdb.receptor == True] -cdb_receptor = {hgnc_up[c] for c in cdb_receptor if c in hgnc_up} - +pd.DataFrame({'Receptors': list(cdb_receptor)}).to_csv(os.path.join(OUTPUT, 'cdb_receptors.csv'), + index=False, header=True) # Receptors unique to cellphonedb unique_rg_cdb = cdb_receptor - rg logger.info('Receptors unique to cellphonedb curation list: %d' % len(cdb_receptor - rg)) # save the list pd.DataFrame({'Receptors': list(unique_rg_cdb)}).to_csv(os.path.join(OUTPUT, 'unique_cdb_receptors.csv'), - index=False, header=True) + index=False, header=True) # Receptors unique to INDRA logger.info('Receptors unique to INDRA: %d' % len(rg - cdb_receptor)) +pd.DataFrame({'Receptors': list(rg - cdb_receptor)}).to_csv(os.path.join(OUTPUT, 'go_unique_receptors.csv'), + index=False, header=True) # How many receptors in INDRA are curated as Receptor in protein_generated table from cellphonedb non_rg = protein_generated_cdb['uniprot'][protein_generated_cdb.receptor == False] @@ -80,7 +80,6 @@ logger.info('Total receptors in INDRA which are curated as receptors by cellphonedb: %d' % (len(cdb_receptor & rg))) # save receptors from INDRA which are curated as False by cellphonedb - pd.DataFrame({'Receptors': list(non_rg & rg)}).to_csv(os.path.join(OUTPUT, 'indra_not_receptors_by_cdb.csv'), index=False, header=True) @@ -112,4 +111,4 @@ # check to which ontology these missing receptors belong to len((indra_non_rg - ion_channels) & receptors_ontology[('receptors')]) -# Subset the protein table with the INDRA receptors which are annotated as False \ No newline at end of file +# Subset the protein table with the INDRA receptors which are annotated as False