diff --git a/docs/_sources/index.rst b/docs/_sources/index.rst index 513c317..28e6287 100644 --- a/docs/_sources/index.rst +++ b/docs/_sources/index.rst @@ -75,10 +75,10 @@ The 'data_preprocessing' directory holds all the data generation steps and divid - Scripts * - Download human genome assembly (GRCh37/hg19) and `uniform TFBS `_ - ``0_download_data.py`` - * - - - - * - - - + * - Preprocess TFBS narrowpeak files and human genome + - ``1_preprocess_narrowPeaks_and_humanGenome.sh`` + * - Overlapping computation for label association + - ``2.1_compute_overlappings_job.sh``
``2.2_compute_overlappings.sh``
``3_postprocess.sh`` * - - * - @@ -89,8 +89,8 @@ The 'data_preprocessing' directory holds all the data generation steps and divid - .. | | `````` | -.. | Preprocess TFBS narrowpeak files and human genome | ```1_preprocess_narrowPeaks_and_humanGenome.sh``` | -.. | Overlapping computation for label association | ```2.1_compute_overlappings_job.sh```
```2.2_compute_overlappings.sh```
```3_postprocess.sh``` | +.. | | ````` | +.. | | | .. | Label association | ```5.1_extract_bins_containingOtherThanACGT.ipynb```
```5.2_compute_peaks_with_labels_clean.sh```| .. | Data preprocessing for DNA breathing dynamics generation and DNABERT2 | ```6.1_create_data_for_pydnaepbd.ipynb```
```6.2_create_data_for_dnabert2.ipynb``` | .. | Train/validation/test split| ```7_create_train_val_test_set.ipynb``` | diff --git a/docs/doctrees/environment.pickle b/docs/doctrees/environment.pickle index 65afad2..ca63671 100644 Binary files a/docs/doctrees/environment.pickle and b/docs/doctrees/environment.pickle differ diff --git a/docs/doctrees/index.doctree b/docs/doctrees/index.doctree index a767559..1b51dd6 100644 Binary files a/docs/doctrees/index.doctree and b/docs/doctrees/index.doctree differ diff --git a/docs/index.html b/docs/index.html index 5b32fa1..358ebf6 100644 --- a/docs/index.html +++ b/docs/index.html @@ -409,11 +409,11 @@

Data Preprocessing Steps

Download human genome assembly (GRCh37/hg19) and uniform TFBS

0_download_data.py

- - +

Preprocess TFBS narrowpeak files and human genome

+

1_preprocess_narrowPeaks_and_humanGenome.sh

- - +

Overlapping computation for label association

+

2.1_compute_overlappings_job.sh``<br /> ``2.2_compute_overlappings.sh``<br /> ``3_postprocess.sh

diff --git a/docs/searchindex.js b/docs/searchindex.js index 260dcc7..b4d82b2 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["index", "modules/epbd_bert", "modules/epbd_bert.datasets", "modules/epbd_bert.dnabert2_classifier", "modules/epbd_bert.dnabert2_epbd", "modules/epbd_bert.dnabert2_epbd_crossattn", "modules/epbd_bert.utility", "modules/modules"], "filenames": ["index.rst", "modules/epbd_bert.rst", "modules/epbd_bert.datasets.rst", "modules/epbd_bert.dnabert2_classifier.rst", "modules/epbd_bert.dnabert2_epbd.rst", "modules/epbd_bert.dnabert2_epbd_crossattn.rst", "modules/epbd_bert.utility.rst", "modules/modules.rst"], "titles": ["Welcome to EPBD-BERT\u2019s documentation!", "epbd_bert package", "epbd_bert.datasets package", "epbd_bert.dnabert2_classifier package", "epbd_bert.dnabert2_epbd package", "epbd_bert.dnabert2_epbd_crossattn package", "epbd_bert.utility package", "epbd_bert"], "terms": {"thi": [0, 5], "repositori": 0, "correspond": 0, "articl": 0, "titl": 0, "advanc": 0, "transcript": 0, "factor": 0, "bind": 0, "site": 0, "predict": 0, "us": 0, "dna": 0, "breath": 0, "dynam": 0, "sequenc": 0, "transform": [0, 2], "via": 0, "cross": 0, "attent": 0, "figur": 0, "1": [0, 3, 4, 5], "overview": 0, "propos": 0, "epbdxbert": 0, "framework": 0, "understand": 0, "impact": 0, "genom": 0, "variant": 0, "gene": 0, "regul": 0, "remain": 0, "kei": 0, "area": 0, "research": 0, "implic": 0, "unravel": 0, "complex": 0, "mechan": 0, "underli": 0, "variou": 0, "function": [0, 5], "effect": 0, "softwar": 0, "delv": 0, "role": 0, "biophys": 0, "properti": 0, "includ": 0, "thermodynam": 0, "stabil": 0, "shape": 0, "flexibl": 0, "tf": 0, "In": 0, "librari": 0, "we": 0, "have": 0, "multi": [0, 2], "modal": [0, 2], "deep": 0, "learn": 0, "integr": 0, "chip": 0, "seq": [0, 2], "chromatin": 0, "immunoprecipit": 0, "vivo": 0, "involv": 0, "690": [0, 3, 4, 5], "event": 0, "human": 0, "our": 0, "significantli": 0, "improv": 0, "perform": [0, 5], "over": 0, "660": 0, "up": 0, "9": [0, 5], "6": 0, "increas": 0, "auroc": 0, "metric": 0, "compar": 0, "baselin": 0, "when": 0, "explicitli": 0, "further": 0, "expand": 0, "analysi": 0, "vitro": 0, "high": 0, "throughput": 0, "systemat": 0, "evolut": 0, "ligand": 0, "exponenti": 0, "enrich": 0, "selex": 0, "establish": 0, "The": 0, "inclus": 0, "featur": [0, 2], "consist": 0, "across": 0, "differ": 0, "cell": 0, "line": 0, "notabl": 0, "dnabert2": [0, 1, 7], "provid": 0, "greater": 0, "capabl": 0, "insight": 0, "diseas": 0, "relat": 0, "non": 0, "code": 0, "found": 0, "wide": 0, "associ": 0, "studi": 0, "work": 0, "highlight": 0, "import": 0, "characterist": 0, "paper": 0, "notebook": 0, "virtur": 0, "environ": 0, "git": 0, "clone": 0, "http": 0, "github": 0, "com": 0, "lanl": 0, "cd": 0, "conda": 0, "creat": 0, "c": 0, "forg": 0, "p": 0, "venv": 0, "epbd_bert_condavenv_test1": 0, "python": 0, "3": [0, 5], "11": [0, 5], "y": 0, "activ": 0, "setup": 0, "py": 0, "scikit": 0, "scipi": 0, "pip": 0, "uninstal": 0, "triton": 0, "did": 0, "util": [0, 1, 7], "hardwar": 0, "depend": 0, "To": 0, "deactiv": 0, "remov": 0, "name": 0, "all": [0, 5], "data_preprocess": 0, "directori": 0, "hold": 0, "gener": 0, "divid": 0, "modul": [0, 7], "bug": 0, "track": 0, "bedtool": 0, "readthedoc": 0, "io": 0, "en": 0, "latest": 0, "oper": 0, "follow": 0, "guid": 0, "content": [0, 7], "html": 0, "also": 0, "bare": 0, "minimum": 0, "script": 0, "download": 0, "pre": 0, "compil": 0, "binari": 0, "bash": 0, "setup_bedtool": 0, "sh": 0, "export": 0, "path": [0, 6], "pwd": 0, "assembli": 0, "grch37": 0, "hg19": 0, "uniform": 0, "tfb": 0, "0_download_data": 0, "can": 0, "from": [0, 2], "here": 0, "usag": 0, "epbd_bert": 0, "sequence_dataset": [0, 1, 7], "onli": 0, "sequence_epbd_dataset": [0, 1, 7], "flat": 0, "sequence_epbd_multimodal_dataset": [0, 1, 7], "matrix": 0, "note": 0, "There": 0, "ar": 0, "some": 0, "other": 0, "each": 0, "exampl": 0, "run": [0, 5], "instruct": 0, "bottom": 0, "finetun": 0, "dnabert2_classifi": [0, 1, 7], "train_lightn": [0, 1, 7], "valid": 0, "split": 0, "vanillaepbd": 0, "coordflip": 0, "dnabert2_epbd": [0, 1, 7], "epbdxdnabert": 0, "2": 0, "dnabert2_epbd_crossattn": [0, 1, 7], "detail": 0, "ablat": 0, "www": 0, "biorxiv": 0, "org": 0, "10": 0, "1101": 0, "2024": 0, "01": [0, 5], "16": 0, "575935v2": 0, "abstract": 0, "m": 0, "lo": 0, "alamo": 0, "nation": 0, "lab": 0, "t": 0, "triad": 0, "secur": 0, "llc": 0, "right": 0, "reserv": 0, "program": 0, "wa": 0, "produc": 0, "under": 0, "u": 0, "govern": 0, "contract": 0, "89233218cna000001": 0, "laboratori": 0, "which": 0, "i": 0, "depart": 0, "energi": 0, "nuclear": 0, "administr": 0, "grant": 0, "itself": 0, "act": 0, "its": 0, "behalf": 0, "nonexclus": 0, "paid": 0, "irrevoc": 0, "worldwid": 0, "materi": 0, "reproduc": 0, "prepar": 0, "deriv": 0, "distribut": 0, "copi": 0, "public": 0, "publicli": 0, "displai": 0, "permit": 0, "do": 0, "so": 0, "o": 0, "4717": 0, "open": 0, "sourc": [0, 1, 2, 3, 4, 5, 6], "bsd": 0, "redistribut": 0, "form": 0, "without": 0, "modif": 0, "condit": 0, "met": 0, "must": 0, "retain": 0, "abov": 0, "list": 0, "disclaim": 0, "neither": 0, "holder": 0, "nor": 0, "contributor": 0, "mai": 0, "endors": 0, "promot": 0, "product": 0, "specif": 0, "prior": 0, "written": 0, "permiss": 0, "BY": 0, "THE": 0, "AND": 0, "AS": 0, "ani": 0, "express": 0, "OR": 0, "impli": 0, "warranti": 0, "BUT": 0, "NOT": 0, "limit": 0, "TO": 0, "OF": 0, "merchant": 0, "fit": 0, "FOR": 0, "A": 0, "particular": 0, "purpos": 0, "IN": 0, "NO": 0, "shall": 0, "BE": 0, "liabl": 0, "direct": 0, "indirect": 0, "incident": 0, "special": 0, "exemplari": 0, "consequenti": 0, "damag": 0, "procur": 0, "substitut": 0, "good": 0, "servic": 0, "loss": 0, "profit": 0, "busi": 0, "interrupt": 0, "howev": 0, "caus": 0, "ON": 0, "theori": 0, "liabil": 0, "whether": 0, "strict": 0, "tort": 0, "neglig": 0, "otherwis": 0, "aris": 0, "wai": 0, "out": 0, "even": 0, "IF": 0, "advis": 0, "possibl": 0, "SUCH": 0, "packag": [0, 7], "subpackag": [0, 7], "submodul": [0, 7], "models_factori": [0, 7], "get_model_and_dataload": [0, 1, 7], "path_config": [0, 7], "version": [0, 7], "index": 0, "search": 0, "page": 0, "anowarul": 0, "kabir": 0, "comput": [0, 5], "sciec": 0, "georg": 0, "mason": 0, "univers": 0, "manish": 0, "bhattarai": 0, "theoret": 0, "divis": 0, "kim": 0, "rasmussen": 0, "amarda": 0, "shehu": 0, "alan": 0, "bishop": 0, "boian": 0, "alexandrov": 0, "anni": 0, "usheva": 0, "surgeri": 0, "rhode": 0, "island": 0, "hospit": 0, "brown": 0, "kabir2024advanc": 0, "r": 0, "journal": 0, "year": 0, "publish": 0, "cold": 0, "spring": 0, "harbor": 0, "doi": 0, "5281": 0, "zenodo": 0, "11130474": 0, "url": 0, "dataset": [1, 7], "data_col": [1, 7], "seqlabeldatacol": [1, 2], "seqlabelepbddatacol": [1, 2], "sequencedataset": [1, 2], "sequenceepbddataset": [1, 2], "sequenceepbdmultimodaldataset": [1, 2], "sequence_epbd_multimodal_labelspecific_dataset": [1, 7], "sequenceepbdmultimodallabelspecificdataset": [1, 2], "sequence_randepbd_dataset": [1, 7], "sequencerandepbddataset": [1, 2], "sequence_randepbd_multimodal_dataset": [1, 7], "sequencerandepbdmultimodaldataset": [1, 2], "config": [1, 7], "batch_siz": [1, 3, 4, 5], "best_model_monitor": [1, 3, 4, 5], "best_model_monitor_mod": [1, 3, 4, 5], "learning_r": [1, 3, 4, 5], "max_epoch": [1, 3, 4, 5], "n_class": [1, 3, 4, 5], "num_work": [1, 3, 4, 5], "weight_decai": [1, 3, 4, 5], "model": [1, 7], "dnabert2classifi": [1, 3], "calculate_loss": [1, 3, 4, 5], "configure_optim": [1, 3, 4, 5], "forward": [1, 3, 4, 5], "on_validation_epoch_end": [1, 3, 4, 5], "training_step": [1, 3, 4, 5], "validation_step": [1, 3, 4, 5], "test": [1, 7], "epbd_feature_input_dim": [1, 4], "epbd_features_typ": [1, 4], "dnabert2epbdmodel": [1, 4], "epbdconfig": [1, 5], "d_ff": [1, 5], "d_model": [1, 5], "epbd_embedder_kernel_s": [1, 5], "epbd_feature_channel": [1, 5], "need_weight": [1, 5], "num_head": [1, 5], "p_dropout": [1, 5], "trainingconfig": [1, 5], "epbddnabert2model": [1, 5], "load_pretrained_model": [1, 5], "epbdembedd": [1, 5], "multimodallay": [1, 5], "poolinglay": [1, 5], "positionwisefeedforward": [1, 5], "compute_predict": [1, 5], "data_util": [1, 7], "compute_multi_class_weight": [1, 6], "get_uniform_peaks_metadata": [1, 6], "get_dnabert2_pretrained_model": [1, 6], "get_dnabert2_token": [1, 6], "load_dnabert2_for_classif": [1, 6], "pickle_util": [1, 7], "load": [1, 6], "save": [1, 6], "model_nam": 1, "data_path": [1, 2], "token": [1, 2], "pretrainedtoken": [1, 2], "64": 1, "8": [1, 5], "class": [2, 3, 4, 5], "pad_token_id": 2, "0": [2, 3, 4, 5], "base": [2, 3, 4, 5], "object": [2, 3, 4, 5], "str": [2, 3, 4, 5], "home_dir": [2, 6], "supervis": 2, "fine": 2, "tune": 2, "pydnaepbd_features_path": 2, "epbd": 2, "label": 2, "wgencodeawgtfbsbroaddnd41ctcfunipk": 2, "int": [3, 4, 5], "170": [3, 4], "32": [3, 4, 5], "float": [3, 4, 5], "1e": [3, 4, 5], "05": [3, 4, 5], "100": [3, 4], "val_loss": [3, 4, 5], "min": [3, 4, 5], "lightningmodul": [3, 4, 5], "_summary_": [3, 4, 5], "paramet": [3, 4, 5], "_description_": [3, 4, 5], "logit": [3, 4, 5], "tensor": [3, 4, 5], "target": [3, 4, 5], "torch": [3, 4, 5], "return": [3, 4, 5], "type": [3, 4, 5], "_type_": [3, 4, 5], "input": [3, 4, 5], "batch": [3, 4, 5], "batch_idx": [3, 4, 5], "1200": 4, "lightn": 4, "5": 5, "256": 5, "768": 5, "bool": 5, "fals": 5, "classmethod": 5, "checkpoint_path": 5, "mode": 5, "eval": 5, "option": 5, "default": 5, "in_channel": 5, "kernel_s": 5, "initi": 5, "intern": 5, "state": 5, "share": 5, "both": 5, "nn": 5, "scriptmodul": 5, "x": 5, "defin": 5, "everi": 5, "call": 5, "should": 5, "overridden": 5, "subclass": 5, "although": 5, "recip": 5, "pass": 5, "need": 5, "within": 5, "one": 5, "instanc": 5, "afterward": 5, "instead": 5, "sinc": 5, "former": 5, "take": 5, "care": 5, "regist": 5, "hook": 5, "while": 5, "latter": 5, "silent": 5, "ignor": 5, "them": 5, "epbd_embed": 5, "seq_embed": 5, "key_padding_mask": 5, "none": 5, "dropout": 5, "dl": 5, "dataload": 5, "output_preds_path": 5, "compute_again": 5, "usr": 6, "project": 6, "pydna_epbd": 6, "tf_dna_bind": 6, "max_num_token": 6, "512": 6, "num_label": 6, "data": 6}, "objects": {"": [[1, 0, 0, "-", "epbd_bert"]], "epbd_bert": [[2, 0, 0, "-", "datasets"], [3, 0, 0, "-", "dnabert2_classifier"], [4, 0, 0, "-", "dnabert2_epbd"], [5, 0, 0, "-", "dnabert2_epbd_crossattn"], [1, 0, 0, "-", "models_factory"], [1, 0, 0, "-", "path_configs"], [6, 0, 0, "-", "utility"], [1, 0, 0, "-", "version"]], "epbd_bert.datasets": [[2, 0, 0, "-", "data_collators"], [2, 0, 0, "-", "sequence_dataset"], [2, 0, 0, "-", "sequence_epbd_dataset"], [2, 0, 0, "-", "sequence_epbd_multimodal_dataset"], [2, 0, 0, "-", "sequence_epbd_multimodal_labelspecific_dataset"], [2, 0, 0, "-", "sequence_randepbd_dataset"], [2, 0, 0, "-", "sequence_randepbd_multimodal_dataset"]], "epbd_bert.datasets.data_collators": [[2, 1, 1, "", "SeqLabelDataCollator"], [2, 1, 1, "", "SeqLabelEPBDDataCollator"]], "epbd_bert.datasets.sequence_dataset": [[2, 1, 1, "", "SequenceDataset"]], "epbd_bert.datasets.sequence_epbd_dataset": [[2, 1, 1, "", "SequenceEPBDDataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_dataset": [[2, 1, 1, "", "SequenceEPBDMultiModalDataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset": [[2, 1, 1, "", "SequenceEPBDMultiModalLabelSpecificDataset"]], "epbd_bert.datasets.sequence_randepbd_dataset": [[2, 1, 1, "", "SequenceRandEPBDDataset"]], "epbd_bert.datasets.sequence_randepbd_multimodal_dataset": [[2, 1, 1, "", "SequenceRandEPBDMultiModalDataset"]], "epbd_bert.dnabert2_classifier": [[3, 0, 0, "-", "configs"], [3, 0, 0, "-", "model"], [3, 0, 0, "-", "test"], [3, 0, 0, "-", "train_lightning"]], "epbd_bert.dnabert2_classifier.configs": [[3, 1, 1, "", "Configs"]], "epbd_bert.dnabert2_classifier.configs.Configs": [[3, 2, 1, "", "batch_size"], [3, 2, 1, "", "best_model_monitor"], [3, 2, 1, "", "best_model_monitor_mode"], [3, 2, 1, "", "learning_rate"], [3, 2, 1, "", "max_epochs"], [3, 2, 1, "", "n_classes"], [3, 2, 1, "", "num_workers"], [3, 2, 1, "", "weight_decay"]], "epbd_bert.dnabert2_classifier.model": [[3, 1, 1, "", "DNABERT2Classifier"]], "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier": [[3, 3, 1, "", "calculate_loss"], [3, 3, 1, "", "configure_optimizers"], [3, 3, 1, "", "forward"], [3, 3, 1, "", "on_validation_epoch_end"], [3, 3, 1, "", "training_step"], [3, 3, 1, "", "validation_step"]], "epbd_bert.dnabert2_epbd": [[4, 0, 0, "-", "configs"], [4, 0, 0, "-", "model"], [4, 0, 0, "-", "test"], [4, 0, 0, "-", "train_lightning"]], "epbd_bert.dnabert2_epbd.configs": [[4, 1, 1, "", "Configs"]], "epbd_bert.dnabert2_epbd.configs.Configs": [[4, 2, 1, "", "batch_size"], [4, 2, 1, "", "best_model_monitor"], [4, 2, 1, "", "best_model_monitor_mode"], [4, 2, 1, "", "epbd_feature_input_dim"], [4, 2, 1, "", "epbd_features_type"], [4, 2, 1, "", "learning_rate"], [4, 2, 1, "", "max_epochs"], [4, 2, 1, "", "n_classes"], [4, 2, 1, "", "num_workers"], [4, 2, 1, "", "weight_decay"]], "epbd_bert.dnabert2_epbd.model": [[4, 1, 1, "", "Dnabert2EPBDModel"]], "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel": [[4, 3, 1, "", "calculate_loss"], [4, 3, 1, "", "configure_optimizers"], [4, 3, 1, "", "forward"], [4, 3, 1, "", "on_validation_epoch_end"], [4, 3, 1, "", "training_step"], [4, 3, 1, "", "validation_step"]], "epbd_bert.dnabert2_epbd_crossattn": [[5, 0, 0, "-", "configs"], [5, 0, 0, "-", "model"], [5, 0, 0, "-", "test"], [5, 0, 0, "-", "train_lightning"]], "epbd_bert.dnabert2_epbd_crossattn.configs": [[5, 1, 1, "", "EPBDConfigs"], [5, 1, 1, "", "TrainingConfigs"]], "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs": [[5, 2, 1, "", "best_model_monitor"], [5, 2, 1, "", "best_model_monitor_mode"], [5, 2, 1, "", "d_ff"], [5, 2, 1, "", "d_model"], [5, 2, 1, "", "epbd_embedder_kernel_size"], [5, 2, 1, "", "epbd_feature_channels"], [5, 2, 1, "", "n_classes"], [5, 2, 1, "", "need_weights"], [5, 2, 1, "", "num_heads"], [5, 2, 1, "", "p_dropout"]], "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs": [[5, 2, 1, "", "batch_size"], [5, 2, 1, "", "learning_rate"], [5, 2, 1, "", "max_epochs"], [5, 2, 1, "", "num_workers"], [5, 2, 1, "", "weight_decay"]], "epbd_bert.dnabert2_epbd_crossattn.model": [[5, 1, 1, "", "EPBDDnabert2Model"], [5, 1, 1, "", "EPBDEmbedder"], [5, 1, 1, "", "MultiModalLayer"], [5, 1, 1, "", "PoolingLayer"], [5, 1, 1, "", "PositionWiseFeedForward"]], "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model": [[5, 3, 1, "", "calculate_loss"], [5, 3, 1, "", "configure_optimizers"], [5, 3, 1, "", "forward"], [5, 3, 1, "", "load_pretrained_model"], [5, 3, 1, "", "on_validation_epoch_end"], [5, 3, 1, "", "training_step"], [5, 3, 1, "", "validation_step"]], "epbd_bert.dnabert2_epbd_crossattn.model.EPBDEmbedder": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.model.MultiModalLayer": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.model.PoolingLayer": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.model.PositionWiseFeedForward": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.test": [[5, 4, 1, "", "compute_predictions"]], "epbd_bert.models_factory": [[1, 4, 1, "", "get_model_and_dataloader"]], "epbd_bert.utility": [[6, 0, 0, "-", "data_utils"], [6, 0, 0, "-", "dnabert2"], [6, 0, 0, "-", "pickle_utils"]], "epbd_bert.utility.data_utils": [[6, 4, 1, "", "compute_multi_class_weights"], [6, 4, 1, "", "get_uniform_peaks_metadata"]], "epbd_bert.utility.dnabert2": [[6, 4, 1, "", "get_dnabert2_pretrained_model"], [6, 4, 1, "", "get_dnabert2_tokenizer"], [6, 4, 1, "", "load_dnabert2_for_classification"]], "epbd_bert.utility.pickle_utils": [[6, 4, 1, "", "load"], [6, 4, 1, "", "save"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "function", "Python function"]}, "titleterms": {"welcom": 0, "epbd": 0, "bert": 0, "": 0, "document": 0, "resourc": 0, "instal": 0, "data": 0, "preprocess": 0, "step": 0, "dataset": [0, 2], "load": 0, "train": 0, "test": [0, 3, 4, 5], "develop": 0, "model": [0, 3, 4, 5], "acknowledg": 0, "copyright": 0, "notic": 0, "licens": 0, "indic": 0, "tabl": 0, "author": 0, "how": 0, "cite": 0, "epbd_bert": [1, 2, 3, 4, 5, 6, 7], "packag": [1, 2, 3, 4, 5, 6], "subpackag": 1, "submodul": [1, 2, 3, 4, 5, 6], "models_factori": 1, "modul": [1, 2, 3, 4, 5, 6], "path_config": 1, "version": 1, "content": [1, 2, 3, 4, 5, 6], "data_col": 2, "sequence_dataset": 2, "sequence_epbd_dataset": 2, "sequence_epbd_multimodal_dataset": 2, "sequence_epbd_multimodal_labelspecific_dataset": 2, "sequence_randepbd_dataset": 2, "sequence_randepbd_multimodal_dataset": 2, "dnabert2_classifi": 3, "config": [3, 4, 5], "train_lightn": [3, 4, 5], "dnabert2_epbd": 4, "dnabert2_epbd_crossattn": 5, "util": 6, "data_util": 6, "dnabert2": 6, "pickle_util": 6}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Welcome to EPBD-BERT\u2019s documentation!": [[0, "welcome-to-epbd-bert-s-documentation"]], "Resources": [[0, "resources"]], "Installation": [[0, "installation"]], "Data Preprocessing Steps": [[0, "data-preprocessing-steps"]], "Preprocessed dataset loading": [[0, "preprocessed-dataset-loading"]], "Training and testing the developed models": [[0, "training-and-testing-the-developed-models"]], "Acknowledgements": [[0, "acknowledgements"]], "Copyright notice": [[0, "copyright-notice"]], "License": [[0, "license"]], "Documentation": [[0, "id1"]], "Indices and tables": [[0, "indices-and-tables"]], "Authors": [[0, "authors"]], "How to cite EPBD-BERT?": [[0, "how-to-cite-epbd-bert"]], "epbd_bert package": [[1, "epbd-bert-package"]], "Subpackages": [[1, "subpackages"]], "Submodules": [[1, "submodules"], [2, "submodules"], [3, "submodules"], [4, "submodules"], [5, "submodules"], [6, "submodules"]], "epbd_bert.models_factory module": [[1, "module-epbd_bert.models_factory"]], "epbd_bert.path_configs module": [[1, "module-epbd_bert.path_configs"]], "epbd_bert.version module": [[1, "module-epbd_bert.version"]], "Module contents": [[1, "module-epbd_bert"], [2, "module-epbd_bert.datasets"], [3, "module-epbd_bert.dnabert2_classifier"], [4, "module-epbd_bert.dnabert2_epbd"], [5, "module-epbd_bert.dnabert2_epbd_crossattn"], [6, "module-epbd_bert.utility"]], "epbd_bert.datasets package": [[2, "epbd-bert-datasets-package"]], "epbd_bert.datasets.data_collators module": [[2, "module-epbd_bert.datasets.data_collators"]], "epbd_bert.datasets.sequence_dataset module": [[2, "module-epbd_bert.datasets.sequence_dataset"]], "epbd_bert.datasets.sequence_epbd_dataset module": [[2, "module-epbd_bert.datasets.sequence_epbd_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_dataset module": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset module": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset"]], "epbd_bert.datasets.sequence_randepbd_dataset module": [[2, "module-epbd_bert.datasets.sequence_randepbd_dataset"]], "epbd_bert.datasets.sequence_randepbd_multimodal_dataset module": [[2, "module-epbd_bert.datasets.sequence_randepbd_multimodal_dataset"]], "epbd_bert.dnabert2_classifier package": [[3, "epbd-bert-dnabert2-classifier-package"]], "epbd_bert.dnabert2_classifier.configs module": [[3, "module-epbd_bert.dnabert2_classifier.configs"]], "epbd_bert.dnabert2_classifier.model module": [[3, "module-epbd_bert.dnabert2_classifier.model"]], "epbd_bert.dnabert2_classifier.test module": [[3, "module-epbd_bert.dnabert2_classifier.test"]], "epbd_bert.dnabert2_classifier.train_lightning module": [[3, "module-epbd_bert.dnabert2_classifier.train_lightning"]], "epbd_bert.dnabert2_epbd package": [[4, "epbd-bert-dnabert2-epbd-package"]], "epbd_bert.dnabert2_epbd.configs module": [[4, "module-epbd_bert.dnabert2_epbd.configs"]], "epbd_bert.dnabert2_epbd.model module": [[4, "module-epbd_bert.dnabert2_epbd.model"]], "epbd_bert.dnabert2_epbd.test module": [[4, "module-epbd_bert.dnabert2_epbd.test"]], "epbd_bert.dnabert2_epbd.train_lightning module": [[4, "module-epbd_bert.dnabert2_epbd.train_lightning"]], "epbd_bert.dnabert2_epbd_crossattn package": [[5, "epbd-bert-dnabert2-epbd-crossattn-package"]], "epbd_bert.dnabert2_epbd_crossattn.configs module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.configs"]], "epbd_bert.dnabert2_epbd_crossattn.model module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.model"]], "epbd_bert.dnabert2_epbd_crossattn.test module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.test"]], "epbd_bert.dnabert2_epbd_crossattn.train_lightning module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.train_lightning"]], "epbd_bert.utility package": [[6, "epbd-bert-utility-package"]], "epbd_bert.utility.data_utils module": [[6, "module-epbd_bert.utility.data_utils"]], "epbd_bert.utility.dnabert2 module": [[6, "module-epbd_bert.utility.dnabert2"]], "epbd_bert.utility.pickle_utils module": [[6, "module-epbd_bert.utility.pickle_utils"]], "epbd_bert": [[7, "epbd-bert"]]}, "indexentries": {"epbd_bert": [[1, "module-epbd_bert"]], "epbd_bert.models_factory": [[1, "module-epbd_bert.models_factory"]], "epbd_bert.path_configs": [[1, "module-epbd_bert.path_configs"]], "epbd_bert.version": [[1, "module-epbd_bert.version"]], "get_model_and_dataloader() (in module epbd_bert.models_factory)": [[1, "epbd_bert.models_factory.get_model_and_dataloader"]], "module": [[1, "module-epbd_bert"], [1, "module-epbd_bert.models_factory"], [1, "module-epbd_bert.path_configs"], [1, "module-epbd_bert.version"], [2, "module-epbd_bert.datasets"], [2, "module-epbd_bert.datasets.data_collators"], [2, "module-epbd_bert.datasets.sequence_dataset"], [2, "module-epbd_bert.datasets.sequence_epbd_dataset"], [2, "module-epbd_bert.datasets.sequence_epbd_multimodal_dataset"], [2, "module-epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset"], [2, "module-epbd_bert.datasets.sequence_randepbd_dataset"], [2, "module-epbd_bert.datasets.sequence_randepbd_multimodal_dataset"], [3, "module-epbd_bert.dnabert2_classifier"], [3, "module-epbd_bert.dnabert2_classifier.configs"], [3, "module-epbd_bert.dnabert2_classifier.model"], [3, "module-epbd_bert.dnabert2_classifier.test"], [3, "module-epbd_bert.dnabert2_classifier.train_lightning"], [4, "module-epbd_bert.dnabert2_epbd"], [4, "module-epbd_bert.dnabert2_epbd.configs"], [4, "module-epbd_bert.dnabert2_epbd.model"], [4, "module-epbd_bert.dnabert2_epbd.test"], [4, "module-epbd_bert.dnabert2_epbd.train_lightning"], [5, "module-epbd_bert.dnabert2_epbd_crossattn"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.configs"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.model"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.test"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.train_lightning"], [6, "module-epbd_bert.utility"], [6, "module-epbd_bert.utility.data_utils"], [6, "module-epbd_bert.utility.dnabert2"], [6, "module-epbd_bert.utility.pickle_utils"]], "seqlabeldatacollator (class in epbd_bert.datasets.data_collators)": [[2, "epbd_bert.datasets.data_collators.SeqLabelDataCollator"]], "seqlabelepbddatacollator (class in epbd_bert.datasets.data_collators)": [[2, "epbd_bert.datasets.data_collators.SeqLabelEPBDDataCollator"]], "sequencedataset (class in epbd_bert.datasets.sequence_dataset)": [[2, "epbd_bert.datasets.sequence_dataset.SequenceDataset"]], "sequenceepbddataset (class in epbd_bert.datasets.sequence_epbd_dataset)": [[2, "epbd_bert.datasets.sequence_epbd_dataset.SequenceEPBDDataset"]], "sequenceepbdmultimodaldataset (class in epbd_bert.datasets.sequence_epbd_multimodal_dataset)": [[2, "epbd_bert.datasets.sequence_epbd_multimodal_dataset.SequenceEPBDMultiModalDataset"]], "sequenceepbdmultimodallabelspecificdataset (class in epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset)": [[2, "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset.SequenceEPBDMultiModalLabelSpecificDataset"]], "sequencerandepbddataset (class in epbd_bert.datasets.sequence_randepbd_dataset)": [[2, "epbd_bert.datasets.sequence_randepbd_dataset.SequenceRandEPBDDataset"]], "sequencerandepbdmultimodaldataset (class in epbd_bert.datasets.sequence_randepbd_multimodal_dataset)": [[2, "epbd_bert.datasets.sequence_randepbd_multimodal_dataset.SequenceRandEPBDMultiModalDataset"]], "epbd_bert.datasets": [[2, "module-epbd_bert.datasets"]], "epbd_bert.datasets.data_collators": [[2, "module-epbd_bert.datasets.data_collators"]], "epbd_bert.datasets.sequence_dataset": [[2, "module-epbd_bert.datasets.sequence_dataset"]], "epbd_bert.datasets.sequence_epbd_dataset": [[2, "module-epbd_bert.datasets.sequence_epbd_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_dataset": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset"]], "epbd_bert.datasets.sequence_randepbd_dataset": [[2, "module-epbd_bert.datasets.sequence_randepbd_dataset"]], "epbd_bert.datasets.sequence_randepbd_multimodal_dataset": [[2, "module-epbd_bert.datasets.sequence_randepbd_multimodal_dataset"]], "configs (class in epbd_bert.dnabert2_classifier.configs)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs"]], "dnabert2classifier (class in epbd_bert.dnabert2_classifier.model)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier"]], "batch_size (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.batch_size"]], "best_model_monitor (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.best_model_monitor"]], "best_model_monitor_mode (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.best_model_monitor_mode"]], "calculate_loss() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.calculate_loss"]], "configure_optimizers() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.configure_optimizers"]], "epbd_bert.dnabert2_classifier": [[3, "module-epbd_bert.dnabert2_classifier"]], "epbd_bert.dnabert2_classifier.configs": [[3, "module-epbd_bert.dnabert2_classifier.configs"]], "epbd_bert.dnabert2_classifier.model": [[3, "module-epbd_bert.dnabert2_classifier.model"]], "epbd_bert.dnabert2_classifier.test": [[3, "module-epbd_bert.dnabert2_classifier.test"]], "epbd_bert.dnabert2_classifier.train_lightning": [[3, "module-epbd_bert.dnabert2_classifier.train_lightning"]], "forward() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.forward"]], "learning_rate (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.learning_rate"]], "max_epochs (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.max_epochs"]], "n_classes (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.n_classes"]], "num_workers (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.num_workers"]], "on_validation_epoch_end() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.on_validation_epoch_end"]], "training_step() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.training_step"]], "validation_step() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.validation_step"]], "weight_decay (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.weight_decay"]], "configs (class in epbd_bert.dnabert2_epbd.configs)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs"]], "dnabert2epbdmodel (class in epbd_bert.dnabert2_epbd.model)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel"]], "batch_size (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.batch_size"]], "best_model_monitor (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.best_model_monitor"]], "best_model_monitor_mode (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.best_model_monitor_mode"]], "calculate_loss() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.calculate_loss"]], "configure_optimizers() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.configure_optimizers"]], "epbd_bert.dnabert2_epbd": [[4, "module-epbd_bert.dnabert2_epbd"]], "epbd_bert.dnabert2_epbd.configs": [[4, "module-epbd_bert.dnabert2_epbd.configs"]], "epbd_bert.dnabert2_epbd.model": [[4, "module-epbd_bert.dnabert2_epbd.model"]], "epbd_bert.dnabert2_epbd.test": [[4, "module-epbd_bert.dnabert2_epbd.test"]], "epbd_bert.dnabert2_epbd.train_lightning": [[4, "module-epbd_bert.dnabert2_epbd.train_lightning"]], "epbd_feature_input_dim (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.epbd_feature_input_dim"]], "epbd_features_type (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.epbd_features_type"]], "forward() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.forward"]], "learning_rate (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.learning_rate"]], "max_epochs (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.max_epochs"]], "n_classes (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.n_classes"]], "num_workers (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.num_workers"]], "on_validation_epoch_end() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.on_validation_epoch_end"]], "training_step() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.training_step"]], "validation_step() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.validation_step"]], "weight_decay (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.weight_decay"]], "epbdconfigs (class in epbd_bert.dnabert2_epbd_crossattn.configs)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs"]], "epbddnabert2model (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model"]], "epbdembedder (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDEmbedder"]], "multimodallayer (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.MultiModalLayer"]], "poolinglayer (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PoolingLayer"]], "positionwisefeedforward (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PositionWiseFeedForward"]], "trainingconfigs (class in epbd_bert.dnabert2_epbd_crossattn.configs)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs"]], "batch_size (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.batch_size"]], "best_model_monitor (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.best_model_monitor"]], "best_model_monitor_mode (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.best_model_monitor_mode"]], "calculate_loss() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.calculate_loss"]], "compute_predictions() (in module epbd_bert.dnabert2_epbd_crossattn.test)": [[5, "epbd_bert.dnabert2_epbd_crossattn.test.compute_predictions"]], "configure_optimizers() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.configure_optimizers"]], "d_ff (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.d_ff"]], "d_model (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.d_model"]], "epbd_bert.dnabert2_epbd_crossattn": [[5, "module-epbd_bert.dnabert2_epbd_crossattn"]], "epbd_bert.dnabert2_epbd_crossattn.configs": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.configs"]], "epbd_bert.dnabert2_epbd_crossattn.model": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.model"]], "epbd_bert.dnabert2_epbd_crossattn.test": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.test"]], "epbd_bert.dnabert2_epbd_crossattn.train_lightning": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.train_lightning"]], "epbd_embedder_kernel_size (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.epbd_embedder_kernel_size"]], "epbd_feature_channels (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.epbd_feature_channels"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.epbdembedder method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDEmbedder.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.multimodallayer method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.MultiModalLayer.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.poolinglayer method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PoolingLayer.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.positionwisefeedforward method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PositionWiseFeedForward.forward"]], "learning_rate (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.learning_rate"]], "load_pretrained_model() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model class method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.load_pretrained_model"]], "max_epochs (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.max_epochs"]], "n_classes (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.n_classes"]], "need_weights (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.need_weights"]], "num_heads (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.num_heads"]], "num_workers (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.num_workers"]], "on_validation_epoch_end() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.on_validation_epoch_end"]], "p_dropout (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.p_dropout"]], "training_step() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.training_step"]], "validation_step() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.validation_step"]], "weight_decay (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.weight_decay"]], "compute_multi_class_weights() (in module epbd_bert.utility.data_utils)": [[6, "epbd_bert.utility.data_utils.compute_multi_class_weights"]], "epbd_bert.utility": [[6, "module-epbd_bert.utility"]], "epbd_bert.utility.data_utils": [[6, "module-epbd_bert.utility.data_utils"]], "epbd_bert.utility.dnabert2": [[6, "module-epbd_bert.utility.dnabert2"]], "epbd_bert.utility.pickle_utils": [[6, "module-epbd_bert.utility.pickle_utils"]], "get_dnabert2_pretrained_model() (in module epbd_bert.utility.dnabert2)": [[6, "epbd_bert.utility.dnabert2.get_dnabert2_pretrained_model"]], "get_dnabert2_tokenizer() (in module epbd_bert.utility.dnabert2)": [[6, "epbd_bert.utility.dnabert2.get_dnabert2_tokenizer"]], "get_uniform_peaks_metadata() (in module epbd_bert.utility.data_utils)": [[6, "epbd_bert.utility.data_utils.get_uniform_peaks_metadata"]], "load() (in module epbd_bert.utility.pickle_utils)": [[6, "epbd_bert.utility.pickle_utils.load"]], "load_dnabert2_for_classification() (in module epbd_bert.utility.dnabert2)": [[6, "epbd_bert.utility.dnabert2.load_dnabert2_for_classification"]], "save() (in module epbd_bert.utility.pickle_utils)": [[6, "epbd_bert.utility.pickle_utils.save"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["index", "modules/epbd_bert", "modules/epbd_bert.datasets", "modules/epbd_bert.dnabert2_classifier", "modules/epbd_bert.dnabert2_epbd", "modules/epbd_bert.dnabert2_epbd_crossattn", "modules/epbd_bert.utility", "modules/modules"], "filenames": ["index.rst", "modules/epbd_bert.rst", "modules/epbd_bert.datasets.rst", "modules/epbd_bert.dnabert2_classifier.rst", "modules/epbd_bert.dnabert2_epbd.rst", "modules/epbd_bert.dnabert2_epbd_crossattn.rst", "modules/epbd_bert.utility.rst", "modules/modules.rst"], "titles": ["Welcome to EPBD-BERT\u2019s documentation!", "epbd_bert package", "epbd_bert.datasets package", "epbd_bert.dnabert2_classifier package", "epbd_bert.dnabert2_epbd package", "epbd_bert.dnabert2_epbd_crossattn package", "epbd_bert.utility package", "epbd_bert"], "terms": {"thi": [0, 5], "repositori": 0, "correspond": 0, "articl": 0, "titl": 0, "advanc": 0, "transcript": 0, "factor": 0, "bind": 0, "site": 0, "predict": 0, "us": 0, "dna": 0, "breath": 0, "dynam": 0, "sequenc": 0, "transform": [0, 2], "via": 0, "cross": 0, "attent": 0, "figur": 0, "1": [0, 3, 4, 5], "overview": 0, "propos": 0, "epbdxbert": 0, "framework": 0, "understand": 0, "impact": 0, "genom": 0, "variant": 0, "gene": 0, "regul": 0, "remain": 0, "kei": 0, "area": 0, "research": 0, "implic": 0, "unravel": 0, "complex": 0, "mechan": 0, "underli": 0, "variou": 0, "function": [0, 5], "effect": 0, "softwar": 0, "delv": 0, "role": 0, "biophys": 0, "properti": 0, "includ": 0, "thermodynam": 0, "stabil": 0, "shape": 0, "flexibl": 0, "tf": 0, "In": 0, "librari": 0, "we": 0, "have": 0, "multi": [0, 2], "modal": [0, 2], "deep": 0, "learn": 0, "integr": 0, "chip": 0, "seq": [0, 2], "chromatin": 0, "immunoprecipit": 0, "vivo": 0, "involv": 0, "690": [0, 3, 4, 5], "event": 0, "human": 0, "our": 0, "significantli": 0, "improv": 0, "perform": [0, 5], "over": 0, "660": 0, "up": 0, "9": [0, 5], "6": 0, "increas": 0, "auroc": 0, "metric": 0, "compar": 0, "baselin": 0, "when": 0, "explicitli": 0, "further": 0, "expand": 0, "analysi": 0, "vitro": 0, "high": 0, "throughput": 0, "systemat": 0, "evolut": 0, "ligand": 0, "exponenti": 0, "enrich": 0, "selex": 0, "establish": 0, "The": 0, "inclus": 0, "featur": [0, 2], "consist": 0, "across": 0, "differ": 0, "cell": 0, "line": 0, "notabl": 0, "dnabert2": [0, 1, 7], "provid": 0, "greater": 0, "capabl": 0, "insight": 0, "diseas": 0, "relat": 0, "non": 0, "code": 0, "found": 0, "wide": 0, "associ": 0, "studi": 0, "work": 0, "highlight": 0, "import": 0, "characterist": 0, "paper": 0, "notebook": 0, "virtur": 0, "environ": 0, "git": 0, "clone": 0, "http": 0, "github": 0, "com": 0, "lanl": 0, "cd": 0, "conda": 0, "creat": 0, "c": 0, "forg": 0, "p": 0, "venv": 0, "epbd_bert_condavenv_test1": 0, "python": 0, "3": [0, 5], "11": [0, 5], "y": 0, "activ": 0, "setup": 0, "py": 0, "scikit": 0, "scipi": 0, "pip": 0, "uninstal": 0, "triton": 0, "did": 0, "util": [0, 1, 7], "hardwar": 0, "depend": 0, "To": 0, "deactiv": 0, "remov": 0, "name": 0, "all": [0, 5], "data_preprocess": 0, "directori": 0, "hold": 0, "gener": 0, "divid": 0, "modul": [0, 7], "bug": 0, "track": 0, "bedtool": 0, "readthedoc": 0, "io": 0, "en": 0, "latest": 0, "oper": 0, "follow": 0, "guid": 0, "content": [0, 7], "html": 0, "also": 0, "bare": 0, "minimum": 0, "script": 0, "download": 0, "pre": 0, "compil": 0, "binari": 0, "bash": 0, "setup_bedtool": 0, "sh": 0, "export": 0, "path": [0, 6], "pwd": 0, "assembli": 0, "grch37": 0, "hg19": 0, "uniform": 0, "tfb": 0, "0_download_data": 0, "narrowpeak": 0, "file": 0, "1_preprocess_narrowpeaks_and_humangenom": 0, "overlap": 0, "comput": [0, 5], "label": [0, 2], "2": 0, "1_compute_overlappings_job": 0, "br": 0, "2_compute_overlap": 0, "3_postprocess": 0, "can": 0, "from": [0, 2], "here": 0, "usag": 0, "epbd_bert": 0, "sequence_dataset": [0, 1, 7], "onli": 0, "sequence_epbd_dataset": [0, 1, 7], "flat": 0, "sequence_epbd_multimodal_dataset": [0, 1, 7], "matrix": 0, "note": 0, "There": 0, "ar": 0, "some": 0, "other": 0, "each": 0, "exampl": 0, "run": [0, 5], "instruct": 0, "bottom": 0, "finetun": 0, "dnabert2_classifi": [0, 1, 7], "train_lightn": [0, 1, 7], "valid": 0, "split": 0, "vanillaepbd": 0, "coordflip": 0, "dnabert2_epbd": [0, 1, 7], "epbdxdnabert": 0, "dnabert2_epbd_crossattn": [0, 1, 7], "detail": 0, "ablat": 0, "www": 0, "biorxiv": 0, "org": 0, "10": 0, "1101": 0, "2024": 0, "01": [0, 5], "16": 0, "575935v2": 0, "abstract": 0, "m": 0, "lo": 0, "alamo": 0, "nation": 0, "lab": 0, "t": 0, "triad": 0, "secur": 0, "llc": 0, "right": 0, "reserv": 0, "program": 0, "wa": 0, "produc": 0, "under": 0, "u": 0, "govern": 0, "contract": 0, "89233218cna000001": 0, "laboratori": 0, "which": 0, "i": 0, "depart": 0, "energi": 0, "nuclear": 0, "administr": 0, "grant": 0, "itself": 0, "act": 0, "its": 0, "behalf": 0, "nonexclus": 0, "paid": 0, "irrevoc": 0, "worldwid": 0, "materi": 0, "reproduc": 0, "prepar": 0, "deriv": 0, "distribut": 0, "copi": 0, "public": 0, "publicli": 0, "displai": 0, "permit": 0, "do": 0, "so": 0, "o": 0, "4717": 0, "open": 0, "sourc": [0, 1, 2, 3, 4, 5, 6], "bsd": 0, "redistribut": 0, "form": 0, "without": 0, "modif": 0, "condit": 0, "met": 0, "must": 0, "retain": 0, "abov": 0, "list": 0, "disclaim": 0, "neither": 0, "holder": 0, "nor": 0, "contributor": 0, "mai": 0, "endors": 0, "promot": 0, "product": 0, "specif": 0, "prior": 0, "written": 0, "permiss": 0, "BY": 0, "THE": 0, "AND": 0, "AS": 0, "ani": 0, "express": 0, "OR": 0, "impli": 0, "warranti": 0, "BUT": 0, "NOT": 0, "limit": 0, "TO": 0, "OF": 0, "merchant": 0, "fit": 0, "FOR": 0, "A": 0, "particular": 0, "purpos": 0, "IN": 0, "NO": 0, "shall": 0, "BE": 0, "liabl": 0, "direct": 0, "indirect": 0, "incident": 0, "special": 0, "exemplari": 0, "consequenti": 0, "damag": 0, "procur": 0, "substitut": 0, "good": 0, "servic": 0, "loss": 0, "profit": 0, "busi": 0, "interrupt": 0, "howev": 0, "caus": 0, "ON": 0, "theori": 0, "liabil": 0, "whether": 0, "strict": 0, "tort": 0, "neglig": 0, "otherwis": 0, "aris": 0, "wai": 0, "out": 0, "even": 0, "IF": 0, "advis": 0, "possibl": 0, "SUCH": 0, "packag": [0, 7], "subpackag": [0, 7], "submodul": [0, 7], "models_factori": [0, 7], "get_model_and_dataload": [0, 1, 7], "path_config": [0, 7], "version": [0, 7], "index": 0, "search": 0, "page": 0, "anowarul": 0, "kabir": 0, "sciec": 0, "georg": 0, "mason": 0, "univers": 0, "manish": 0, "bhattarai": 0, "theoret": 0, "divis": 0, "kim": 0, "rasmussen": 0, "amarda": 0, "shehu": 0, "alan": 0, "bishop": 0, "boian": 0, "alexandrov": 0, "anni": 0, "usheva": 0, "surgeri": 0, "rhode": 0, "island": 0, "hospit": 0, "brown": 0, "kabir2024advanc": 0, "r": 0, "journal": 0, "year": 0, "publish": 0, "cold": 0, "spring": 0, "harbor": 0, "doi": 0, "5281": 0, "zenodo": 0, "11130474": 0, "url": 0, "dataset": [1, 7], "data_col": [1, 7], "seqlabeldatacol": [1, 2], "seqlabelepbddatacol": [1, 2], "sequencedataset": [1, 2], "sequenceepbddataset": [1, 2], "sequenceepbdmultimodaldataset": [1, 2], "sequence_epbd_multimodal_labelspecific_dataset": [1, 7], "sequenceepbdmultimodallabelspecificdataset": [1, 2], "sequence_randepbd_dataset": [1, 7], "sequencerandepbddataset": [1, 2], "sequence_randepbd_multimodal_dataset": [1, 7], "sequencerandepbdmultimodaldataset": [1, 2], "config": [1, 7], "batch_siz": [1, 3, 4, 5], "best_model_monitor": [1, 3, 4, 5], "best_model_monitor_mod": [1, 3, 4, 5], "learning_r": [1, 3, 4, 5], "max_epoch": [1, 3, 4, 5], "n_class": [1, 3, 4, 5], "num_work": [1, 3, 4, 5], "weight_decai": [1, 3, 4, 5], "model": [1, 7], "dnabert2classifi": [1, 3], "calculate_loss": [1, 3, 4, 5], "configure_optim": [1, 3, 4, 5], "forward": [1, 3, 4, 5], "on_validation_epoch_end": [1, 3, 4, 5], "training_step": [1, 3, 4, 5], "validation_step": [1, 3, 4, 5], "test": [1, 7], "epbd_feature_input_dim": [1, 4], "epbd_features_typ": [1, 4], "dnabert2epbdmodel": [1, 4], "epbdconfig": [1, 5], "d_ff": [1, 5], "d_model": [1, 5], "epbd_embedder_kernel_s": [1, 5], "epbd_feature_channel": [1, 5], "need_weight": [1, 5], "num_head": [1, 5], "p_dropout": [1, 5], "trainingconfig": [1, 5], "epbddnabert2model": [1, 5], "load_pretrained_model": [1, 5], "epbdembedd": [1, 5], "multimodallay": [1, 5], "poolinglay": [1, 5], "positionwisefeedforward": [1, 5], "compute_predict": [1, 5], "data_util": [1, 7], "compute_multi_class_weight": [1, 6], "get_uniform_peaks_metadata": [1, 6], "get_dnabert2_pretrained_model": [1, 6], "get_dnabert2_token": [1, 6], "load_dnabert2_for_classif": [1, 6], "pickle_util": [1, 7], "load": [1, 6], "save": [1, 6], "model_nam": 1, "data_path": [1, 2], "token": [1, 2], "pretrainedtoken": [1, 2], "64": 1, "8": [1, 5], "class": [2, 3, 4, 5], "pad_token_id": 2, "0": [2, 3, 4, 5], "base": [2, 3, 4, 5], "object": [2, 3, 4, 5], "str": [2, 3, 4, 5], "home_dir": [2, 6], "supervis": 2, "fine": 2, "tune": 2, "pydnaepbd_features_path": 2, "epbd": 2, "wgencodeawgtfbsbroaddnd41ctcfunipk": 2, "int": [3, 4, 5], "170": [3, 4], "32": [3, 4, 5], "float": [3, 4, 5], "1e": [3, 4, 5], "05": [3, 4, 5], "100": [3, 4], "val_loss": [3, 4, 5], "min": [3, 4, 5], "lightningmodul": [3, 4, 5], "_summary_": [3, 4, 5], "paramet": [3, 4, 5], "_description_": [3, 4, 5], "logit": [3, 4, 5], "tensor": [3, 4, 5], "target": [3, 4, 5], "torch": [3, 4, 5], "return": [3, 4, 5], "type": [3, 4, 5], "_type_": [3, 4, 5], "input": [3, 4, 5], "batch": [3, 4, 5], "batch_idx": [3, 4, 5], "1200": 4, "lightn": 4, "5": 5, "256": 5, "768": 5, "bool": 5, "fals": 5, "classmethod": 5, "checkpoint_path": 5, "mode": 5, "eval": 5, "option": 5, "default": 5, "in_channel": 5, "kernel_s": 5, "initi": 5, "intern": 5, "state": 5, "share": 5, "both": 5, "nn": 5, "scriptmodul": 5, "x": 5, "defin": 5, "everi": 5, "call": 5, "should": 5, "overridden": 5, "subclass": 5, "although": 5, "recip": 5, "pass": 5, "need": 5, "within": 5, "one": 5, "instanc": 5, "afterward": 5, "instead": 5, "sinc": 5, "former": 5, "take": 5, "care": 5, "regist": 5, "hook": 5, "while": 5, "latter": 5, "silent": 5, "ignor": 5, "them": 5, "epbd_embed": 5, "seq_embed": 5, "key_padding_mask": 5, "none": 5, "dropout": 5, "dl": 5, "dataload": 5, "output_preds_path": 5, "compute_again": 5, "usr": 6, "project": 6, "pydna_epbd": 6, "tf_dna_bind": 6, "max_num_token": 6, "512": 6, "num_label": 6, "data": 6}, "objects": {"": [[1, 0, 0, "-", "epbd_bert"]], "epbd_bert": [[2, 0, 0, "-", "datasets"], [3, 0, 0, "-", "dnabert2_classifier"], [4, 0, 0, "-", "dnabert2_epbd"], [5, 0, 0, "-", "dnabert2_epbd_crossattn"], [1, 0, 0, "-", "models_factory"], [1, 0, 0, "-", "path_configs"], [6, 0, 0, "-", "utility"], [1, 0, 0, "-", "version"]], "epbd_bert.datasets": [[2, 0, 0, "-", "data_collators"], [2, 0, 0, "-", "sequence_dataset"], [2, 0, 0, "-", "sequence_epbd_dataset"], [2, 0, 0, "-", "sequence_epbd_multimodal_dataset"], [2, 0, 0, "-", "sequence_epbd_multimodal_labelspecific_dataset"], [2, 0, 0, "-", "sequence_randepbd_dataset"], [2, 0, 0, "-", "sequence_randepbd_multimodal_dataset"]], "epbd_bert.datasets.data_collators": [[2, 1, 1, "", "SeqLabelDataCollator"], [2, 1, 1, "", "SeqLabelEPBDDataCollator"]], "epbd_bert.datasets.sequence_dataset": [[2, 1, 1, "", "SequenceDataset"]], "epbd_bert.datasets.sequence_epbd_dataset": [[2, 1, 1, "", "SequenceEPBDDataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_dataset": [[2, 1, 1, "", "SequenceEPBDMultiModalDataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset": [[2, 1, 1, "", "SequenceEPBDMultiModalLabelSpecificDataset"]], "epbd_bert.datasets.sequence_randepbd_dataset": [[2, 1, 1, "", "SequenceRandEPBDDataset"]], "epbd_bert.datasets.sequence_randepbd_multimodal_dataset": [[2, 1, 1, "", "SequenceRandEPBDMultiModalDataset"]], "epbd_bert.dnabert2_classifier": [[3, 0, 0, "-", "configs"], [3, 0, 0, "-", "model"], [3, 0, 0, "-", "test"], [3, 0, 0, "-", "train_lightning"]], "epbd_bert.dnabert2_classifier.configs": [[3, 1, 1, "", "Configs"]], "epbd_bert.dnabert2_classifier.configs.Configs": [[3, 2, 1, "", "batch_size"], [3, 2, 1, "", "best_model_monitor"], [3, 2, 1, "", "best_model_monitor_mode"], [3, 2, 1, "", "learning_rate"], [3, 2, 1, "", "max_epochs"], [3, 2, 1, "", "n_classes"], [3, 2, 1, "", "num_workers"], [3, 2, 1, "", "weight_decay"]], "epbd_bert.dnabert2_classifier.model": [[3, 1, 1, "", "DNABERT2Classifier"]], "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier": [[3, 3, 1, "", "calculate_loss"], [3, 3, 1, "", "configure_optimizers"], [3, 3, 1, "", "forward"], [3, 3, 1, "", "on_validation_epoch_end"], [3, 3, 1, "", "training_step"], [3, 3, 1, "", "validation_step"]], "epbd_bert.dnabert2_epbd": [[4, 0, 0, "-", "configs"], [4, 0, 0, "-", "model"], [4, 0, 0, "-", "test"], [4, 0, 0, "-", "train_lightning"]], "epbd_bert.dnabert2_epbd.configs": [[4, 1, 1, "", "Configs"]], "epbd_bert.dnabert2_epbd.configs.Configs": [[4, 2, 1, "", "batch_size"], [4, 2, 1, "", "best_model_monitor"], [4, 2, 1, "", "best_model_monitor_mode"], [4, 2, 1, "", "epbd_feature_input_dim"], [4, 2, 1, "", "epbd_features_type"], [4, 2, 1, "", "learning_rate"], [4, 2, 1, "", "max_epochs"], [4, 2, 1, "", "n_classes"], [4, 2, 1, "", "num_workers"], [4, 2, 1, "", "weight_decay"]], "epbd_bert.dnabert2_epbd.model": [[4, 1, 1, "", "Dnabert2EPBDModel"]], "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel": [[4, 3, 1, "", "calculate_loss"], [4, 3, 1, "", "configure_optimizers"], [4, 3, 1, "", "forward"], [4, 3, 1, "", "on_validation_epoch_end"], [4, 3, 1, "", "training_step"], [4, 3, 1, "", "validation_step"]], "epbd_bert.dnabert2_epbd_crossattn": [[5, 0, 0, "-", "configs"], [5, 0, 0, "-", "model"], [5, 0, 0, "-", "test"], [5, 0, 0, "-", "train_lightning"]], "epbd_bert.dnabert2_epbd_crossattn.configs": [[5, 1, 1, "", "EPBDConfigs"], [5, 1, 1, "", "TrainingConfigs"]], "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs": [[5, 2, 1, "", "best_model_monitor"], [5, 2, 1, "", "best_model_monitor_mode"], [5, 2, 1, "", "d_ff"], [5, 2, 1, "", "d_model"], [5, 2, 1, "", "epbd_embedder_kernel_size"], [5, 2, 1, "", "epbd_feature_channels"], [5, 2, 1, "", "n_classes"], [5, 2, 1, "", "need_weights"], [5, 2, 1, "", "num_heads"], [5, 2, 1, "", "p_dropout"]], "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs": [[5, 2, 1, "", "batch_size"], [5, 2, 1, "", "learning_rate"], [5, 2, 1, "", "max_epochs"], [5, 2, 1, "", "num_workers"], [5, 2, 1, "", "weight_decay"]], "epbd_bert.dnabert2_epbd_crossattn.model": [[5, 1, 1, "", "EPBDDnabert2Model"], [5, 1, 1, "", "EPBDEmbedder"], [5, 1, 1, "", "MultiModalLayer"], [5, 1, 1, "", "PoolingLayer"], [5, 1, 1, "", "PositionWiseFeedForward"]], "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model": [[5, 3, 1, "", "calculate_loss"], [5, 3, 1, "", "configure_optimizers"], [5, 3, 1, "", "forward"], [5, 3, 1, "", "load_pretrained_model"], [5, 3, 1, "", "on_validation_epoch_end"], [5, 3, 1, "", "training_step"], [5, 3, 1, "", "validation_step"]], "epbd_bert.dnabert2_epbd_crossattn.model.EPBDEmbedder": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.model.MultiModalLayer": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.model.PoolingLayer": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.model.PositionWiseFeedForward": [[5, 3, 1, "", "forward"]], "epbd_bert.dnabert2_epbd_crossattn.test": [[5, 4, 1, "", "compute_predictions"]], "epbd_bert.models_factory": [[1, 4, 1, "", "get_model_and_dataloader"]], "epbd_bert.utility": [[6, 0, 0, "-", "data_utils"], [6, 0, 0, "-", "dnabert2"], [6, 0, 0, "-", "pickle_utils"]], "epbd_bert.utility.data_utils": [[6, 4, 1, "", "compute_multi_class_weights"], [6, 4, 1, "", "get_uniform_peaks_metadata"]], "epbd_bert.utility.dnabert2": [[6, 4, 1, "", "get_dnabert2_pretrained_model"], [6, 4, 1, "", "get_dnabert2_tokenizer"], [6, 4, 1, "", "load_dnabert2_for_classification"]], "epbd_bert.utility.pickle_utils": [[6, 4, 1, "", "load"], [6, 4, 1, "", "save"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "function", "Python function"]}, "titleterms": {"welcom": 0, "epbd": 0, "bert": 0, "": 0, "document": 0, "resourc": 0, "instal": 0, "data": 0, "preprocess": 0, "step": 0, "dataset": [0, 2], "load": 0, "train": 0, "test": [0, 3, 4, 5], "develop": 0, "model": [0, 3, 4, 5], "acknowledg": 0, "copyright": 0, "notic": 0, "licens": 0, "indic": 0, "tabl": 0, "author": 0, "how": 0, "cite": 0, "epbd_bert": [1, 2, 3, 4, 5, 6, 7], "packag": [1, 2, 3, 4, 5, 6], "subpackag": 1, "submodul": [1, 2, 3, 4, 5, 6], "models_factori": 1, "modul": [1, 2, 3, 4, 5, 6], "path_config": 1, "version": 1, "content": [1, 2, 3, 4, 5, 6], "data_col": 2, "sequence_dataset": 2, "sequence_epbd_dataset": 2, "sequence_epbd_multimodal_dataset": 2, "sequence_epbd_multimodal_labelspecific_dataset": 2, "sequence_randepbd_dataset": 2, "sequence_randepbd_multimodal_dataset": 2, "dnabert2_classifi": 3, "config": [3, 4, 5], "train_lightn": [3, 4, 5], "dnabert2_epbd": 4, "dnabert2_epbd_crossattn": 5, "util": 6, "data_util": 6, "dnabert2": 6, "pickle_util": 6}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Welcome to EPBD-BERT\u2019s documentation!": [[0, "welcome-to-epbd-bert-s-documentation"]], "Resources": [[0, "resources"]], "Installation": [[0, "installation"]], "Data Preprocessing Steps": [[0, "data-preprocessing-steps"]], "Preprocessed dataset loading": [[0, "preprocessed-dataset-loading"]], "Training and testing the developed models": [[0, "training-and-testing-the-developed-models"]], "Acknowledgements": [[0, "acknowledgements"]], "Copyright notice": [[0, "copyright-notice"]], "License": [[0, "license"]], "Documentation": [[0, "id1"]], "Indices and tables": [[0, "indices-and-tables"]], "Authors": [[0, "authors"]], "How to cite EPBD-BERT?": [[0, "how-to-cite-epbd-bert"]], "epbd_bert package": [[1, "epbd-bert-package"]], "Subpackages": [[1, "subpackages"]], "Submodules": [[1, "submodules"], [2, "submodules"], [3, "submodules"], [4, "submodules"], [5, "submodules"], [6, "submodules"]], "epbd_bert.models_factory module": [[1, "module-epbd_bert.models_factory"]], "epbd_bert.path_configs module": [[1, "module-epbd_bert.path_configs"]], "epbd_bert.version module": [[1, "module-epbd_bert.version"]], "Module contents": [[1, "module-epbd_bert"], [2, "module-epbd_bert.datasets"], [3, "module-epbd_bert.dnabert2_classifier"], [4, "module-epbd_bert.dnabert2_epbd"], [5, "module-epbd_bert.dnabert2_epbd_crossattn"], [6, "module-epbd_bert.utility"]], "epbd_bert.datasets package": [[2, "epbd-bert-datasets-package"]], "epbd_bert.datasets.data_collators module": [[2, "module-epbd_bert.datasets.data_collators"]], "epbd_bert.datasets.sequence_dataset module": [[2, "module-epbd_bert.datasets.sequence_dataset"]], "epbd_bert.datasets.sequence_epbd_dataset module": [[2, "module-epbd_bert.datasets.sequence_epbd_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_dataset module": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset module": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset"]], "epbd_bert.datasets.sequence_randepbd_dataset module": [[2, "module-epbd_bert.datasets.sequence_randepbd_dataset"]], "epbd_bert.datasets.sequence_randepbd_multimodal_dataset module": [[2, "module-epbd_bert.datasets.sequence_randepbd_multimodal_dataset"]], "epbd_bert.dnabert2_classifier package": [[3, "epbd-bert-dnabert2-classifier-package"]], "epbd_bert.dnabert2_classifier.configs module": [[3, "module-epbd_bert.dnabert2_classifier.configs"]], "epbd_bert.dnabert2_classifier.model module": [[3, "module-epbd_bert.dnabert2_classifier.model"]], "epbd_bert.dnabert2_classifier.test module": [[3, "module-epbd_bert.dnabert2_classifier.test"]], "epbd_bert.dnabert2_classifier.train_lightning module": [[3, "module-epbd_bert.dnabert2_classifier.train_lightning"]], "epbd_bert.dnabert2_epbd package": [[4, "epbd-bert-dnabert2-epbd-package"]], "epbd_bert.dnabert2_epbd.configs module": [[4, "module-epbd_bert.dnabert2_epbd.configs"]], "epbd_bert.dnabert2_epbd.model module": [[4, "module-epbd_bert.dnabert2_epbd.model"]], "epbd_bert.dnabert2_epbd.test module": [[4, "module-epbd_bert.dnabert2_epbd.test"]], "epbd_bert.dnabert2_epbd.train_lightning module": [[4, "module-epbd_bert.dnabert2_epbd.train_lightning"]], "epbd_bert.dnabert2_epbd_crossattn package": [[5, "epbd-bert-dnabert2-epbd-crossattn-package"]], "epbd_bert.dnabert2_epbd_crossattn.configs module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.configs"]], "epbd_bert.dnabert2_epbd_crossattn.model module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.model"]], "epbd_bert.dnabert2_epbd_crossattn.test module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.test"]], "epbd_bert.dnabert2_epbd_crossattn.train_lightning module": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.train_lightning"]], "epbd_bert.utility package": [[6, "epbd-bert-utility-package"]], "epbd_bert.utility.data_utils module": [[6, "module-epbd_bert.utility.data_utils"]], "epbd_bert.utility.dnabert2 module": [[6, "module-epbd_bert.utility.dnabert2"]], "epbd_bert.utility.pickle_utils module": [[6, "module-epbd_bert.utility.pickle_utils"]], "epbd_bert": [[7, "epbd-bert"]]}, "indexentries": {"epbd_bert": [[1, "module-epbd_bert"]], "epbd_bert.models_factory": [[1, "module-epbd_bert.models_factory"]], "epbd_bert.path_configs": [[1, "module-epbd_bert.path_configs"]], "epbd_bert.version": [[1, "module-epbd_bert.version"]], "get_model_and_dataloader() (in module epbd_bert.models_factory)": [[1, "epbd_bert.models_factory.get_model_and_dataloader"]], "module": [[1, "module-epbd_bert"], [1, "module-epbd_bert.models_factory"], [1, "module-epbd_bert.path_configs"], [1, "module-epbd_bert.version"], [2, "module-epbd_bert.datasets"], [2, "module-epbd_bert.datasets.data_collators"], [2, "module-epbd_bert.datasets.sequence_dataset"], [2, "module-epbd_bert.datasets.sequence_epbd_dataset"], [2, "module-epbd_bert.datasets.sequence_epbd_multimodal_dataset"], [2, "module-epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset"], [2, "module-epbd_bert.datasets.sequence_randepbd_dataset"], [2, "module-epbd_bert.datasets.sequence_randepbd_multimodal_dataset"], [3, "module-epbd_bert.dnabert2_classifier"], [3, "module-epbd_bert.dnabert2_classifier.configs"], [3, "module-epbd_bert.dnabert2_classifier.model"], [3, "module-epbd_bert.dnabert2_classifier.test"], [3, "module-epbd_bert.dnabert2_classifier.train_lightning"], [4, "module-epbd_bert.dnabert2_epbd"], [4, "module-epbd_bert.dnabert2_epbd.configs"], [4, "module-epbd_bert.dnabert2_epbd.model"], [4, "module-epbd_bert.dnabert2_epbd.test"], [4, "module-epbd_bert.dnabert2_epbd.train_lightning"], [5, "module-epbd_bert.dnabert2_epbd_crossattn"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.configs"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.model"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.test"], [5, "module-epbd_bert.dnabert2_epbd_crossattn.train_lightning"], [6, "module-epbd_bert.utility"], [6, "module-epbd_bert.utility.data_utils"], [6, "module-epbd_bert.utility.dnabert2"], [6, "module-epbd_bert.utility.pickle_utils"]], "seqlabeldatacollator (class in epbd_bert.datasets.data_collators)": [[2, "epbd_bert.datasets.data_collators.SeqLabelDataCollator"]], "seqlabelepbddatacollator (class in epbd_bert.datasets.data_collators)": [[2, "epbd_bert.datasets.data_collators.SeqLabelEPBDDataCollator"]], "sequencedataset (class in epbd_bert.datasets.sequence_dataset)": [[2, "epbd_bert.datasets.sequence_dataset.SequenceDataset"]], "sequenceepbddataset (class in epbd_bert.datasets.sequence_epbd_dataset)": [[2, "epbd_bert.datasets.sequence_epbd_dataset.SequenceEPBDDataset"]], "sequenceepbdmultimodaldataset (class in epbd_bert.datasets.sequence_epbd_multimodal_dataset)": [[2, "epbd_bert.datasets.sequence_epbd_multimodal_dataset.SequenceEPBDMultiModalDataset"]], "sequenceepbdmultimodallabelspecificdataset (class in epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset)": [[2, "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset.SequenceEPBDMultiModalLabelSpecificDataset"]], "sequencerandepbddataset (class in epbd_bert.datasets.sequence_randepbd_dataset)": [[2, "epbd_bert.datasets.sequence_randepbd_dataset.SequenceRandEPBDDataset"]], "sequencerandepbdmultimodaldataset (class in epbd_bert.datasets.sequence_randepbd_multimodal_dataset)": [[2, "epbd_bert.datasets.sequence_randepbd_multimodal_dataset.SequenceRandEPBDMultiModalDataset"]], "epbd_bert.datasets": [[2, "module-epbd_bert.datasets"]], "epbd_bert.datasets.data_collators": [[2, "module-epbd_bert.datasets.data_collators"]], "epbd_bert.datasets.sequence_dataset": [[2, "module-epbd_bert.datasets.sequence_dataset"]], "epbd_bert.datasets.sequence_epbd_dataset": [[2, "module-epbd_bert.datasets.sequence_epbd_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_dataset": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_dataset"]], "epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset": [[2, "module-epbd_bert.datasets.sequence_epbd_multimodal_labelspecific_dataset"]], "epbd_bert.datasets.sequence_randepbd_dataset": [[2, "module-epbd_bert.datasets.sequence_randepbd_dataset"]], "epbd_bert.datasets.sequence_randepbd_multimodal_dataset": [[2, "module-epbd_bert.datasets.sequence_randepbd_multimodal_dataset"]], "configs (class in epbd_bert.dnabert2_classifier.configs)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs"]], "dnabert2classifier (class in epbd_bert.dnabert2_classifier.model)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier"]], "batch_size (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.batch_size"]], "best_model_monitor (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.best_model_monitor"]], "best_model_monitor_mode (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.best_model_monitor_mode"]], "calculate_loss() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.calculate_loss"]], "configure_optimizers() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.configure_optimizers"]], "epbd_bert.dnabert2_classifier": [[3, "module-epbd_bert.dnabert2_classifier"]], "epbd_bert.dnabert2_classifier.configs": [[3, "module-epbd_bert.dnabert2_classifier.configs"]], "epbd_bert.dnabert2_classifier.model": [[3, "module-epbd_bert.dnabert2_classifier.model"]], "epbd_bert.dnabert2_classifier.test": [[3, "module-epbd_bert.dnabert2_classifier.test"]], "epbd_bert.dnabert2_classifier.train_lightning": [[3, "module-epbd_bert.dnabert2_classifier.train_lightning"]], "forward() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.forward"]], "learning_rate (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.learning_rate"]], "max_epochs (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.max_epochs"]], "n_classes (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.n_classes"]], "num_workers (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.num_workers"]], "on_validation_epoch_end() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.on_validation_epoch_end"]], "training_step() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.training_step"]], "validation_step() (epbd_bert.dnabert2_classifier.model.dnabert2classifier method)": [[3, "epbd_bert.dnabert2_classifier.model.DNABERT2Classifier.validation_step"]], "weight_decay (epbd_bert.dnabert2_classifier.configs.configs attribute)": [[3, "epbd_bert.dnabert2_classifier.configs.Configs.weight_decay"]], "configs (class in epbd_bert.dnabert2_epbd.configs)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs"]], "dnabert2epbdmodel (class in epbd_bert.dnabert2_epbd.model)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel"]], "batch_size (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.batch_size"]], "best_model_monitor (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.best_model_monitor"]], "best_model_monitor_mode (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.best_model_monitor_mode"]], "calculate_loss() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.calculate_loss"]], "configure_optimizers() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.configure_optimizers"]], "epbd_bert.dnabert2_epbd": [[4, "module-epbd_bert.dnabert2_epbd"]], "epbd_bert.dnabert2_epbd.configs": [[4, "module-epbd_bert.dnabert2_epbd.configs"]], "epbd_bert.dnabert2_epbd.model": [[4, "module-epbd_bert.dnabert2_epbd.model"]], "epbd_bert.dnabert2_epbd.test": [[4, "module-epbd_bert.dnabert2_epbd.test"]], "epbd_bert.dnabert2_epbd.train_lightning": [[4, "module-epbd_bert.dnabert2_epbd.train_lightning"]], "epbd_feature_input_dim (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.epbd_feature_input_dim"]], "epbd_features_type (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.epbd_features_type"]], "forward() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.forward"]], "learning_rate (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.learning_rate"]], "max_epochs (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.max_epochs"]], "n_classes (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.n_classes"]], "num_workers (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.num_workers"]], "on_validation_epoch_end() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.on_validation_epoch_end"]], "training_step() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.training_step"]], "validation_step() (epbd_bert.dnabert2_epbd.model.dnabert2epbdmodel method)": [[4, "epbd_bert.dnabert2_epbd.model.Dnabert2EPBDModel.validation_step"]], "weight_decay (epbd_bert.dnabert2_epbd.configs.configs attribute)": [[4, "epbd_bert.dnabert2_epbd.configs.Configs.weight_decay"]], "epbdconfigs (class in epbd_bert.dnabert2_epbd_crossattn.configs)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs"]], "epbddnabert2model (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model"]], "epbdembedder (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDEmbedder"]], "multimodallayer (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.MultiModalLayer"]], "poolinglayer (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PoolingLayer"]], "positionwisefeedforward (class in epbd_bert.dnabert2_epbd_crossattn.model)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PositionWiseFeedForward"]], "trainingconfigs (class in epbd_bert.dnabert2_epbd_crossattn.configs)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs"]], "batch_size (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.batch_size"]], "best_model_monitor (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.best_model_monitor"]], "best_model_monitor_mode (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.best_model_monitor_mode"]], "calculate_loss() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.calculate_loss"]], "compute_predictions() (in module epbd_bert.dnabert2_epbd_crossattn.test)": [[5, "epbd_bert.dnabert2_epbd_crossattn.test.compute_predictions"]], "configure_optimizers() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.configure_optimizers"]], "d_ff (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.d_ff"]], "d_model (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.d_model"]], "epbd_bert.dnabert2_epbd_crossattn": [[5, "module-epbd_bert.dnabert2_epbd_crossattn"]], "epbd_bert.dnabert2_epbd_crossattn.configs": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.configs"]], "epbd_bert.dnabert2_epbd_crossattn.model": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.model"]], "epbd_bert.dnabert2_epbd_crossattn.test": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.test"]], "epbd_bert.dnabert2_epbd_crossattn.train_lightning": [[5, "module-epbd_bert.dnabert2_epbd_crossattn.train_lightning"]], "epbd_embedder_kernel_size (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.epbd_embedder_kernel_size"]], "epbd_feature_channels (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.epbd_feature_channels"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.epbdembedder method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDEmbedder.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.multimodallayer method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.MultiModalLayer.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.poolinglayer method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PoolingLayer.forward"]], "forward() (epbd_bert.dnabert2_epbd_crossattn.model.positionwisefeedforward method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.PositionWiseFeedForward.forward"]], "learning_rate (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.learning_rate"]], "load_pretrained_model() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model class method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.load_pretrained_model"]], "max_epochs (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.max_epochs"]], "n_classes (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.n_classes"]], "need_weights (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.need_weights"]], "num_heads (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.num_heads"]], "num_workers (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.num_workers"]], "on_validation_epoch_end() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.on_validation_epoch_end"]], "p_dropout (epbd_bert.dnabert2_epbd_crossattn.configs.epbdconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.EPBDConfigs.p_dropout"]], "training_step() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.training_step"]], "validation_step() (epbd_bert.dnabert2_epbd_crossattn.model.epbddnabert2model method)": [[5, "epbd_bert.dnabert2_epbd_crossattn.model.EPBDDnabert2Model.validation_step"]], "weight_decay (epbd_bert.dnabert2_epbd_crossattn.configs.trainingconfigs attribute)": [[5, "epbd_bert.dnabert2_epbd_crossattn.configs.TrainingConfigs.weight_decay"]], "compute_multi_class_weights() (in module epbd_bert.utility.data_utils)": [[6, "epbd_bert.utility.data_utils.compute_multi_class_weights"]], "epbd_bert.utility": [[6, "module-epbd_bert.utility"]], "epbd_bert.utility.data_utils": [[6, "module-epbd_bert.utility.data_utils"]], "epbd_bert.utility.dnabert2": [[6, "module-epbd_bert.utility.dnabert2"]], "epbd_bert.utility.pickle_utils": [[6, "module-epbd_bert.utility.pickle_utils"]], "get_dnabert2_pretrained_model() (in module epbd_bert.utility.dnabert2)": [[6, "epbd_bert.utility.dnabert2.get_dnabert2_pretrained_model"]], "get_dnabert2_tokenizer() (in module epbd_bert.utility.dnabert2)": [[6, "epbd_bert.utility.dnabert2.get_dnabert2_tokenizer"]], "get_uniform_peaks_metadata() (in module epbd_bert.utility.data_utils)": [[6, "epbd_bert.utility.data_utils.get_uniform_peaks_metadata"]], "load() (in module epbd_bert.utility.pickle_utils)": [[6, "epbd_bert.utility.pickle_utils.load"]], "load_dnabert2_for_classification() (in module epbd_bert.utility.dnabert2)": [[6, "epbd_bert.utility.dnabert2.load_dnabert2_for_classification"]], "save() (in module epbd_bert.utility.pickle_utils)": [[6, "epbd_bert.utility.pickle_utils.save"]]}}) \ No newline at end of file diff --git a/docs_builder/source/index.rst b/docs_builder/source/index.rst index 513c317..28e6287 100644 --- a/docs_builder/source/index.rst +++ b/docs_builder/source/index.rst @@ -75,10 +75,10 @@ The 'data_preprocessing' directory holds all the data generation steps and divid - Scripts * - Download human genome assembly (GRCh37/hg19) and `uniform TFBS `_ - ``0_download_data.py`` - * - - - - * - - - + * - Preprocess TFBS narrowpeak files and human genome + - ``1_preprocess_narrowPeaks_and_humanGenome.sh`` + * - Overlapping computation for label association + - ``2.1_compute_overlappings_job.sh``
``2.2_compute_overlappings.sh``
``3_postprocess.sh`` * - - * - @@ -89,8 +89,8 @@ The 'data_preprocessing' directory holds all the data generation steps and divid - .. | | `````` | -.. | Preprocess TFBS narrowpeak files and human genome | ```1_preprocess_narrowPeaks_and_humanGenome.sh``` | -.. | Overlapping computation for label association | ```2.1_compute_overlappings_job.sh```
```2.2_compute_overlappings.sh```
```3_postprocess.sh``` | +.. | | ````` | +.. | | | .. | Label association | ```5.1_extract_bins_containingOtherThanACGT.ipynb```
```5.2_compute_peaks_with_labels_clean.sh```| .. | Data preprocessing for DNA breathing dynamics generation and DNABERT2 | ```6.1_create_data_for_pydnaepbd.ipynb```
```6.2_create_data_for_dnabert2.ipynb``` | .. | Train/validation/test split| ```7_create_train_val_test_set.ipynb``` |