-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #77 from bigbio/spectrumAI
spectrumAI into py-pgatk
- Loading branch information
Showing
29 changed files
with
1,385 additions
and
120 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import logging | ||
|
||
import click | ||
|
||
from pypgatk.toolbox.general import read_yaml_from_file | ||
from pypgatk.commands.utils import print_help | ||
from pypgatk.proteogenomics.blast_get_position import BlastGetPositionService | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
# CLI entry point for the ``blast_get_position`` command.
#
# Loads the optional YAML configuration file, collects the command-line
# overrides into a dict keyed by the BlastGetPositionService.CONFIG_* constants
# and passes both to BlastGetPositionService; its ``blast`` method then receives
# the input and output PSM table names.
#
# NOTE: this is deliberately a comment and not a docstring -- click uses a
# command function's docstring as its long help text, so adding one would
# change the ``--help`` output.
@click.command('blast_get_position',
               short_help='Blast peptide and reference protein database to find variation sites.')
@click.option('-c', '--config_file', help='Configuration file for the blast_get_position pipeline.')
@click.option('-i', '--input_psm_to_blast', help='The file name of the input PSM table to blast.')
@click.option('-o', '--output_psm', help='The file name of the output PSM table.')
@click.option('-r', '--input_reference_database',
              help='The file name of the reference protein database to blast. The reference database includes '
                   'Uniprot Proteomes with isoforms, ENSEMBL, RefSeq, etc.')
@click.option('-n', '--number_of_processes', type=int,
              help='Used to specify the number of processes. Default is 40.')
@click.pass_context
def blast_get_position(ctx, config_file, input_psm_to_blast, output_psm, input_reference_database,
                       number_of_processes):
    # ctx is injected by click.pass_context and is not used directly here.
    config_data = read_yaml_from_file(config_file) if config_file is not None else None

    # All three file arguments are mandatory for a run.
    if input_psm_to_blast is None or input_reference_database is None or output_psm is None:
        print_help()
        # print_help() presumably terminates the command (TODO confirm); the
        # explicit return guarantees the service is never invoked with missing
        # file names even if it does not.
        return

    # Only options actually supplied on the command line are forwarded, so the
    # service can fall back to the configuration file / its own defaults.
    pipeline_arguments = {}
    if input_reference_database is not None:
        pipeline_arguments[BlastGetPositionService.CONFIG_INPUT_REFERENCE_DATABASE] = input_reference_database
    if number_of_processes is not None:
        pipeline_arguments[BlastGetPositionService.CONFIG_NUMBER_OF_PROCESSES] = number_of_processes

    blast_get_position_service = BlastGetPositionService(config_data, pipeline_arguments)
    blast_get_position_service.blast(input_psm_to_blast, output_psm)
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import logging | ||
|
||
import click | ||
|
||
from pypgatk.toolbox.general import read_yaml_from_file | ||
from pypgatk.commands.utils import print_help | ||
from pypgatk.proteogenomics.mztab_class_fdr import MzTabClassFdr | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
# CLI entry point for the ``mztab_class_fdr`` command.
#
# Loads the optional YAML configuration file, collects the command-line
# overrides into a dict keyed by the MzTabClassFdr.CONFIG_* constants and passes
# both to MzTabClassFdr; its ``form_mztab_class_fdr`` method then receives the
# input mzTab name and the output file name.
#
# NOTE: this is deliberately a comment and not a docstring -- click uses a
# command function's docstring as its long help text, so adding one would
# change the ``--help`` output.
@click.command('mztab_class_fdr', short_help='Extract psms from mzTab for global-fdr and class-fdr filtering')
@click.option('-c', '--config_file', help='Configuration file for the fdr peptides pipeline')
@click.option('-i', '--input_mztab', help='The file name of the input mzTab')
@click.option('-o', '--outfile_name', help='The file name of the psm table filtered by global-fdr and class-fdr')
@click.option('-d', '--decoy_prefix', help='Default is "decoy"')
@click.option('-gf', '--global_fdr_cutoff', type=float,
              help='PSM peptide global-fdr cutoff or threshold. Default is 0.01')
@click.option('-cf', '--class_fdr_cutoff', type=float,
              help='PSM peptide class-fdr cutoff or threshold. Default is 0.01')
@click.option('-g', '--peptide_groups_prefix', help="Peptide class "
              "groups e.g. \"{non_canonical:[altorf,pseudo,ncRNA];mutations:[COSMIC,cbiomut];variants:[var_mut,var_rs]}\"")
@click.pass_context
def mztab_class_fdr(ctx, config_file, input_mztab, outfile_name, decoy_prefix, global_fdr_cutoff,
                    class_fdr_cutoff, peptide_groups_prefix):
    # ctx is injected by click.pass_context and is not used directly here.
    config_data = read_yaml_from_file(config_file) if config_file is not None else None

    # Input and output file names are mandatory for a run.
    if input_mztab is None or outfile_name is None:
        print_help()
        # print_help() presumably terminates the command (TODO confirm); the
        # explicit return guarantees the service is never invoked with missing
        # file names even if it does not.
        return

    # Forward only the options the user actually supplied so that the
    # configuration file / service defaults apply to everything else.
    supplied_options = (
        (MzTabClassFdr.CONFIG_DECOY_PREFIX, decoy_prefix),
        (MzTabClassFdr.CONFIG_GLOBAL_FDR_CUTOFF, global_fdr_cutoff),
        (MzTabClassFdr.CONFIG_CLASS_FDR_CUTOFF, class_fdr_cutoff),
        (MzTabClassFdr.CONFIG_PEPTIDE_GROUPS_PREFIX, peptide_groups_prefix),
    )
    pipeline_arguments = {key: value for key, value in supplied_options if value is not None}

    class_fdr_service = MzTabClassFdr(config_data, pipeline_arguments)
    class_fdr_service.form_mztab_class_fdr(input_mztab, outfile_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import logging | ||
|
||
import click | ||
|
||
from pypgatk.toolbox.general import read_yaml_from_file | ||
from pypgatk.proteogenomics.validate_peptides import ValidatePeptidesService | ||
from pypgatk.commands.utils import print_help | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
# CLI entry point for the ``validate_peptides`` command.
#
# Loads the optional YAML configuration file, collects the command-line
# overrides into a dict keyed by the ValidatePeptidesService.CONFIG_* constants
# and passes both to ValidatePeptidesService; its ``validate`` method then
# receives the input PSM table name and the output file name.
#
# NOTE: this is deliberately a comment and not a docstring -- click uses a
# command function's docstring as its long help text, so adding one would
# change the ``--help`` output.
@click.command('validate_peptides',
               short_help='Command to inspect MS2 spectra of single-substitution peptide identifications')
@click.option('-c', '--config_file', help='Configuration file for the validate peptides pipeline')
@click.option('-p', '--mzml_path', help='The mzml file path.You only need to use either mzml_path or mzml_files')
@click.option('-f', '--mzml_files',
              help='The mzml files.Different files are separated by ",".You only need to use either mzml_path or mzml_files')
@click.option('-i', '--infile_name', help='Variant peptide PSMs table')
@click.option('-o', '--outfile_name', help='Output file for the results')
@click.option('-ion', '--ions_tolerance', type=float, help='MS2 fragment ions mass accuracy')
@click.option('-n', '--number_of_processes', type=int,
              help='Used to specify the number of processes. Default is 40.')
@click.option('-r', '--relative', help='When using ppm as ions_tolerance (not Da), it needs to be turned on',
              is_flag=True)
@click.option('-msgf', '--msgf',
              help='If it is the standard format of MSGF output, please turn on this switch, otherwise it defaults to mzTab format',
              is_flag=True)
@click.pass_context
def validate_peptides(ctx, config_file, mzml_path, mzml_files, infile_name, outfile_name, ions_tolerance,
                      number_of_processes, relative, msgf):
    # ctx is injected by click.pass_context and is not used directly here.
    config_data = read_yaml_from_file(config_file) if config_file is not None else None

    # A run needs the input table, the output file and at least one source of
    # mzML data (a directory or an explicit file list).
    has_required_arguments = bool(infile_name and (mzml_path or mzml_files) and outfile_name)
    if not has_required_arguments:
        print_help()
        # Nothing to validate: skip building the service altogether.
        return

    # Forward only the options the user actually supplied so that the
    # configuration file / service defaults apply to everything else.
    pipeline_arguments = {}
    if mzml_path is not None:
        pipeline_arguments[ValidatePeptidesService.CONFIG_MZML_PATH] = mzml_path
    if mzml_files is not None:
        pipeline_arguments[ValidatePeptidesService.CONFIG_MZML_FILES] = mzml_files
    if ions_tolerance is not None:
        pipeline_arguments[ValidatePeptidesService.CONFIG_IONS_TOLERANCE] = ions_tolerance
    if number_of_processes is not None:
        pipeline_arguments[ValidatePeptidesService.CONFIG_NUMBER_OF_PROCESSES] = number_of_processes
    # ``relative`` and ``msgf`` are click flags: they are False (never None)
    # when omitted. Forward them only when switched on, otherwise an omitted
    # flag would always override the configuration file with False.
    # NOTE(review): assumes the service treats a missing key as "off" -- confirm.
    if relative:
        pipeline_arguments[ValidatePeptidesService.CONFIG_RELATIVE] = relative
    if msgf:
        pipeline_arguments[ValidatePeptidesService.CONFIG_MSGF] = msgf

    validate_peptides_service = ValidatePeptidesService(config_data, pipeline_arguments)
    validate_peptides_service.validate(infile_name, outfile_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.