Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(protein-prospector): Add TMT converter for protein prospector #97

Merged
merged 7 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Suggests:
knitr,
rmarkdown
Collate:
'clean_ProteinProspector.R'
'clean_Metamorpheus.R'
'clean_DIANN.R'
'clean_Philosopher.R'
Expand All @@ -53,6 +54,7 @@ Collate:
'converters_OpenSWATHtoMSstatsFormat.R'
'converters_PDtoMSstatsFormat.R'
'converters_ProgenesistoMSstatsFormat.R'
'converters_ProteinProspectortoMSstatsTMTFormat.R'
'converters_SkylinetoMSstatsFormat.R'
'converters_SpectronauttoMSstatsFormat.R'
'utils_MSstatsConvert.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(OpenMStoMSstatsFormat)
export(OpenSWATHtoMSstatsFormat)
export(PDtoMSstatsFormat)
export(ProgenesistoMSstatsFormat)
export(ProteinProspectortoMSstatsTMTFormat)
export(SkylinetoMSstatsFormat)
export(SpectronauttoMSstatsFormat)
export(getDataType)
Expand Down
11 changes: 11 additions & 0 deletions R/MSstatsConvert_core_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ setClass("MSstatsFragPipeFiles", contains = "MSstatsInputFiles")
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsMetamorpheusFiles", contains = "MSstatsInputFiles")
#' MSstatsProteinProspectorFiles: S4 class for Protein Prospector input files;
#' extends `MSstatsInputFiles`.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass(
    Class = "MSstatsProteinProspectorFiles",
    contains = "MSstatsInputFiles"
)


#' Get one of files contained in an instance of `MSstatsInputFiles` class.
Expand Down Expand Up @@ -282,6 +286,13 @@ setMethod("MSstatsClean", signature = "MSstatsDIANNFiles",
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsMetamorpheusFiles",
.cleanRawMetamorpheus)
#' MSstatsClean method for Protein Prospector input files
#' @include clean_ProteinProspector.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawProteinProspector
#' @return data.table
setMethod(
    f = "MSstatsClean",
    signature = "MSstatsProteinProspectorFiles",
    definition = .cleanRawProteinProspector
)


#' Preprocess outputs from MS signal processing tools for analysis with MSstats
Expand Down
38 changes: 38 additions & 0 deletions R/clean_ProteinProspector.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' Clean raw Protein Prospector data
#'
#' Keeps the protein, charge, peptide, fraction and channel columns of the
#' Protein Prospector report, renames them to MSstats-style names, adds a
#' per-row PSM identifier and reshapes the channel columns into long format
#' (one row per PSM and channel).
#'
#' @param msstats_object object that inherits from MSstatsInputFiles class.
#' @return data.table with columns ProteinName, PrecursorCharge,
#' PeptideSequence, Run, PSM, Channel and Intensity.
#' @keywords internal
#' @noRd
.cleanRawProteinProspector = function(msstats_object) {
    # Declared as NULL so the non-standard evaluation inside `[.data.table`
    # does not trigger "no visible binding" notes in R CMD check.
    PSM = PeptideSequence = PrecursorCharge = NULL

    protein_prospector_input = getInputFile(msstats_object, "input")
    protein_prospector_input =
        data.table::as.data.table(protein_prospector_input)

    # NOTE(review): presumably selects the per-channel intensity columns
    # (names containing "Int", e.g. "Int 126") - confirm against
    # .getChannelColumns.
    channels = .getChannelColumns(
        colnames(protein_prospector_input), "Int")
    req_cols = c("AccX", "z", "DBPeptide", "Fraction", channels)
    protein_prospector_input =
        protein_prospector_input[, req_cols, with = FALSE]
    data.table::setnames(
        protein_prospector_input,
        c("AccX", "DBPeptide", "z", "Fraction"),
        c("ProteinName", "PeptideSequence", "PrecursorCharge", "Run"),
        skip_absent = TRUE)

    # The row index makes every PSM id unique. seq_len() is used instead of
    # 1:nrow() so that a zero-row input yields a zero-length column rather
    # than the invalid sequence c(1, 0).
    protein_prospector_input[, PSM := paste(
        PeptideSequence, PrecursorCharge,
        seq_len(nrow(protein_prospector_input)), sep = "_")]

    # Wide -> long: every channel column becomes a (Channel, Intensity) pair.
    protein_prospector_input = data.table::melt(
        protein_prospector_input,
        measure.vars = channels,
        id.vars = setdiff(
            colnames(protein_prospector_input),
            channels
        ),
        variable.name = "Channel",
        value.name = "Intensity"
    )
    protein_prospector_input$Channel = .standardizeColnames(
        protein_prospector_input$Channel
    )

    .logSuccess("ProteinProspector", "clean")
    protein_prospector_input
}
59 changes: 59 additions & 0 deletions R/converters_ProteinProspectortoMSstatsTMTFormat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Generate MSstatsTMT required input format from Protein Prospector output
#'
#' Imports a Protein Prospector peptide report, cleans it, merges it with the
#' annotation, filters and summarizes features, and balances the design so
#' the result can be passed on to MSstatsTMT.
#'
#' @inheritParams .sharedParametersAmongConverters
#' @param input Input txt peptide report file from Protein Prospector with
#' "Keep Replicates", "Mods in Peptide", and "Protein Mods" options selected.
#' @param annotation data frame which contains columns Run, Fraction,
#' TechRepMixture, Mixture, Channel, BioReplicate, Condition.
#'
#' @return data.frame of class "MSstatsTMT"
#'
#' @export
#'
#' @examples
#' input = system.file("tinytest/raw_data/ProteinProspector/Prospector_TotalTMT.txt",
#'                     package = "MSstatsConvert")
#' input = data.table::fread(input)
#' annot = system.file("tinytest/raw_data/ProteinProspector/Annotation.csv",
#'                     package = "MSstatsConvert")
#' annot = data.table::fread(annot)
#' output = ProteinProspectortoMSstatsTMTFormat(input, annot)
#' head(output)
#'
ProteinProspectortoMSstatsTMTFormat = function(
        input, annotation, useUniquePeptide = TRUE, removeFewMeasurements = TRUE,
        removeProtein_with1Feature = FALSE, summaryforMultipleRows = sum,
        use_log_file = TRUE, append = FALSE, verbose = TRUE,
        log_file_path = NULL
) {
    # Configure logging first so every later step is recorded.
    MSstatsConvert::MSstatsLogsSettings(use_log_file, append, verbose,
                                        log_file_path,
                                        base = "MSstatsTMT_converter_log_")

    # Import -> clean -> annotate.
    input = MSstatsConvert::MSstatsImport(list(input = input),
                                          "MSstatsTMT", "ProteinProspector")
    input = MSstatsConvert::MSstatsClean(input)
    annotation = MSstatsConvert::MSstatsMakeAnnotation(input, annotation)

    # A feature is identified by the peptide sequence and precursor charge.
    feature_columns = c("PeptideSequence", "PrecursorCharge")
    input = MSstatsConvert::MSstatsPreprocess(
        input,
        annotation,
        feature_columns,
        remove_shared_peptides = useUniquePeptide,
        remove_single_feature_proteins = removeProtein_with1Feature,
        feature_cleaning = list(
            remove_features_with_few_measurements = removeFewMeasurements,
            summarize_multiple_psms = summaryforMultipleRows)
    )
    input = MSstatsConvert::MSstatsBalancedDesign(input, feature_columns,
                                                  fix_missing = "zero_to_na")
    # The output uses "Charge" as the column name instead of "PrecursorCharge".
    data.table::setnames(input, "PrecursorCharge", "Charge", skip_absent = TRUE)

    msg_final = paste("** Finished preprocessing. The dataset is ready",
                      "to be processed by the proteinSummarization function.")
    getOption("MSstatsLog")("INFO", msg_final)
    getOption("MSstatsMsg")("INFO", msg_final)
    getOption("MSstatsLog")("INFO", "\n")
    input
}
Binary file not shown.
73 changes: 73 additions & 0 deletions inst/tinytest/raw_data/ProteinProspector/Annotation.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
Run,Fraction,TechRepMixture,Channel,Condition,Mixture,BioReplicate
Z20180606-01_FTMSms2hcd,1,1,Int 126,Young,Mixture1,S1
Z20180606-02_FTMSms2hcd,2,1,Int 126,Young,Mixture1,S1
Z20180606-03_FTMSms2hcd,3,1,Int 126,Young,Mixture1,S1
Z20180606-04_FTMSms2hcd,4,1,Int 126,Young,Mixture1,S1
Z20180606-05_FTMSms2hcd,5,1,Int 126,Young,Mixture1,S1
Z20180606-06_FTMSms2hcd,6,1,Int 126,Young,Mixture1,S1
Z20180606-07_FTMSms2hcd,7,1,Int 126,Young,Mixture1,S1
Z20180606-08_FTMSms2hcd,8,1,Int 126,Young,Mixture1,S1
Z20180606-09_FTMSms2hcd,9,1,Int 126,Young,Mixture1,S1
Z20180606-10_FTMSms2hcd,10,1,Int 126,Young,Mixture1,S1
Z20180606-11_FTMSms2hcd,11,1,Int 126,Young,Mixture1,S1
Z20180606-12_FTMSms2hcd,12,1,Int 126,Young,Mixture1,S1
Z20180606-01_FTMSms2hcd,1,1,Int 127,Young,Mixture1,S2
Z20180606-02_FTMSms2hcd,2,1,Int 127,Young,Mixture1,S2
Z20180606-03_FTMSms2hcd,3,1,Int 127,Young,Mixture1,S2
Z20180606-04_FTMSms2hcd,4,1,Int 127,Young,Mixture1,S2
Z20180606-05_FTMSms2hcd,5,1,Int 127,Young,Mixture1,S2
Z20180606-06_FTMSms2hcd,6,1,Int 127,Young,Mixture1,S2
Z20180606-07_FTMSms2hcd,7,1,Int 127,Young,Mixture1,S2
Z20180606-08_FTMSms2hcd,8,1,Int 127,Young,Mixture1,S2
Z20180606-09_FTMSms2hcd,9,1,Int 127,Young,Mixture1,S2
Z20180606-10_FTMSms2hcd,10,1,Int 127,Young,Mixture1,S2
Z20180606-11_FTMSms2hcd,11,1,Int 127,Young,Mixture1,S2
Z20180606-12_FTMSms2hcd,12,1,Int 127,Young,Mixture1,S2
Z20180606-01_FTMSms2hcd,1,1,Int 128,Young,Mixture1,S3
Z20180606-02_FTMSms2hcd,2,1,Int 128,Young,Mixture1,S3
Z20180606-03_FTMSms2hcd,3,1,Int 128,Young,Mixture1,S3
Z20180606-04_FTMSms2hcd,4,1,Int 128,Young,Mixture1,S3
Z20180606-05_FTMSms2hcd,5,1,Int 128,Young,Mixture1,S3
Z20180606-06_FTMSms2hcd,6,1,Int 128,Young,Mixture1,S3
Z20180606-07_FTMSms2hcd,7,1,Int 128,Young,Mixture1,S3
Z20180606-08_FTMSms2hcd,8,1,Int 128,Young,Mixture1,S3
Z20180606-09_FTMSms2hcd,9,1,Int 128,Young,Mixture1,S3
Z20180606-10_FTMSms2hcd,10,1,Int 128,Young,Mixture1,S3
Z20180606-11_FTMSms2hcd,11,1,Int 128,Young,Mixture1,S3
Z20180606-12_FTMSms2hcd,12,1,Int 128,Young,Mixture1,S3
Z20180606-01_FTMSms2hcd,1,1,Int 129,Aged,Mixture1,S4
Z20180606-02_FTMSms2hcd,2,1,Int 129,Aged,Mixture1,S4
Z20180606-03_FTMSms2hcd,3,1,Int 129,Aged,Mixture1,S4
Z20180606-04_FTMSms2hcd,4,1,Int 129,Aged,Mixture1,S4
Z20180606-05_FTMSms2hcd,5,1,Int 129,Aged,Mixture1,S4
Z20180606-06_FTMSms2hcd,6,1,Int 129,Aged,Mixture1,S4
Z20180606-07_FTMSms2hcd,7,1,Int 129,Aged,Mixture1,S4
Z20180606-08_FTMSms2hcd,8,1,Int 129,Aged,Mixture1,S4
Z20180606-09_FTMSms2hcd,9,1,Int 129,Aged,Mixture1,S4
Z20180606-10_FTMSms2hcd,10,1,Int 129,Aged,Mixture1,S4
Z20180606-11_FTMSms2hcd,11,1,Int 129,Aged,Mixture1,S4
Z20180606-12_FTMSms2hcd,12,1,Int 129,Aged,Mixture1,S4
Z20180606-01_FTMSms2hcd,1,1,Int 130,Aged,Mixture1,S5
Z20180606-02_FTMSms2hcd,2,1,Int 130,Aged,Mixture1,S5
Z20180606-03_FTMSms2hcd,3,1,Int 130,Aged,Mixture1,S5
Z20180606-04_FTMSms2hcd,4,1,Int 130,Aged,Mixture1,S5
Z20180606-05_FTMSms2hcd,5,1,Int 130,Aged,Mixture1,S5
Z20180606-06_FTMSms2hcd,6,1,Int 130,Aged,Mixture1,S5
Z20180606-07_FTMSms2hcd,7,1,Int 130,Aged,Mixture1,S5
Z20180606-08_FTMSms2hcd,8,1,Int 130,Aged,Mixture1,S5
Z20180606-09_FTMSms2hcd,9,1,Int 130,Aged,Mixture1,S5
Z20180606-10_FTMSms2hcd,10,1,Int 130,Aged,Mixture1,S5
Z20180606-11_FTMSms2hcd,11,1,Int 130,Aged,Mixture1,S5
Z20180606-12_FTMSms2hcd,12,1,Int 130,Aged,Mixture1,S5
Z20180606-01_FTMSms2hcd,1,1,Int 131,Aged,Mixture1,S6
Z20180606-02_FTMSms2hcd,2,1,Int 131,Aged,Mixture1,S6
Z20180606-03_FTMSms2hcd,3,1,Int 131,Aged,Mixture1,S6
Z20180606-04_FTMSms2hcd,4,1,Int 131,Aged,Mixture1,S6
Z20180606-05_FTMSms2hcd,5,1,Int 131,Aged,Mixture1,S6
Z20180606-06_FTMSms2hcd,6,1,Int 131,Aged,Mixture1,S6
Z20180606-07_FTMSms2hcd,7,1,Int 131,Aged,Mixture1,S6
Z20180606-08_FTMSms2hcd,8,1,Int 131,Aged,Mixture1,S6
Z20180606-09_FTMSms2hcd,9,1,Int 131,Aged,Mixture1,S6
Z20180606-10_FTMSms2hcd,10,1,Int 131,Aged,Mixture1,S6
Z20180606-11_FTMSms2hcd,11,1,Int 131,Aged,Mixture1,S6
Z20180606-12_FTMSms2hcd,12,1,Int 131,Aged,Mixture1,S6
Loading