diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs
index 1787b9537..71633b657 100644
--- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs
+++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs
@@ -1066,6 +1066,134 @@ public static float GetAverageFragmentMassError(IEnumerable
 
             return massErrors.Average();
         }
+
+        /// <summary>
+        /// Computes an MSFragger-style hyperscore for one candidate peptide of a spectral match:
+        /// log10(n!) + log10(c!) + log10(N-terminal intensity sum * C-terminal intensity sum),
+        /// where n and c are the numbers of matched N- and C-terminal fragment ions.
+        /// Taken from Nat. Methods. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5409104/
+        /// "MSFragger: ultrafast and comprehensive peptide identification in shotgun proteomics"
+        /// Andy T. Kong, Felipe V. Leprevost, Dmitry M. Avtonomov, Dattatreya Mellacheruvu, and Alexey I. Nesvizhskii
+        /// </summary>
+        /// <param name="psm">Spectral match that holds the matched fragment ions.</param>
+        /// <param name="selectedPeptide">Candidate peptide; must be a key of <c>psm.BioPolymersWithSetModsToMatchingFragments</c>.</param>
+        /// <returns>The hyperscore. The intensity term is 0.1 when either ion series has no summed intensity.</returns>
+        public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide)
+        {
+            var peptideFragmentIons = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide];
+            float nIonIntensitySum = 0;
+            float cIonIntensitySum = 0;
+            int nIonCount = 0;
+            int cIonCount = 0;
+
+            foreach (var ion in peptideFragmentIons)
+            {
+                if (ion.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.N)
+                {
+                    nIonIntensitySum += (float)ion.Intensity;
+                    nIonCount++;
+                }
+                else if (ion.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.C)
+                {
+                    cIonIntensitySum += (float)ion.Intensity;
+                    cIonCount++;
+                }
+            }
+
+            // GetLog10Factorial returns null for counts below 1; an empty ion series contributes nothing.
+            float nIonCountLog10Factorial = GetLog10Factorial(nIonCount) ?? 0;
+            float cIonCountLog10Factorial = GetLog10Factorial(cIonCount) ?? 0;
+
+            // Multiply as doubles so that two very large intensity sums cannot overflow float to infinity.
+            double log10IntensityProduct = (nIonIntensitySum > 0 && cIonIntensitySum > 0)
+                ? Math.Log10((double)nIonIntensitySum * cIonIntensitySum)
+                : 0.1;
+
+            return nIonCountLog10Factorial + cIonCountLog10Factorial + (float)log10IntensityProduct;
+        }
+
+        /// <summary>
+        /// Half-width, in m/z units, of the window around each matched ion that <see cref="Xcorr"/> uses
+        /// to estimate the local background.
+        /// </summary>
+        private const double XcorrBackgroundHalfWidth = 75;
+
+        /// <summary>
+        /// Simplified xcorr-like score computed directly from the ions already matched to the peptide.
+        /// https://willfondrie.com/2019/02/an-intuitive-look-at-the-xcorr-score-function-in-proteomics/
+        ///
+        /// A mass spectrum can be preprocessed by subtracting the mean intensities at all of the offsets.
+        /// Then a single dot product between the preprocessed mass spectrum and the theoretical peptide
+        /// mass spectrum yields the xcorr score, which is made possible because of the distributive
+        /// property of the dot product.
+        ///
+        /// Since we have already chosen the match for this scan, we can use the matched ions to calculate the
+        /// xcorr and skip the dot product step.
+        /// </summary>
+        /// <param name="psm">Spectral match; its <c>MsDataScan.MassSpectrum</c> supplies the peaks used for the background.</param>
+        /// <param name="selectedPeptide">Candidate peptide; must be a key of <c>psm.BioPolymersWithSetModsToMatchingFragments</c>.</param>
+        /// <returns>Sum over matched ions of max(ion intensity - local background, 0); 0 when there are no matched ions.</returns>
+        public static float Xcorr(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide)
+        {
+            double xcorr = 0;
+            var xArray = psm.MsDataScan.MassSpectrum.XArray;
+            var yArray = psm.MsDataScan.MassSpectrum.YArray;
+            var fragments = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide];
+
+            foreach (var peptideFragmentIon in fragments)
+            {
+                int startIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz - XcorrBackgroundHalfWidth);
+                int endIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz + XcorrBackgroundHalfWidth);
+
+                // BinarySearch returns the bitwise complement of the insertion index when there is no exact hit.
+                // Clamp to the first peak at or above the lower bound and the last peak at or below the upper bound.
+                startIndex = startIndex < 0 ? ~startIndex : startIndex;
+                endIndex = endIndex < 0 ? ~endIndex - 1 : endIndex;
+
+                // The window holds no peaks when startIndex > endIndex (empty spectrum, or the ion lies outside
+                // the recorded m/z range); indexing the arrays would then throw, so the background stays zero.
+                double background = 0;
+                if (startIndex <= endIndex)
+                {
+                    for (int i = startIndex; i <= endIndex; i++)
+                    {
+                        background += yArray[i];
+                    }
+                    background -= peptideFragmentIon.Intensity; // Subtract the intensity of the current ion
+
+                    double range = xArray[endIndex] - xArray[startIndex];
+                    if (range > 0)
+                    {
+                        background /= range;
+                    }
+                }
+
+                xcorr += Math.Max(peptideFragmentIon.Intensity - background, 0);
+            }
+
+            return (float)xcorr;
+        }
+
+        /// <summary>
+        /// Returns log10(n!) computed as the sum of log10(i) for i = 1..n, which avoids overflowing the factorial itself.
+        /// </summary>
+        /// <param name="n">Value whose factorial is taken.</param>
+        /// <returns>log10(n!) as a float, or null when <paramref name="n"/> is less than 1.</returns>
+        public static float? GetLog10Factorial(int n)
+        {
+            if (n < 1)
+            {
+                return null;
+            }
+
+            double log10Factorial = 0.0;
+            for (int i = 1; i <= n; i++)
+            {
+                log10Factorial += Math.Log10(i);
+            }
+
+            return (float)log10Factorial;
+        }
         #endregion
     }
 }
\ No newline at end of file
diff --git a/MetaMorpheus/Test/PepAnalysisEngineTests.cs b/MetaMorpheus/Test/PepAnalysisEngineTests.cs
new file mode 100644
index 000000000..57651bc13
--- /dev/null
+++ b/MetaMorpheus/Test/PepAnalysisEngineTests.cs
@@ -0,0 +1,216 @@
+using EngineLayer;
+using MassSpectrometry;
+using System.Collections.Generic;
+using NUnit.Framework;
+using Omics.Fragmentation;
+using System.Linq;
+using MzLibUtil;
+using Proteomics.ProteolyticDigestion;
+using Omics.Modifications;
+using Omics.Digestion;
+using Proteomics;
+using System;
+using Chemistry;
+
+namespace Test
+{
+    public class PepAnalysisEngineTests
+    {
+        [TestCase(5, 2.07918119f)]
+        public static void GetLog10Factorial_ReturnsCorrectValue(int n, float expected)
+        {
+            // Act
+            float? result = PepAnalysisEngine.GetLog10Factorial(n);
+
+            // Assert
+            Assert.That(result, Is.EqualTo(expected).Within(1e-6f));
+        }
+
+        [Test]
+        public static void TestGetFraggerHyperScore()
+        {
+            // Arrange
+            var p = new Protein("PEPTIDE", "accession");
+            var d = p.Digest(new DigestionParams(), new List<Modification>(), new List<Modification>()).ToList();
+            PeptideWithSetModifications pep = d.First();
+
+            CommonParameters commonParameters = new CommonParameters();
+
+            TestDataFile t = new TestDataFile(new List<PeptideWithSetModifications> { pep });
+
+            MsDataScan mzLibScan1 = t.GetOneBasedScan(2);
+            Ms2ScanWithSpecificMass scan1 = new Ms2ScanWithSpecificMass(mzLibScan1, pep.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters());
+
+            var peptideFragmentIons = new List<MatchedFragmentIon>
+            {
+                new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 100, 1, 1, 0), 100, 100, 1),
+                new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 200, 2, 2, 0), 200, 200, 2),
+                new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 300, 3, 3, 0), 300, 300, 3),
+                new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 100, 1, 1, 0), 100, 100, 1),
+                new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 200, 2, 2, 0), 200, 200, 1),
+                new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 300, 3, 3, 0), 300, 300, 1)
+            };
+
+            SpectralMatch psm1 = new PeptideSpectralMatch(pep, 0, 3, 0, scan1, commonParameters, peptideFragmentIons);
+
+            psm1.ResolveAllAmbiguities();
+
+            // Act
+            float hyperScore = PepAnalysisEngine.GetFraggerHyperScore(psm1, psm1.BestMatchingBioPolymersWithSetMods.First().Peptide);
+
+            // Assert: log10(3!) + log10(3!) + log10(600 * 600)
+            Assert.That(hyperScore, Is.EqualTo(7.112605f).Within(0.000001f));
+        }
+
+        [TestCase(1, 0.0)]
+        [TestCase(2, 0.30103)]
+        [TestCase(3, 0.7781513)]
+        [TestCase(4, 1.38021123)]
+        [TestCase(5, 2.07918119)]
+        [TestCase(6, 2.85733247)]
+        [TestCase(7, 3.70243049)]
+        [TestCase(8, 4.60552073)]
+        [TestCase(9, 5.559763)]
+        [TestCase(10, 6.559763)]
+        public static void GetLog10Factorial_ReturnsExpectedResult(int n, double expected)
+        {
+            float? result = PepAnalysisEngine.GetLog10Factorial(n);
+
+            Assert.That(result, Is.EqualTo((float)expected).Within(0.0001));
+        }
+
+        [Test]
+        public static void GetLog10Factorial_NegativeInputReturnsNull()
+        {
+            Assert.That(PepAnalysisEngine.GetLog10Factorial(-1), Is.Null);
+        }
+
+        [Test]
+        public static void GetLog10Factorial_LargeInput_ReturnsExpectedResult()
+        {
+            int n = 20;
+            double expected = 0.0;
+            for (int i = 1; i <= n; i++)
+            {
+                expected += Math.Log10(i);
+            }
+
+            float? result = PepAnalysisEngine.GetLog10Factorial(n);
+
+            // Only float rounding separates the two values, so the tolerance can be tight.
+            Assert.That(result, Is.EqualTo((float)expected).Within(0.0001));
+        }
+
+        [Test]
+        public void Xcorr_ValidInput_ReturnsExpectedResult()
+        {
+            // Arrange
+            var xArray = new double[] { 100, 150, 200, 250, 300 };
+            var yArray = new double[] { 10, 20, 30, 40, 50 };
+
+            var fragments = new List<MatchedFragmentIon>
+            {
+                new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 150, 1, 1, 0), 150, 20, 1),
+                new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 250, 1, 1, 0), 250, 40, 1),
+            };
+
+            var psm = CreateSpectralMatch(xArray, yArray, fragments);
+            var selectedPeptide = psm.BestMatchingBioPolymersWithSetMods.First().Peptide;
+
+            // Act
+            float result = PepAnalysisEngine.Xcorr(psm, selectedPeptide);
+
+            // Assert: (20 - 40 / 100) + (40 - 80 / 100) = 19.6 + 39.2
+            Assert.That(result, Is.EqualTo(58.8f).Within(0.001f));
+        }
+
+        [Test]
+        public void Xcorr_EmptyFragments_ReturnsZero()
+        {
+            // Arrange
+            var xArray = new double[] { 100, 150, 200, 250, 300 };
+            var yArray = new double[] { 10, 20, 30, 40, 50 };
+            var fragments = new List<MatchedFragmentIon>();
+
+            var psm = CreateSpectralMatch(xArray, yArray, fragments);
+            var selectedPeptide = psm.BestMatchingBioPolymersWithSetMods.First().Peptide;
+
+            // Act
+            float result = PepAnalysisEngine.Xcorr(psm, selectedPeptide);
+
+            // Assert
+            Assert.That(result, Is.EqualTo(0f));
+        }
+
+        [Test]
+        public void Xcorr_NoPeaksInWindow_DoesNotThrow()
+        {
+            // Arrange: the ion lies far beyond the last peak, so its +/- 75 m/z window is empty.
+            var xArray = new double[] { 100, 150, 200, 250, 300 };
+            var yArray = new double[] { 10, 20, 30, 40, 50 };
+            var fragments = new List<MatchedFragmentIon>
+            {
+                new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 1000, 1, 1, 0), 1000, 5, 1),
+            };
+
+            var psm = CreateSpectralMatch(xArray, yArray, fragments);
+            var selectedPeptide = psm.BestMatchingBioPolymersWithSetMods.First().Peptide;
+
+            // Act
+            float result = PepAnalysisEngine.Xcorr(psm, selectedPeptide);
+
+            // Assert: with no background to subtract the ion contributes its full intensity.
+            Assert.That(result, Is.EqualTo(5f).Within(0.001f));
+        }
+
+        private SpectralMatch CreateSpectralMatch(double[] xArray, double[] yArray, List<MatchedFragmentIon> matchedFragmentIons)
+        {
+            PeptideWithSetModifications pwsm = new PeptideWithSetModifications(new Protein("PEPTIDE", "ACCESSION", "ORGANISM"), new DigestionParams(), 1, 2, CleavageSpecificity.Full, "", 0, new Dictionary<int, Modification>(), 0);
+            int notch = 0;
+            double score = 0;
+            int scanIndex = 1;
+            Ms2ScanWithSpecificMass scan = CreateMs2ScanWithSpecificMass(xArray, yArray);
+            CommonParameters commonParameters = new CommonParameters();
+
+            return new PeptideSpectralMatch(pwsm, notch, score, scanIndex, scan, commonParameters, matchedFragmentIons);
+        }
+
+        private Ms2ScanWithSpecificMass CreateMs2ScanWithSpecificMass(double[] xArray, double[] yArray)
+        {
+            MsDataScan scan = CreateMsDataScan(xArray, yArray);
+            double precursorMonoisotopicPeakMz = 1;
+            int precursorCharge = 1;
+            string fullFilePath = "";
+            CommonParameters commonParam = new CommonParameters();
+
+            return new Ms2ScanWithSpecificMass(scan, precursorMonoisotopicPeakMz, precursorCharge, fullFilePath, commonParam);
+        }
+
+        private MsDataScan CreateMsDataScan(double[] xArray, double[] yArray)
+        {
+            MzSpectrum massSpectrum = CreateMzSpectrum(xArray, yArray);
+            int oneBasedScanNumber = 1;
+            int msnOrder = 1;
+            bool isCentroid = true;
+            MassSpectrometry.Polarity polarity = MassSpectrometry.Polarity.Positive;
+            double retentionTime = 1.0;
+            MzRange scanWindowRange = new MzRange(1, 500);
+            string scanFilter = "";
+            MZAnalyzerType mzAnalyzer = MZAnalyzerType.Orbitrap;
+            double totalIonCurrent = 1.0;
+            double? injectionTime = 1.0;
+            double[,] noiseData = new double[1, 1];
+            string nativeId = "";
+
+            return new MsDataScan(massSpectrum, oneBasedScanNumber, msnOrder, isCentroid, polarity, retentionTime, scanWindowRange, scanFilter, mzAnalyzer, totalIonCurrent, injectionTime, noiseData, nativeId);
+        }
+
+        private MzSpectrum CreateMzSpectrum(double[] xArray, double[] yArray)
+        {
+            double[] mz = xArray;
+            double[] intensities = yArray;
+            bool shouldCopy = true;
+            return new MzSpectrum(mz, intensities, shouldCopy);
+        }
+    }
+}