Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fragger and xcorr modules #2439

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,111 @@ public static float GetAverageFragmentMassError(IEnumerable<MatchedFragmentIon>
return massErrors.Average();
}

/// <summary>
/// Computes an MSFragger-style hyperscore for the fragment ions matched to one peptide of a spectral match.
/// The score is log10(nN!) + log10(nC!) + log10(sumN * sumC), where nN / nC are the numbers of matched
/// N- and C-terminal ions and sumN / sumC are their summed intensities. Ions whose terminus is neither
/// N nor C are ignored. If either intensity sum is not positive, the intensity term falls back to 0.1.
///
/// Reference: Kong AT, Leprevost FV, Avtonomov DM, Mellacheruvu D, Nesvizhskii AI.
/// "MSFragger: ultrafast and comprehensive peptide identification in shotgun proteomics", Nat. Methods.
/// https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5409104/
/// </summary>
/// <param name="psm">Spectral match whose matched fragment ions are scored.</param>
/// <param name="selectedPeptide">Key into <c>psm.BioPolymersWithSetModsToMatchingFragments</c> selecting which ions to use.</param>
/// <returns>The hyperscore as a single-precision value.</returns>
public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide)
{
    float nTerminalIntensity = 0;
    float cTerminalIntensity = 0;
    int nTerminalMatches = 0;
    int cTerminalMatches = 0;

    // Tally count and summed intensity separately for each terminus.
    foreach (var matchedIon in psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide])
    {
        var terminus = matchedIon.NeutralTheoreticalProduct.Terminus;

        if (terminus == FragmentationTerminus.N)
        {
            nTerminalIntensity += (float)matchedIon.Intensity;
            nTerminalMatches++;
            continue;
        }

        if (terminus == FragmentationTerminus.C)
        {
            cTerminalIntensity += (float)matchedIon.Intensity;
            cTerminalMatches++;
        }
    }

    // A terminus with no matched ions contributes nothing to the factorial terms.
    float nLogFactorial = 0;
    if (nTerminalMatches > 0)
    {
        nLogFactorial = GetLog10Factorial(nTerminalMatches).Value;
    }

    float cLogFactorial = 0;
    if (cTerminalMatches > 0)
    {
        cLogFactorial = GetLog10Factorial(cTerminalMatches).Value;
    }

    // The logarithm is only defined for a positive product; otherwise use the fixed fallback value.
    double logIntensityProduct = 0.1;
    if (nTerminalIntensity > 0 && cTerminalIntensity > 0)
    {
        logIntensityProduct = Math.Log10(nTerminalIntensity * cTerminalIntensity);
    }

    return nLogFactorial + cLogFactorial + (float)logIntensityProduct;
}

/// <summary>
/// Computes a simplified xcorr-like score from the fragment ions already matched to a peptide.
/// Background reading: https://willfondrie.com/2019/02/an-intuitive-look-at-the-xcorr-score-function-in-proteomics/
///
/// The classic xcorr subtracts the mean intensity at a range of offsets from the spectrum and then takes
/// a dot product with the theoretical spectrum. Because the matched ions are already known here, the dot
/// product is skipped: for every matched ion, a local background is estimated from the peaks whose m/z lies
/// within 75 of the ion's m/z, and the ion contributes its intensity minus that background (never less than zero).
///
/// The background is the summed intensity of the peaks in the window, minus the ion's own intensity,
/// divided by the m/z span between the first and last peak in the window (no division when that span is zero).
/// If the window contains no peaks at all, the background is taken as zero.
/// </summary>
/// <param name="psm">Spectral match providing the scan's spectrum and the matched fragment ions.</param>
/// <param name="selectedPeptide">Key into <c>psm.BioPolymersWithSetModsToMatchingFragments</c> selecting which ions to score.</param>
/// <returns>The summed background-corrected intensity of the matched ions; 0 when there are no matched ions.</returns>
public static float Xcorr(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide)
{
    // Half-width of the m/z window used to estimate the local background around each ion.
    const double offsetWindow = 75;

    double xcorr = 0;
    var xArray = psm.MsDataScan.MassSpectrum.XArray;
    var yArray = psm.MsDataScan.MassSpectrum.YArray;
    var fragments = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide];

    foreach (var peptideFragmentIon in fragments)
    {
        int startIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz - offsetWindow);
        int endIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz + offsetWindow);

        // A negative result is the bitwise complement of the insertion point:
        // the first peak at or above the lower bound, and the last peak at or below the upper bound.
        startIndex = startIndex < 0 ? ~startIndex : startIndex;
        endIndex = endIndex < 0 ? ~endIndex - 1 : endIndex;

        double background = 0;

        // startIndex can equal xArray.Length and endIndex can be -1 when no peak falls inside the window
        // (empty spectrum, or an ion m/z outside the recorded range). Indexing would throw in that case,
        // so only estimate a background when the window actually contains peaks.
        if (startIndex <= endIndex)
        {
            for (int i = startIndex; i <= endIndex; i++)
            {
                background += yArray[i];
            }

            // Exclude the ion's own intensity from its background.
            background -= peptideFragmentIon.Intensity;

            double range = xArray[endIndex] - xArray[startIndex];
            if (range > 0)
            {
                background /= range;
            }
        }

        xcorr += Math.Max(peptideFragmentIon.Intensity - background, 0);
    }

    return (float)xcorr;
}

/// <summary>
/// Returns log10(n!) computed as the sum of log10(i) for i = 2..n, which avoids overflowing
/// an integer factorial for larger n.
/// </summary>
/// <param name="n">The non-negative integer whose factorial is taken.</param>
/// <returns>
/// log10(n!) as a single-precision value; 0 for n = 0 and n = 1 (since 0! = 1! = 1);
/// <c>null</c> when <paramref name="n"/> is negative, where the factorial is undefined.
/// </returns>
public static float? GetLog10Factorial(int n)
{
    if (n < 0)
    {
        return null;
    }

    // Accumulate in double and narrow once at the end to limit rounding error.
    // Starting at 2 is equivalent to starting at 1 because log10(1) is exactly 0.
    double log10Factorial = 0.0;
    for (int i = 2; i <= n; i++)
    {
        log10Factorial += Math.Log10(i);
    }

    return (float)log10Factorial;
}
#endregion
}
}
202 changes: 202 additions & 0 deletions MetaMorpheus/Test/PepAnalysisEngineTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
using EngineLayer;
using MassSpectrometry;
using System.Collections.Generic;
using NUnit.Framework;
using Omics.Fragmentation;
using System.Linq;
using MzLibUtil;
using Proteomics.ProteolyticDigestion;
using Omics.Modifications;
using Omics.Digestion;
using Proteomics;
using System;
using Chemistry;

namespace Test
{
public class PepAnalysisEngineTests
{
/// <summary>
/// Checks a single known value: log10(5!) = log10(120).
/// </summary>
[TestCase(5, 2.07918119f)]
public static void GetLog10Factorial_ReturnsCorrectValue(int n, float? expected)
{
    // Act
    float? result = PepAnalysisEngine.GetLog10Factorial(n);

    // Assert
    // The actual value goes first so failure messages report expected/actual the right way round,
    // and a small tolerance avoids relying on bit-exact single-precision equality.
    Assert.That(result, Is.EqualTo(expected).Within(1e-6f));
}

/// <summary>
/// Scores a match with three N-terminal and three C-terminal ions, each group having intensities 100, 200 and 300.
/// Expected: log10(3!) + log10(3!) + log10(600 * 600) = 0.778151 + 0.778151 + 5.556303 = 7.112605.
/// </summary>
[Test]
public static void TestGetFraggerHyperScore()
{
    // Arrange
    var p = new Protein("PEPTIDE", "accession");
    var d = p.Digest(new DigestionParams(), new List<Modification>(), new List<Modification>()).ToList();
    PeptideWithSetModifications pep = d.First();

    CommonParameters commonParameters = new CommonParameters();

    TestDataFile t = new TestDataFile(new List<PeptideWithSetModifications> { pep });

    MsDataScan mzLibScan1 = t.GetOneBasedScan(2);
    Ms2ScanWithSpecificMass scan1 = new Ms2ScanWithSpecificMass(mzLibScan1, pep.MonoisotopicMass.ToMz(1), 1, null, commonParameters);

    var peptideFragmentIons = new List<MatchedFragmentIon>
    {
        new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 100, 1, 1, 0), 100, 100, 1),
        new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 200, 2, 2, 0), 200, 200, 2),
        new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 300, 3, 3, 0), 300, 300, 3),
        new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 100, 1, 1, 0), 100, 100, 1),
        new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 200, 2, 2, 0), 200, 200, 1),
        new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 300, 3, 3, 0), 300, 300, 1)
    };

    SpectralMatch psm1 = new PeptideSpectralMatch(pep, 0, 3, 0, scan1, commonParameters, peptideFragmentIons);

    psm1.ResolveAllAmbiguities();

    // Act
    float hyperScore = PepAnalysisEngine.GetFraggerHyperScore(psm1, psm1.BestMatchingBioPolymersWithSetMods.First().Peptide);

    // Assert
    Assert.That(hyperScore, Is.EqualTo(7.112605f).Within(0.000001f));
}

/// <summary>
/// Checks log10(n!) against precomputed values for n = 1..10.
/// </summary>
[TestCase(1, 0.0)]
[TestCase(2, 0.30103)]
[TestCase(3, 0.7781513)]
[TestCase(4, 1.38021123)]
[TestCase(5, 2.07918119)]
[TestCase(6, 2.85733247)]
[TestCase(7, 3.70243049)]
[TestCase(8, 4.60552073)]
[TestCase(9, 5.559763)]
[TestCase(10, 6.559763)]
public static void GetLog10Factorial_ReturnsExpectedResult(int n, double? expected)
{
    // Act
    float? result = PepAnalysisEngine.GetLog10Factorial(n);

    // Assert
    // Actual first, expected inside the constraint; the expected value is compared at full
    // double precision instead of being narrowed to float beforehand.
    Assert.That(result, Is.Not.Null);
    Assert.That(result.Value, Is.EqualTo(expected.Value).Within(0.0001));
}

/// <summary>
/// A negative argument has no factorial, so the method is expected to report null.
/// </summary>
[Test]
public static void GetLog10Factorial_NegativeInputReturnsNull()
{
    // Act
    float? result = PepAnalysisEngine.GetLog10Factorial(-1);

    // Assert
    Assert.That(result, Is.Null);
}

/// <summary>
/// Checks log10(20!) (about 18.39) against an independently accumulated sum of logarithms.
/// </summary>
[Test]
public static void GetLog10Factorial_LargeInput_ReturnsExpectedResult()
{
    // Arrange
    const int n = 20;
    double expected = 0.0;
    for (int i = 1; i <= n; i++)
    {
        expected += Math.Log10(i);
    }

    // Act
    float? result = PepAnalysisEngine.GetLog10Factorial(n);

    // Assert
    // The tolerance only needs to absorb single-precision rounding; a tolerance of several
    // units would accept results that are wrong by a large fraction of the expected value.
    Assert.That(result, Is.Not.Null);
    Assert.That(result.Value, Is.EqualTo((float)expected).Within(1e-4f));
}

/// <summary>
/// Scores two matched ions against a five-peak spectrum.
/// Ion at m/z 150 (intensity 20): window peaks 100..200, background (10 + 20 + 30 - 20) / (200 - 100) = 0.4, contribution 19.6.
/// Ion at m/z 250 (intensity 40): window peaks 200..300, background (30 + 40 + 50 - 40) / (300 - 200) = 0.8, contribution 39.2.
/// Expected total: 58.8.
/// </summary>
[Test]
public void Xcorr_ValidInput_ReturnsExpectedResult()
{
    // Arrange
    var xArray = new double[] { 100, 150, 200, 250, 300 };
    var yArray = new double[] { 10, 20, 30, 40, 50 };

    var fragments = new List<MatchedFragmentIon>
    {
        new MatchedFragmentIon(new Product(ProductType.b, Omics.Fragmentation.FragmentationTerminus.N, 150, 1,1,0), 150, 20, 1),
        new MatchedFragmentIon(new Product(ProductType.b, Omics.Fragmentation.FragmentationTerminus.N, 250, 1,1,0), 250, 40, 1),
    };

    var psm = CreateSpectralMatch(xArray, yArray, [150, 250], [20, 40], fragments);

    var selectedPeptide = psm.BestMatchingBioPolymersWithSetMods.First().Peptide;

    // Act
    float result = EngineLayer.PepAnalysisEngine.Xcorr(psm, selectedPeptide);

    // Assert
    // The expected value is exactly derivable, so the tolerance only covers single-precision rounding.
    Assert.That(result, Is.EqualTo(58.8).Within(1e-3));
}

/// <summary>
/// With no matched fragment ions there is nothing to accumulate, so the score must be zero.
/// </summary>
[Test]
public void Xcorr_EmptyFragments_ReturnsZero()
{
    // Arrange
    var xArray = new double[] { 100, 150, 200, 250, 300 };
    var yArray = new double[] { 10, 20, 30, 40, 50 };

    var fragments = new List<MatchedFragmentIon>();

    var psm = CreateSpectralMatch(xArray, yArray, new double[0], new double[0], fragments);

    var selectedPeptide = psm.BestMatchingBioPolymersWithSetMods.First().Peptide;

    // Act
    float result = EngineLayer.PepAnalysisEngine.Xcorr(psm, selectedPeptide);

    // Assert
    // Actual value first so a failure message reports expected/actual correctly.
    Assert.That(result, Is.EqualTo(0f));
}

/// <summary>
/// Builds a <c>PeptideSpectralMatch</c> for a peptide taken from residues 1-2 of the protein "PEPTIDE",
/// backed by a synthetic scan made from the given m/z and intensity arrays.
/// </summary>
/// <param name="xArray">m/z values of the synthetic spectrum.</param>
/// <param name="yArray">Intensities of the synthetic spectrum.</param>
/// <param name="fragmentMz">Not used by this helper.</param>
/// <param name="fragmentIntensity">Not used by this helper.</param>
/// <param name="matchedFragmentIons">Matched ions attached to the resulting match.</param>
/// <returns>A spectral match with notch 0, score 0 and scan index 1.</returns>
private SpectralMatch CreateSpectralMatch(double[] xArray, double[] yArray, double[] fragmentMz, double[] fragmentIntensity, List<MatchedFragmentIon> matchedFragmentIons)
{
    var protein = new Protein("PEPTIDE", "ACCESSION", "ORGANISM");
    var peptide = new PeptideWithSetModifications(protein, new DigestionParams(), 1, 2, CleavageSpecificity.Full, "", 0, new Dictionary<int, Modification>(), 0);
    Ms2ScanWithSpecificMass ms2Scan = CreateMs2ScanWithSpecificMass(xArray, yArray);

    // Positional arguments after the peptide: notch, score, scan index.
    return new PeptideSpectralMatch(peptide, 0, 0.0, 1, ms2Scan, new CommonParameters(), matchedFragmentIons);
}

/// <summary>
/// Wraps a synthetic scan built from the given arrays in an <c>Ms2ScanWithSpecificMass</c>,
/// using a precursor m/z of 1, a precursor charge of 1, an empty file path and default parameters.
/// </summary>
/// <param name="xArray">m/z values of the synthetic spectrum.</param>
/// <param name="yArray">Intensities of the synthetic spectrum.</param>
private Ms2ScanWithSpecificMass CreateMs2ScanWithSpecificMass(double[] xArray, double[] yArray)
{
    MsDataScan dataScan = CreateMsDataScan(xArray, yArray);

    // Positional arguments after the scan: precursor monoisotopic peak m/z, precursor charge, full file path.
    return new Ms2ScanWithSpecificMass(dataScan, 1.0, 1, "", new CommonParameters());
}
/// <summary>
/// Creates an <c>MsDataScan</c> around a spectrum built from the given arrays, filling every
/// other constructor argument with a fixed placeholder value.
/// </summary>
/// <param name="xArray">m/z values of the synthetic spectrum.</param>
/// <param name="yArray">Intensities of the synthetic spectrum.</param>
private MsDataScan CreateMsDataScan(double[] xArray, double[] yArray)
{
    MzSpectrum spectrum = CreateMzSpectrum(xArray, yArray);
    MzRange scanWindow = new MzRange(1, 500);
    double? injectionTime = 1.0;
    double[,] noise = new double[1, 1];

    return new MsDataScan(
        spectrum,
        1,                                  // one-based scan number
        1,                                  // MSn order
        true,                               // is centroid
        MassSpectrometry.Polarity.Positive,
        1.0,                                // retention time
        scanWindow,
        "",                                 // scan filter
        MZAnalyzerType.Orbitrap,
        1.0,                                // total ion current
        injectionTime,
        noise,
        "");                                // native id
}
/// <summary>
/// Creates an <c>MzSpectrum</c> from the given m/z and intensity arrays, asking the constructor to copy them.
/// </summary>
/// <param name="xArray">m/z values.</param>
/// <param name="yArray">Intensities.</param>
private MzSpectrum CreateMzSpectrum(double[] xArray, double[] yArray)
    => new MzSpectrum(xArray, yArray, true);
}
}