forked from wfondrie/mokapot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_writer_flashlfq.py
81 lines (65 loc) · 2.79 KB
/
test_writer_flashlfq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Test that FlashLFQ export is working"""
import pytest
import mokapot
import numpy as np
import pandas as pd
def test_sanity(psms_ondisk, tmp_path):
    """Smoke-test the FlashLFQ writers end to end.

    Runs ``mokapot.brew`` and ``mokapot.assign_confidence`` on the on-disk
    PSMs, then writes FlashLFQ files through both the method and the
    module-level function and checks the shape of what was written.
    """
    # Only the scores are needed downstream; the first return value is unused.
    _, scores = mokapot.brew([psms_ondisk])
    conf = mokapot.assign_confidence(
        [psms_ondisk],
        scores_list=scores,
        eval_fdr=0.05,
    )

    single_path = conf.to_flashlfq(tmp_path / "test1.txt")
    mokapot.to_flashlfq(conf, tmp_path / "test2.txt")
    doubled_path = mokapot.to_flashlfq([conf, conf], tmp_path / "test3.txt")

    # A plain string is not an acceptable confidence input.
    with pytest.raises(ValueError):
        mokapot.to_flashlfq("blah", tmp_path / "test4.txt")

    single = pd.read_table(single_path)
    doubled = pd.read_table(doubled_path)

    # Writing the same confidence object twice should double the row count.
    assert 2 * len(single) == len(doubled)
    assert len(single.columns) == 7
    # TODO needs to be adapted to OnDisk confidence assignment
def test_basic(mock_conf, tmp_path):
    """Compare the written FlashLFQ table against the exact expected frame."""
    out_file = mokapot.to_flashlfq(mock_conf, tmp_path / "test.txt")
    observed = pd.read_table(out_file)

    # Column order matters: assert_frame_equal compares it as well.
    expected_columns = {
        "File Name": ["c.mzML", "c.mzML"],
        "Base Sequence": ["ABCDXYZ", "ABCDEFG"],
        "Full Sequence": ["B.ABCD[+2.817]XYZ.A", "ABCDE(shcah8)FG"],
        "Peptide Monoisotopic Mass": [1, 2],
        "Scan Retention Time": [60, 120],
        "Precursor Charge": [2, 3],
        "Protein Accession": ["A|B|C; B|C|A", "A|B|C"],
    }
    expected = pd.DataFrame(expected_columns)

    pd.testing.assert_frame_equal(observed, expected)
def test_with_missing(mock_conf, tmp_path):
    """Blanking any one of the needed optional columns must raise ValueError."""
    baseline = mock_conf._optional_columns.copy()
    out_file = tmp_path / "test.txt"

    for missing in ("filename", "calcmass", "rt", "charge"):
        # Start from the untouched mapping each time so that exactly one
        # column is unset per iteration.
        trimmed = baseline.copy()
        trimmed[missing] = None
        mock_conf._optional_columns = trimmed

        with pytest.raises(ValueError):
            mokapot.to_flashlfq(mock_conf, out_file)
def test_no_proteins(mock_conf, tmp_path):
    """With the protein column unset, every accession is read back as NaN."""
    mock_conf._protein_column = None

    out_file = mokapot.to_flashlfq(mock_conf, tmp_path / "test.txt")
    accessions = pd.read_table(out_file)["Protein Accession"]

    expected = pd.Series([np.nan] * 2, name="Protein Accession")
    pd.testing.assert_series_equal(accessions, expected)
def test_fasta_proteins(mock_conf, mock_proteins, tmp_path):
    """Check the accession column once protein groups are attached."""
    column = "Protein Accession"
    out_file = tmp_path / "test.txt"

    mock_conf._proteins = mock_proteins
    mock_conf._has_proteins = True

    written = pd.read_table(mokapot.to_flashlfq(mock_conf, out_file))
    pd.testing.assert_series_equal(
        written[column],
        pd.Series(["X|Y|Z", "A|B|C; X|Y|Z"], name=column),
    )

    # After the shared-peptide mapping is emptied, only one row is expected.
    mock_conf._proteins.shared_peptides = {}

    written = pd.read_table(mokapot.to_flashlfq(mock_conf, out_file))
    pd.testing.assert_series_equal(
        written[column],
        pd.Series(["X|Y|Z"], name=column),
    )