Skip to content

Commit

Permalink
Merge pull request #587 from hechth/target_screen
Browse files Browse the repository at this point in the history
added target screening tool
  • Loading branch information
hechth committed Sep 26, 2024
2 parents f5e491b + c24f25d commit 9432288
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 0 deletions.
8 changes: 8 additions & 0 deletions tools/misc/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ repositories:
- use_theoretical_mz_annotations.py
- macros.xml
- test-data
target_screen:
description: "Extract peaks from recetox-aplcms tables using a list of target ions"
long_description: |
"Extract peaks from recetox-aplcms tables using a list of target ions. MZ and RT tolerances are used to find matching features."
include:
- target_screen.xml
- target_screen.py
- test-data
59 changes: 59 additions & 0 deletions tools/misc/target_screen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import argparse

import numpy as np
import pandas as pd


def mz_match(marker, peak, ppm):
    """Tell whether marker and peak m/z values agree within a ppm tolerance.

    The allowed absolute deviation is ``ppm`` parts-per-million of the mean
    of the two compared values, so the test is symmetric in ``marker`` and
    ``peak``. The comparison is inclusive (``<=``).

    Args:
        marker: Marker m/z value(s); a scalar or a NumPy array.
        peak: Peak m/z value(s); a scalar or a NumPy array that broadcasts
            against ``marker``.
        ppm: Tolerance in parts-per-million.

    Returns:
        A boolean (or boolean array of the broadcast shape) that is true
        where the two values lie within the tolerance.
    """
    mean_mz = (peak + marker) / 2
    allowed_deviation = mean_mz * ppm * 1e-06
    observed_deviation = np.abs(marker - peak)
    return observed_deviation <= allowed_deviation


def rt_match(marker, peak, tol):
    """Tell whether marker and peak retention times agree within ``tol``.

    Args:
        marker: Marker retention time(s); a scalar or a NumPy array.
        peak: Peak retention time(s); a scalar or a NumPy array that
            broadcasts against ``marker``.
        tol: Maximum allowed absolute difference, in the same units as the
            retention times. The bound is inclusive.

    Returns:
        A boolean (or boolean array of the broadcast shape) that is true
        where the absolute difference does not exceed ``tol``.
    """
    observed_deviation = np.abs(marker - peak)
    return observed_deviation <= tol


def find_matches(peaks, markers, ppm, rt_tol):
    """Collect every peak that matches a marker in both m/z and retention time.

    Args:
        peaks: DataFrame of detected peaks; must provide ``mz`` and ``rt``
            columns. All of its columns are carried into the result.
        markers: DataFrame of target ions; must provide ``formula``, ``mz``
            and ``rt`` columns.
        ppm: m/z tolerance in parts-per-million, forwarded to ``mz_match``.
        rt_tol: Retention time tolerance, forwarded to ``rt_match``.

    Returns:
        A DataFrame with one row per matching (marker, peak) pair: the
        marker's ``formula``, followed by the columns of the matching peak,
        followed by ``mz_diff`` and ``rt_diff`` (absolute differences
        between the marker and peak values).
    """
    # Marker values are turned into column vectors so that comparing them
    # with the flat peak arrays broadcasts to a (markers x peaks) grid.
    target_mz = markers['mz'].values[:, np.newaxis]
    target_rt = markers['rt'].values[:, np.newaxis]
    observed_mz = peaks['mz'].values
    observed_rt = peaks['rt'].values

    mz_ok = mz_match(target_mz, observed_mz, ppm)
    rt_ok = rt_match(target_rt, observed_rt, rt_tol)

    # Row indices address markers, column indices address peaks.
    marker_rows, peak_rows = np.nonzero(mz_ok & rt_ok)

    # Both selections get a fresh 0..n-1 index so they align row by row
    # when concatenated side by side.
    hit_markers = markers.iloc[marker_rows].reset_index(drop=True)
    hit_peaks = peaks.iloc[peak_rows].reset_index(drop=True)

    hits = pd.concat([hit_markers[['formula']], hit_peaks], axis=1)
    hits['mz_diff'] = np.abs(hit_markers['mz'].values - hit_peaks['mz'].values)
    hits['rt_diff'] = np.abs(hit_markers['rt'].values - hit_peaks['rt'].values)
    return hits


def main():
    """Command-line entry point: screen a peak table for target ions.

    Reads the peak table (parquet) and the marker table (tab-separated text),
    matches them with ``find_matches`` and writes the hits as a tab-separated
    file without the index column.

    Command-line arguments:
        --peaks: Path to the peaks parquet file.
        --markers: Path to the tab-separated markers file.
        --output: Path of the tab-separated file to write.
        --ppm: m/z tolerance in parts-per-million (default 5).
        --rt_tol: Retention time tolerance (default 10).

    Exits through ``argparse`` with a usage error when a tolerance is
    negative or not a number.
    """
    parser = argparse.ArgumentParser(description='Find matches between peaks and markers.')
    parser.add_argument('--peaks', required=True, help='Path to the peaks parquet file.')
    parser.add_argument('--markers', required=True, help='Path to the markers TSV (tab-separated) file.')
    parser.add_argument('--output', required=True, help='Path to the output TSV file.')
    # Tolerances are parsed as floats so fractional values (e.g. 2.5 ppm) are
    # accepted; integer values given on the command line keep working.
    parser.add_argument('--ppm', type=float, default=5, help='PPM tolerance for mz matching.')
    parser.add_argument('--rt_tol', type=float, default=10, help='RT tolerance for rt matching.')
    args = parser.parse_args()

    # A negative tolerance can never be satisfied and would silently produce
    # an empty result, so it is reported as a usage error instead.
    if args.ppm < 0:
        parser.error('--ppm must be non-negative.')
    if args.rt_tol < 0:
        parser.error('--rt_tol must be non-negative.')

    peaks = pd.read_parquet(args.peaks)
    markers = pd.read_csv(args.markers, sep='\t')

    hits = find_matches(peaks, markers, args.ppm, args.rt_tol)

    hits.to_csv(args.output, sep='\t', index=False)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
47 changes: 47 additions & 0 deletions tools/misc/target_screen.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<tool id="target_screen" name="MS target screening" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
<description>Extract peaks from recetox-aplcms tables using a list of target ions</description>
<macros>
<token name="@TOOL_VERSION@">0.1.0</token>
<token name="@VERSION_SUFFIX@">0</token>
</macros>

<requirements>
<requirement type="package" version="2.2.3">pandas</requirement>
<requirement type="package" version="17.0.0">pyarrow</requirement>
</requirements>

<command detect_errors="exit_code"><![CDATA[
python3 '${__tool_directory__}/target_screen.py' --peaks '$peaks' --markers '$markers' --output '$hits' --ppm $ppm --rt_tol $rt
]]></command>

<inputs>
<param name="peaks" type="data" format="parquet"/>
<param name="markers" type="data" format="tabular"/>
<param name="ppm" type="integer" min="0" max="1000" value="10" label="ppm" help="Tolerance for peak filtering in ppm." />
<param name="rt" type="integer" min="0" max="100" value="10" label="rt tolerance" help="Tolerance regarding retention time to filter out peaks" />
</inputs>

<outputs>
<data name="hits" format="tabular" label="${tool.name} on ${on_string}" />
</outputs>

<tests>
<test>
<param name="peaks" value="target_screen/peaks.parquet"/>
<param name="markers" value="target_screen/markers.tsv"/>
<output name="hits" value="target_screen/out.tsv"/>
</test>
</tests>
<help><![CDATA[
.. class:: infomark
**What it does**
This tool pulls out peaks from a table given a list of markers.
A marker matches a peak when their m/z values agree within the specified ppm tolerance and their retention times differ by no more than the specified tolerance, which is given in the same units as the retention times.
]]></help>
<citations>
<citation type="doi">10.25080/Majora-92bf1922-00a</citation>
</citations>
</tool>
23 changes: 23 additions & 0 deletions tools/misc/test-data/target_screen/markers.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
formula mz rt
C8H6Cl2O3 218.9621 474.6
C9H15N3O1 180.1142 458.23
C5H2Cl3N1O1 195.9129 488.1
C13H10O3 213.0557 508.40
C13H9FO3 231.0463 521.48
C6H5NO3 138.0197 166.56
C6H4Cl1N1O2 155.9858 176.62
C19H28N2O5S 395.1646 598.96
C10H12N2O3S1 239.0496 312.55
C4H11O3P1S1 169.0094 168.08
C14H17Cl2NO2 300.0564 689.79
C11H13ClO2 167.0633 572.93
C12H4Cl2F6N4OS 434.9314 767.86
C12H4Cl2F6N4O2S 450.9263 791.29
C16H22ClN3O2 322.1328 706.5
C16H11ClF6N2O 395.0391 741.93
C10H11Cl1O3 213.0324 533.9
C7H9NO2S 170.0281 363.59
C12H7Cl3O2 286.9439 830.97
C18H15Cl3O8 462.976 662.99
C12H7Cl3O5S 366.9007 700.52
C9H9N4Cl 207.0443 403.37
18 changes: 18 additions & 0 deletions tools/misc/test-data/target_screen/out.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
formula mz rt sd1 sd2 area mz_diff rt_diff
C8H6Cl2O3 218.9619738108278 473.4840709352675 0.6057217022739683 2.7706017478506073 1239147.63695882 0.00012618917219242576 1.1159290647325406
C9H15N3O1 180.11422341297595 450.9460162486645 0.4692965104502825 4.727634916193644 1100073.3285644436 2.341297593488889e-05 7.2839837513355405
C5H2Cl3N1O1 195.91267599889463 487.37949630118806 0.8695685392506757 2.8811688054510127 734461.0596300099 0.00022400110538001172 0.7205036988119673
C13H10O3 213.05556658306853 508.4123751384482 2.9585968043814983 3.226731392934289 1787580.264815322 0.00013341693147594924 0.01237513844824889
C13H9FO3 231.04576243564085 521.2436784813573 0.9930695671903609 2.469013097815558 1316270.081622402 0.0005375643591492008 0.23632151864273965
C10H12N2O3S1 239.04945126090132 311.8317362000094 0.5578277726641567 3.57063615115722 3042462.634739455 0.0001487390986767423 0.7182637999906092
C14H17Cl2NO2 300.0561299922103 685.3731548839577 0.8491884774374224 2.8491999009146074 1021277.4141378121 0.0002700077897088704 4.416845116042282
C12H4Cl2F6N4OS 434.93037227267905 766.6610671335172 0.6265405149641161 3.55175113250731 43923382.478327975 0.0010277273209453597 1.198932866482778
C12H4Cl2F6N4O2S 450.9259113906124 789.7479646306683 0.5765707513162325 3.4834377486718897 35843894.74749327 0.00038860938764173625 1.5420353693316429
C16H22ClN3O2 322.13274143359513 705.9176130811956 0.765497607933695 2.9798451004946203 7686414.229962895 5.8566404845805664e-05 0.5823869188044455
C16H11ClF6N2O 395.0387483584033 741.1840034426168 0.9150873601266857 2.396923077539685 692605.613740076 0.0003516415966942077 0.7459965573831369
C10H11Cl1O3 213.03219616261535 532.8368925687558 0.8335128693984499 2.548404631638127 1231177.7029795102 0.00020383738464602175 1.0631074312441342
C7H9NO2S 170.0280487596005 363.28514725405876 0.8844811055327363 2.7876246329523737 915161.3987675996 5.124039950032966e-05 0.3048527459412185
C12H7Cl3O2 286.9434413572324 831.0018611928409 0.32058179843066653 1.7667251294853705 19934.364712896095 0.00045864276756901745 0.03186119284089273
C18H15Cl3O8 462.97625391610677 662.6552310211961 0.9093786171678189 2.128435471267278 1209160.0005544876 0.00025391610677161225 0.3347689788039361
C12H7Cl3O5S 366.90097256680355 699.9403505546061 0.8393755187990459 2.354260942300286 9578789.63215569 0.0002725668035736817 0.5796494453938976
C9H9N4Cl 207.04420254367005 402.95120970553893 1.2647033563807812 2.594410018631832 40475158.16355405 9.74563299394049e-05 0.4187902944610755
Binary file added tools/misc/test-data/target_screen/peaks.parquet
Binary file not shown.

0 comments on commit 9432288

Please sign in to comment.