Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated RAMClustR tool #514

Merged
merged 4 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions tools/ramclustr/macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,55 @@
help="NA, NaN, and Inf values are replaced with zero, and zero values are sometimes returned from
peak peaking. When TRUE, zero values will be replaced with a small amount of noise, with noise level
set based on the detected signal intensities for that feature."/>
<!-- Optional QC summary: when enabled, QC plots are added to the Rplots.pdf output. -->
<param name="quality_control" type="boolean" label="Quality control"
       truevalue="TRUE" falsevalue="FALSE" checked="false"
       help="Adds plots to the Rplots.pdf output that summarize quality control for clustering and for quality control sample variation based on compound ($SpecAbund) and feature ($MSdata and $MSMSdata, if present)."/>
<!-- Optional CSV dataset describing the experimental design. -->
<param name="ExpDes" type="data" format="csv" optional="true"
       label="Experimental design metadata"
       help="Definition of experimental design in CSV format."/>
</section>

<section name="filtering" title="Filtering">
<!-- Blank filtering: the options in the TRUE branch are only offered when 'Filter blanks' is TRUE. -->
<conditional name="feature_filter_blanks">
    <param name="filter_blanks" type="select" label="Filter blanks"
           help="Removes features that are found at similar intensity in blank samples.">
        <option value="FALSE" selected="true">FALSE</option>
        <option value="TRUE">TRUE</option>
    </param>
    <when value="TRUE">
        <!-- The tool wrapper passes this value to RAMClustR as one quoted string, so only a single tag can be given. -->
        <param name="qc_tag" type="text" value="QC" label="qc tag"
               help="String used to identify quality control (QC) samples. It is passed as a single string (e.g. 'QC') and is searched for in the 'sample.names' slot. Default is 'QC'."/>
        <param name="blank_tag" type="text" value="blank" label="blank tag"
               help="See 'qc tag', but for blanks to use as background. Default is 'blank'."/>
        <param name="sn" type="integer" value="3" label="signal to noise (sn)"
               help="Ratio that defines 'signal', e.g. sn = 3 indicates that signal intensity must be 3-fold higher in samples than in blanks, on average, for a feature to be retained. Default is '3'."/>
        <param name="remove_blanks" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true"
               label="Remove blanks"
               help="TRUE by default. Removes any recognized blank samples from the MSdata and MSMSdata sets after they are used to filter contaminant features."/>
    </when>
    <when value="FALSE"/>
</conditional>

<conditional name="feature_filter_cv">
<!-- Switch for CV-based feature filtering; the TRUE branch exposes 'qc tag' and 'Max cv'. -->
<param name="filter_cv" type="select" label="Filter cv"
       help="Removes features whose coefficient of variation (CV) in the QC samples exceeds 'Max cv'. It is recommended to first run 'Filter blanks' to remove non-sample derived signal.">
    <option value="FALSE" selected="true">FALSE</option>
    <option value="TRUE">TRUE</option>
</param>
<when value="TRUE">
<!-- The tool wrapper passes this value to RAMClustR as one quoted string, so only a single tag can be given. -->
<param name="qc_tag" type="text" value="QC" label="qc tag"
       help="String used to identify quality control (QC) samples. It is passed as a single string (e.g. 'QC') and is searched for in the 'sample.names' slot. Default is 'QC'."/>
Comment on lines +227 to +230
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could perhaps be moved to a token, but I'm not sure whether nested macros work.

<!-- Upper CV limit applied by the CV feature filter. -->
<param name="max_cv" type="float" value="0.5"
       label="Max cv"
       help="Numeric maximum allowable cv for any feature. Default = 0.5."/>
</when>
<when value="FALSE"></when>
</conditional>
</section>
</xml>

<xml name="output_msp">
Expand All @@ -197,6 +243,9 @@
<!-- Merged MSP output; only created when the 'merge_msp' option evaluates to true. -->
<data name="mass_spectra_merged" format="msp"
      label="Mass spectra from ${tool.name} on ${on_string}">
    <filter>msp_output_details['merge_msp']</filter>
</data>
<!-- Rplots.pdf collected from the working directory; only created when quality control
     or CV filtering is enabled. The label follows the same pattern as the MSP output so
     that history items can be traced back to their inputs. -->
<data name="pdf_plot" format="pdf" from_work_dir="Rplots.pdf"
      label="PDF plot from ${tool.name} on ${on_string}">
    <filter>extras['quality_control'] or filtering['feature_filter_cv']['filter_cv'] == "TRUE"</filter>
</data>
</xml>

<xml name="citations">
Expand Down
58 changes: 57 additions & 1 deletion tools/ramclustr/ramclustr.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="ramclustr" name="RAMClustR" version="@TOOL_VERSION@+galaxy4" profile="21.09">
<tool id="ramclustr" name="RAMClustR" version="@TOOL_VERSION@+galaxy5" profile="21.09">
<description>A feature clustering algorithm for non-targeted mass spectrometric metabolomics data.</description>
<macros>
<import>macros.xml</import>
Expand Down Expand Up @@ -72,6 +72,16 @@
ramclustObj = RAMClustR::rc.feature.replace.na(ramclustObj = ramclustObj)
#end if

## Optional blank filtering: only emitted when 'Filter blanks' is set to TRUE.
#set $blank_opts = $filtering.feature_filter_blanks
#if $blank_opts.filter_blanks == "TRUE":
    ramclustObj = RAMClustR::rc.feature.filter.blanks(
        ramclustObj = ramclustObj,
        qc.tag = "${blank_opts.qc_tag}",
        blank.tag = "${blank_opts.blank_tag}",
        sn = ${blank_opts.sn},
        remove.blanks = ${blank_opts.remove_blanks}
    )
#end if

#if $normalisation.normalisation_method.normalize != "none":
ramclustObj = apply_normalisation(
ramclustr_obj = ramclustObj,
Expand All @@ -89,6 +99,14 @@
)
#end if

## Optional CV filtering: only emitted when 'Filter cv' is set to TRUE.
#set $cv_opts = $filtering.feature_filter_cv
#if $cv_opts.filter_cv == "TRUE":
    ramclustObj = RAMClustR::rc.feature.filter.cv(
        ramclustObj = ramclustObj,
        qc.tag = "${cv_opts.qc_tag}",
        max.cv = ${cv_opts.max_cv}
    )
#end if

ramclustObj = RAMClustR::rc.ramclustr(
ramclustObj = ramclustObj,
st = $filetype.required.st,
Expand All @@ -104,6 +122,10 @@
rt.only.low.n = $extras.rt_only_low_n,
fftempdir = NULL,
)

## Optional quality-control step: only emitted when 'Quality control' is checked.
#if str($extras.quality_control) == "TRUE":
    ramclustObj = RAMClustR::rc.qc(ramclustObj = ramclustObj)
#end if

store_output(
ramclustr_obj = ramclustObj,
Expand Down Expand Up @@ -235,6 +257,40 @@
</section>
<output name="mass_spectra_merged" file="test5_spectra.msp" ftype="msp" lines_diff="10"/>
</test>
<test expect_num_outputs="3"><!-- TEST 6 -->
    <!-- Exercises blank filtering, CV filtering and the quality-control plot together on an xcms input. -->
    <section name="filetype">
        <param name="type_choice" value="xcms"/>
        <section name="xcms">
            <param name="input_xcms" value="test6.fillpeaks" ftype="rdata.xcms.fillpeaks"/>
        </section>
    </section>
    <section name="msp_output_details">
        <param name="mzdec" value="4"/>
    </section>
    <section name="extras">
        <param name="quality_control" value="TRUE"/>
    </section>
    <section name="filtering">
        <!-- Both filters are declared as conditionals in macros.xml, so they are addressed as conditionals here. -->
        <conditional name="feature_filter_blanks">
            <param name="filter_blanks" value="TRUE"/>
            <param name="blank_tag" value="Blanc"/>
        </conditional>
        <conditional name="feature_filter_cv">
            <param name="filter_cv" value="TRUE"/>
        </conditional>
    </section>
    <output name="mass_spectra_merged" file="test6_out.msp" ftype="msp"/>
    <!-- The abundance table and the PDF are only checked by approximate size. -->
    <output name="spec_abundance">
        <assert_contents>
            <has_size value="309" delta="10"/>
        </assert_contents>
    </output>
    <output name="pdf_plot">
        <assert_contents>
            <has_size value="6842" delta="100"/>
        </assert_contents>
    </output>
</test>
</tests>

<help>
Expand Down
Binary file added tools/ramclustr/test-data/test6.fillpeaks
Binary file not shown.
53 changes: 53 additions & 0 deletions tools/ramclustr/test-data/test6_out.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
NAME:C1
IONMODE:Negative
SPECTRUMTYPE:Centroid
RETENTIONTIME:54.89
Num Peaks:10
135.0295 9979069
179.056 5594441
195.0506 2625643
165.0401 1960745
187.0372 1743661
148.0279 995311
146.0145 950514
219.0448 540250
189.0327 411265
180.0589 227648

NAME:C2
IONMODE:Negative
SPECTRUMTYPE:Centroid
RETENTIONTIME:63.74
Num Peaks:8
191.0193 28601409
111.0086 2371386
129.0192 2125989
192.0223 1724583
85.0294 991832
87.0087 956947
359.0473 811419
101.0241 397166

NAME:C3
IONMODE:Negative
SPECTRUMTYPE:Centroid
RETENTIONTIME:59.06
Num Peaks:7
149.0451 7763606
151.0067 3434565
166.0174 822559
75.0087 771138
177.0401 707232
209.066 258300
178.0633 166079

NAME:C4
IONMODE:Negative
SPECTRUMTYPE:Centroid
RETENTIONTIME:50.66
Num Peaks:4
96.9692 6132644
102.9565 1867855
176.8926 120442
184.8312 80882