Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add non-matching output lint rule #114

Merged
merged 35 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
d7929c5
feat: add non-matching output lint rule
adthrasher Jul 9, 2024
81dd295
chore: add PR # to CHANGELOG
adthrasher Jul 9, 2024
20665a5
chore: add document reset
adthrasher Jul 9, 2024
afdd6cd
Merge branch 'main' into feat/nonmatching_output
adthrasher Jul 9, 2024
339ec53
Merge branch 'feat/nonmatching_output' of https://github.com/stjude-r…
adthrasher Jul 9, 2024
1180cbe
Merge branch 'main' into feat/nonmatching_output
adthrasher Jul 9, 2024
7efddb2
chore: add exceptions and skip empty `output` sections
adthrasher Jul 9, 2024
0af033d
chore: add key name to diagnostic
adthrasher Jul 9, 2024
f5a0d15
chore: fix tags
adthrasher Jul 9, 2024
a0d5229
chore: remove extra blank line
adthrasher Jul 9, 2024
6d46d62
chore: add test for non-matching output
adthrasher Jul 10, 2024
e0fbd60
chore: add task or workflow name to diagnostics
adthrasher Jul 10, 2024
ffb998b
chore: add order check and extraneous items
adthrasher Jul 10, 2024
b964609
Apply suggestions from code review
adthrasher Jul 11, 2024
d1f31b5
Apply suggestions from code review
adthrasher Jul 11, 2024
4adcd2a
Apply suggestions from code review
adthrasher Jul 11, 2024
c8f8f9b
chore: add check for non-object `outputs` key
adthrasher Jul 11, 2024
93d7615
chore: use `output` for OutputSection
adthrasher Jul 11, 2024
75a84ef
chore: add test with empty output
adthrasher Jul 11, 2024
4effe6c
chore: add tests for lint exceptions
adthrasher Jul 11, 2024
4646473
refactor: process individual entries to enable `#@ except`
adthrasher Jul 11, 2024
950e3cd
refactor: store prior objects
adthrasher Jul 12, 2024
f2daa18
chore: apply PR feedback
adthrasher Jul 12, 2024
85b3c13
chore: String -> &str for diagnostics
adthrasher Jul 12, 2024
419a4d6
chore: cargo fmt
adthrasher Jul 12, 2024
3eba4a1
Apply suggestions from code review
adthrasher Jul 15, 2024
714638f
chore: rephrase descriptions
adthrasher Jul 15, 2024
470ef8d
refactor: pull common functionality into a function
adthrasher Jul 16, 2024
ddd13b8
refactor: track section enter/exit
adthrasher Jul 16, 2024
7dc4ace
chore: fmt
adthrasher Jul 16, 2024
f7330b3
Merge branch 'main' into feat/nonmatching_output
adthrasher Jul 16, 2024
1b49c52
chore: update Gauntlet and Arena after merge
adthrasher Jul 16, 2024
9a3fa7b
chore: update document callback signature after merge
adthrasher Jul 16, 2024
95579df
chore: update rules syntax
adthrasher Jul 16, 2024
23ac418
chore: add lifetime
adthrasher Jul 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions Arena.toml

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions Gauntlet.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ commit_hash = "4536dd2c6719b86f4c50348f0ce354818260cadb"

[repositories."broadinstitute/palantir-workflows"]
identifier = "broadinstitute/palantir-workflows"
commit_hash = "1e32078d3b57dcb2291534d0caa30f600d40967e"
commit_hash = "4e7fdfa7992dd1bf4c4721e77c81e4d7b8ecf4fe"

[repositories."broadinstitute/warp"]
identifier = "broadinstitute/warp"
commit_hash = "e49ffbcd72b138ad4dc6ec62c847943908e4abf9"
commit_hash = "e52f2c1aabdb88b565c132e5587d05b91c4e6c0f"

[repositories."chanzuckerberg/czid-workflows"]
identifier = "chanzuckerberg/czid-workflows"
commit_hash = "a7c54be041e9fbd4cfdd9fac99226e6b4f6f3bfd"
commit_hash = "b7c97a87d995a31d85bb6c8ee780fdf33d5bc176"

[repositories."getwilds/ww-fastq-to-cram"]
identifier = "getwilds/ww-fastq-to-cram"
Expand Down Expand Up @@ -379,39 +379,39 @@ permalink = "https://github.com/biowdl/tasks/blob/2bf875300d90a3c9c8d670b3d99026
[[diagnostics]]
document = "broadinstitute/palantir-workflows:/HaplotypeMap/BuildHaplotypeMap.wdl"
message = "BuildHaplotypeMap.wdl:17:1: error: a WDL document must start with a version statement"
permalink = "https://github.com/broadinstitute/palantir-workflows/blob/1e32078d3b57dcb2291534d0caa30f600d40967e/HaplotypeMap/BuildHaplotypeMap.wdl/#L17"
permalink = "https://github.com/broadinstitute/palantir-workflows/blob/4e7fdfa7992dd1bf4c4721e77c81e4d7b8ecf4fe/HaplotypeMap/BuildHaplotypeMap.wdl/#L17"

[[diagnostics]]
document = "broadinstitute/warp:/pipelines/skylab/scATAC/scATAC.wdl"
message = "scATAC.wdl:203:9: error: duplicate key `cpu` in runtime section"
permalink = "https://github.com/broadinstitute/warp/blob/e49ffbcd72b138ad4dc6ec62c847943908e4abf9/pipelines/skylab/scATAC/scATAC.wdl/#L203"
permalink = "https://github.com/broadinstitute/warp/blob/e52f2c1aabdb88b565c132e5587d05b91c4e6c0f/pipelines/skylab/scATAC/scATAC.wdl/#L203"

[[diagnostics]]
document = "broadinstitute/warp:/tasks/broad/GermlineVariantDiscovery.wdl"
message = "GermlineVariantDiscovery.wdl:137:32: error: expected string, but found integer"
permalink = "https://github.com/broadinstitute/warp/blob/e49ffbcd72b138ad4dc6ec62c847943908e4abf9/tasks/broad/GermlineVariantDiscovery.wdl/#L137"
permalink = "https://github.com/broadinstitute/warp/blob/e52f2c1aabdb88b565c132e5587d05b91c4e6c0f/tasks/broad/GermlineVariantDiscovery.wdl/#L137"

[[diagnostics]]
document = "broadinstitute/warp:/tasks/broad/GermlineVariantDiscovery.wdl"
message = "GermlineVariantDiscovery.wdl:65:32: error: expected string, but found integer"
permalink = "https://github.com/broadinstitute/warp/blob/e49ffbcd72b138ad4dc6ec62c847943908e4abf9/tasks/broad/GermlineVariantDiscovery.wdl/#L65"
permalink = "https://github.com/broadinstitute/warp/blob/e52f2c1aabdb88b565c132e5587d05b91c4e6c0f/tasks/broad/GermlineVariantDiscovery.wdl/#L65"

[[diagnostics]]
document = "broadinstitute/warp:/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl"
message = "UltimaGenomicsWholeGenomeGermlineTasks.wdl:814:27: error: expected string, but found integer"
permalink = "https://github.com/broadinstitute/warp/blob/e49ffbcd72b138ad4dc6ec62c847943908e4abf9/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl/#L814"
permalink = "https://github.com/broadinstitute/warp/blob/e52f2c1aabdb88b565c132e5587d05b91c4e6c0f/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl/#L814"

[[diagnostics]]
document = "broadinstitute/warp:/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl"
message = "UltimaGenomicsWholeGenomeGermlineTasks.wdl:866:27: error: expected string, but found integer"
permalink = "https://github.com/broadinstitute/warp/blob/e49ffbcd72b138ad4dc6ec62c847943908e4abf9/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl/#L866"
permalink = "https://github.com/broadinstitute/warp/blob/e52f2c1aabdb88b565c132e5587d05b91c4e6c0f/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl/#L866"

[[diagnostics]]
document = "broadinstitute/warp:/tests/cemba/pr/CheckCembaOutputs.wdl"
message = "CheckCembaOutputs.wdl:1:1: error: a WDL document must start with a version statement"
permalink = "https://github.com/broadinstitute/warp/blob/e49ffbcd72b138ad4dc6ec62c847943908e4abf9/tests/cemba/pr/CheckCembaOutputs.wdl/#L1"
permalink = "https://github.com/broadinstitute/warp/blob/e52f2c1aabdb88b565c132e5587d05b91c4e6c0f/tests/cemba/pr/CheckCembaOutputs.wdl/#L1"

[[diagnostics]]
document = "chanzuckerberg/czid-workflows:/workflows/index-generation/index-generation.wdl"
message = "index-generation.wdl:1:9: error: unsupported WDL version `development`"
permalink = "https://github.com/chanzuckerberg/czid-workflows/blob/a7c54be041e9fbd4cfdd9fac99226e6b4f6f3bfd/workflows/index-generation/index-generation.wdl/#L1"
permalink = "https://github.com/chanzuckerberg/czid-workflows/blob/b7c97a87d995a31d85bb6c8ee780fdf33d5bc176/workflows/index-generation/index-generation.wdl/#L1"
1 change: 1 addition & 0 deletions wdl-lint/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Added the `SectionOrdering` lint rule ([#109](https://github.com/stjude-rust-labs/wdl/pull/109)).
* Added the `DeprecatedObject` lint rule ([#112](https://github.com/stjude-rust-labs/wdl/pull/112)).
* Added the `DescriptionMissing` lint rule ([#113](https://github.com/stjude-rust-labs/wdl/pull/113)).
* Added the `NonmatchingOutput` lint rule ([#114](https://github.com/stjude-rust-labs/wdl/pull/114)).

### Fixed

Expand Down
1 change: 1 addition & 0 deletions wdl-lint/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ pub fn rules() -> Vec<Box<dyn Rule>> {
Box::new(rules::SectionOrderingRule),
Box::new(rules::DeprecatedObjectRule),
Box::new(rules::DescriptionMissingRule),
Box::new(rules::NonmatchingOutputRule),
];

// Ensure all the rule ids are unique and pascal case
Expand Down
2 changes: 2 additions & 0 deletions wdl-lint/src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ mod missing_metas;
mod missing_output;
mod missing_runtime;
mod no_curly_commands;
mod nonmatching_output;
mod pascal_case;
mod preamble_comments;
mod preamble_whitespace;
Expand All @@ -41,6 +42,7 @@ pub use missing_metas::*;
pub use missing_output::*;
pub use missing_runtime::*;
pub use no_curly_commands::*;
pub use nonmatching_output::*;
pub use pascal_case::*;
pub use preamble_comments::*;
pub use preamble_whitespace::*;
Expand Down
275 changes: 275 additions & 0 deletions wdl-lint/src/rules/nonmatching_output.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
//! A lint rule to ensure each output is documented in `meta`.

use indexmap::IndexMap;
use wdl_ast::v1::MetadataSection;
use wdl_ast::v1::MetadataValue;
use wdl_ast::v1::OutputSection;
use wdl_ast::v1::TaskDefinition;
use wdl_ast::v1::TaskOrWorkflow;
use wdl_ast::v1::WorkflowDefinition;
use wdl_ast::AstNode;
use wdl_ast::AstToken;
use wdl_ast::Diagnostic;
use wdl_ast::Diagnostics;
use wdl_ast::Document;
use wdl_ast::Span;
use wdl_ast::ToSpan;
use wdl_ast::VisitReason;
use wdl_ast::Visitor;

use crate::Rule;
use crate::Tag;
use crate::TagSet;

/// The identifier for the non-matching output rule.
const ID: &str = "NonmatchingOutput";

/// Creates a "non-matching output" diagnostic.
fn nonmatching_output(span: Span, name: &str, context: &TaskOrWorkflow) -> Diagnostic {
let (ty, item_name) = match context {
TaskOrWorkflow::Task(t) => ("task", t.name().as_str().to_string()),
TaskOrWorkflow::Workflow(w) => ("workflow", w.name().as_str().to_string()),
};

Diagnostic::warning(format!(
"output `{name}` is missing from `meta` section in {ty} `{item_name}`"
))
.with_rule(ID)
.with_highlight(span)
.with_fix(format!(
"add a description of output `{name}` to documentation in `meta.outputs`"
))
}

/// Creates a missing outputs in meta diagnostic.
fn missing_outputs_in_meta(span: Span, context: &TaskOrWorkflow) -> Diagnostic {
let (ty, name) = match context {
TaskOrWorkflow::Task(t) => ("task", t.name().as_str().to_string()),
TaskOrWorkflow::Workflow(w) => ("workflow", w.name().as_str().to_string()),
};

Diagnostic::warning(format!(
"`outputs` key missing in `meta` section for the {ty} `{name}`"
))
.with_rule(ID)
.with_highlight(span)
.with_fix("add an `outputs` key to `meta` section describing the outputs")
}

/// Creates a diagnostic for extra `meta.outputs` entries.
fn extra_output_in_meta(span: Span, name: &str, context: &TaskOrWorkflow) -> Diagnostic {
let (ty, item_name) = match context {
TaskOrWorkflow::Task(t) => ("task", t.name().as_str().to_string()),
TaskOrWorkflow::Workflow(w) => ("workflow", w.name().as_str().to_string()),
};

Diagnostic::warning(format!(
"`{name}` appears in `outputs` section of the {ty} `{item_name}` but is not a declared \
`output`"
))
.with_rule(ID)
.with_highlight(span)
.with_fix(format!(
"ensure the output exists or remove the `{name}` key from `meta.outputs`"
))
}

/// Creates a diagnostic for out-of-order entries.
fn out_of_order(span: Span, output_span: Span, context: &TaskOrWorkflow) -> Diagnostic {
let (ty, item_name) = match context {
TaskOrWorkflow::Task(t) => ("task", t.name().as_str().to_string()),
TaskOrWorkflow::Workflow(w) => ("workflow", w.name().as_str().to_string()),
};

Diagnostic::warning(format!(
"`outputs` section of `meta` for the {ty} `{item_name}` is out of order"
))
.with_rule(ID)
.with_highlight(span)
.with_highlight(output_span)
.with_fix(
"ensure the keys within `meta.outputs` have the same order as they appear in `outputs`",
)
}

/// Creates a diagnostic for non-object `meta.outputs` entries.
fn non_object_meta_outputs(span: Span, context: &TaskOrWorkflow) -> Diagnostic {
let (ty, item_name) = match context {
TaskOrWorkflow::Task(t) => ("task", t.name().as_str().to_string()),
TaskOrWorkflow::Workflow(w) => ("workflow", w.name().as_str().to_string()),
};

Diagnostic::warning(format!(
"`outputs` key in `meta` section for the {ty} `{item_name}` is not an object"
adthrasher marked this conversation as resolved.
Show resolved Hide resolved
))
.with_rule(ID)
.with_highlight(span)
.with_fix("ensure `meta.outputs` is an object containing descriptions for each output")
}

/// Detects non-matching outputs.
#[derive(Default, Debug, Clone, Copy)]
pub struct NonmatchingOutputRule;

impl Rule for NonmatchingOutputRule {
fn id(&self) -> &'static str {
ID
}

fn description(&self) -> &'static str {
"Ensures that each output field is documented in the meta section under `meta.outputs`."
}

fn explanation(&self) -> &'static str {
"The meta section should have an `outputs` key and keys with descriptions for each output \
of the task/workflow. These must match exactly. i.e. for each named output of a task or \
workflow, there should be an entry under `meta.outputs` with that same name. \
Additionally, these entries should be in the same order (that order is up to the \
developer to decide). No extraneous output entries are allowed."
adthrasher marked this conversation as resolved.
Show resolved Hide resolved
}

fn tags(&self) -> TagSet {
TagSet::new(&[Tag::Completeness])
}
}

/// Check each output key exists in the `outputs` key within the `meta` section.
fn check_output_meta(
adthrasher marked this conversation as resolved.
Show resolved Hide resolved
state: &mut Diagnostics,
meta: &MetadataSection,
output: &OutputSection,
context: TaskOrWorkflow,
) {
// Get the `outputs` entries from the `meta` section.
if let Some(meta_outputs_key) = meta
.items()
.find(|entry| entry.name().as_str() == "outputs")
{
match meta_outputs_key.value() {
MetadataValue::Object(o) => {
let actual: IndexMap<_, _> = o
.items()
.map(|entry| {
(
entry.name().syntax().to_string(),
entry.syntax().text_range().to_span(),
)
})
.collect();

// Get the declared outputs.
let expected: IndexMap<_, _> = output
.declarations()
.map(|d| {
(
d.name().syntax().to_string(),
d.syntax().text_range().to_span(),
)
})
.collect();

let mut extra_found = false;

// Check for entries missing from `meta`.
for (name, span) in &expected {
if !actual.contains_key(name) {
if !extra_found {
extra_found = true;
}
state.add(nonmatching_output(*span, name, &context));
}
}

// Check for extra entries in `meta`.
for (name, span) in &actual {
if !expected.contains_key(name) {
if !extra_found {
extra_found = true;
}
state.add(extra_output_in_meta(*span, name, &context));
}
}

// Check for out-of-order entries.
if !extra_found && !actual.keys().eq(expected.keys()) {
state.add(out_of_order(
meta_outputs_key.syntax().text_range().to_span(),
output.syntax().text_range().to_span(),
&context,
));
}
}
_ => {
state.add(non_object_meta_outputs(
meta_outputs_key.syntax().text_range().to_span(),
&context,
));
}
}
} else {
state.add(missing_outputs_in_meta(
meta.syntax().first_token().unwrap().text_range().to_span(),
adthrasher marked this conversation as resolved.
Show resolved Hide resolved
&context,
));
}
}

impl Visitor for NonmatchingOutputRule {
type State = Diagnostics;

fn document(&mut self, _: &mut Self::State, reason: VisitReason, _: &Document) {
if reason == VisitReason::Exit {
return;
}

// Reset the visitor upon document entry
*self = Default::default();
}

fn workflow_definition(
&mut self,
state: &mut Self::State,
reason: VisitReason,
workflow: &WorkflowDefinition,
) {
if reason == VisitReason::Exit {
return;
}

// If the `meta` section is missing, the MissingMeta rule will handle it.
if let Some(meta) = workflow.metadata().next() {
// If the `output` section is missing, the MissingOutput rule will handle it.
if let Some(output) = workflow.outputs().next() {
adthrasher marked this conversation as resolved.
Show resolved Hide resolved
if output.declarations().count() > 0 {
check_output_meta(
state,
&meta,
&output,
TaskOrWorkflow::Workflow(workflow.clone()),
);
}
}
}
}

fn task_definition(
&mut self,
state: &mut Self::State,
reason: VisitReason,
task: &TaskDefinition,
) {
if reason == VisitReason::Exit {
return;
}

// If the `meta` section is missing, the MissingMeta rule will handle it.
if let Some(meta) = task.metadata().next() {
// If the `output` section is missing, the MissingOutput rule will handle it.
if let Some(output) = task.outputs().next() {
adthrasher marked this conversation as resolved.
Show resolved Hide resolved
if output.declarations().count() > 0 {
check_output_meta(state, &meta, &output, TaskOrWorkflow::Task(task.clone()));
}
}
}
}
}
2 changes: 1 addition & 1 deletion wdl-lint/tests/lints/command-mixed-line-cont/source.wdl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#@ except: NoCurlyCommands, DescriptionMissing
#@ except: DescriptionMissing, NoCurlyCommands, NonmatchingOutput
## This is a test of having mixed indentation in a line continuation.

version 1.1
Expand Down
2 changes: 1 addition & 1 deletion wdl-lint/tests/lints/command-mixed-spaces-first/source.wdl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#@ except: NoCurlyCommands, DescriptionMissing
#@ except: DescriptionMissing, NoCurlyCommands
## This is a test of having spaces before tabs in command sections.

version 1.1
Expand Down
2 changes: 1 addition & 1 deletion wdl-lint/tests/lints/command-mixed-tabs-first/source.wdl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#@ except: NoCurlyCommands, DescriptionMissing
#@ except: DescriptionMissing, NoCurlyCommands
## This is a test of having tabs before spaces in command sections.

version 1.1
Expand Down
2 changes: 1 addition & 1 deletion wdl-lint/tests/lints/command-mixed-trailing/source.wdl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#@ except: NoCurlyCommands, DescriptionMissing
#@ except: DescriptionMissing, NoCurlyCommands
## This is a test of having mixed _trailing_ indentation in command sections.
## There should be no warnings from the `CommandSectionMixedIndentation` rule.

Expand Down
Loading
Loading