From 1b7ee2a2990b574f351af0e1dd8a81f9fd0fbd7e Mon Sep 17 00:00:00 2001
From: Andrew Frantz <andrew.frantz@stjude.org>
Date: Tue, 18 Jun 2024 15:13:15 -0400
Subject: [PATCH] feat: upgrade to new wdl parser (#6)

* [WIP]chore: use latest wdl crates

* Update explain.rs

* Update explain.rs

* Update explain.rs

* Update file.rs

* Update explain.rs

* feat: separate check and lint commands

* chore: apply review feedback

Co-Authored-By: Peter Huene <peter@huene.dev>

---------

Co-authored-by: Peter Huene <peter@huene.dev>
---
 Cargo.toml                         |   2 +-
 src/commands.rs                    |   2 +-
 src/commands/{lint.rs => check.rs} |  61 +++++++----
 src/commands/explain.rs            |  55 +++++-----
 src/file.rs                        | 159 ++++++-----------------------
 src/main.rs                        |   8 +-
 src/report.rs                      | 135 +++++-------------------
 7 files changed, 135 insertions(+), 287 deletions(-)
 rename src/commands/{lint.rs => check.rs} (55%)
diff --git a/Cargo.toml b/Cargo.toml
index 307c051..4088628 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,6 +20,6 @@ nonempty = "0.9.0"
 pest = { version = "2.7.5", features = ["pretty-print"] }
 tracing = "0.1.40"
 tracing-subscriber = "0.3.18"
-wdl = { version = "0.3.0", features = ["ast", "core", "grammar"] }
+wdl = { version = "0.4.0", features = ["codespan"] }
 walkdir = "2.4.0"
 colored = "2.1.0"
diff --git a/src/commands.rs b/src/commands.rs
index 096ee39..8628bbd 100644
--- a/src/commands.rs
+++ b/src/commands.rs
@@ -1,2 +1,2 @@
+pub mod check;
 pub mod explain;
-pub mod lint;
diff --git a/src/commands/lint.rs b/src/commands/check.rs
similarity index 55%
rename from src/commands/lint.rs
rename to src/commands/check.rs
index 7ae49e9..f24d892 100644
--- a/src/commands/lint.rs
+++ b/src/commands/check.rs
@@ -9,11 +9,11 @@ use codespan_reporting::term::DisplayStyle;
 
 #[derive(Clone, Debug, Default, ValueEnum)]
 pub enum Mode {
-    /// Prints concerns as multiple lines.
+    /// Prints diagnostics as multiple lines.
     #[default]
     Full,
 
-    /// Prints concerns as one line.
+    /// Prints diagnostics as one line.
     OneLine,
 }
 
@@ -26,18 +26,14 @@ impl std::fmt::Display for Mode {
     }
 }
 
-/// Arguments for the `lint` subcommand.
+/// Common arguments for the `check` and `lint` subcommands.
 #[derive(Parser, Debug)]
 #[command(author, version, about)]
-pub struct Args {
-    /// The files or directories to lint.
+pub struct Common {
+    /// The files or directories to check.
     #[arg(required = true)]
     paths: Vec<PathBuf>,
 
-    /// The extensions to collect when expanding a directory.
-    #[arg(short, long, default_value = "wdl")]
-    extensions: Vec<String>,
-
     /// Disables color output.
     #[arg(long)]
     no_color: bool,
@@ -45,30 +41,53 @@ pub struct Args {
     /// The report mode.
     #[arg(short = 'm', long, default_value_t, value_name = "MODE")]
     report_mode: Mode,
+}
 
-    /// The specification version.
-    #[arg(short, long, default_value_t, value_enum, value_name = "VERSION")]
-    specification_version: wdl::core::Version,
+/// Arguments for the `check` subcommand.
+#[derive(Parser, Debug)]
+#[command(author, version, about)]
+pub struct CheckArgs {
+    #[command(flatten)]
+    common: Common,
+
+    /// Perform lint checks in addition to syntax validation.
+    #[arg(short, long)]
+    lint: bool,
 }
 
-pub fn lint(args: Args) -> anyhow::Result<()> {
-    let (config, writer) = get_display_config(&args);
+/// Arguments for the `lint` subcommand.
+#[derive(Parser, Debug)]
+#[command(author, version, about)]
+pub struct LintArgs {
+    #[command(flatten)]
+    common: Common,
+}
 
-    match sprocket::file::Repository::try_new(args.paths, args.extensions)?
-        .report_concerns(config, writer)?
+pub fn check(args: CheckArgs) -> anyhow::Result<()> {
+    let (config, writer) = get_display_config(&args.common);
+
+    match sprocket::file::Repository::try_new(args.common.paths, vec!["wdl".to_string()])?
+        .report_diagnostics(config, writer, args.lint)?
     {
-        // There are parse errors or validation failures.
+        // There are syntax errors.
         (true, _) => std::process::exit(1),
-        // There are no parse errors or validation failures, but there are lint warnings.
+        // There are lint failures.
         (false, true) => std::process::exit(2),
-        // There are no concerns.
-        _ => {}
+        // There are no diagnostics.
+        (false, false) => {}
     }
 
     Ok(())
 }
 
-fn get_display_config(args: &Args) -> (Config, StandardStream) {
+pub fn lint(args: LintArgs) -> anyhow::Result<()> {
+    check(CheckArgs {
+        common: args.common,
+        lint: true,
+    })
+}
+
+fn get_display_config(args: &Common) -> (Config, StandardStream) {
     let display_style = match args.report_mode {
         Mode::Full => DisplayStyle::Rich,
         Mode::OneLine => DisplayStyle::Short,
diff --git a/src/commands/explain.rs b/src/commands/explain.rs
index 30f4923..8744a4c 100644
--- a/src/commands/explain.rs
+++ b/src/commands/explain.rs
@@ -1,49 +1,50 @@
 use clap::Parser;
 use colored::Colorize;
-use wdl::ast::v1::lint as ast_lint;
-use wdl::core::concern::lint::Rule;
-use wdl::grammar::v1::lint as grammar_lint;
+use wdl::lint;
 
 /// Arguments for the `explain` subcommand.
 #[derive(Parser, Debug)]
-#[command(author, version, about)]
+#[command(author, version, about, after_help = list_all_rules())]
 pub struct Args {
-    /// The name or code of the rule to explain.
+    /// The name of the rule to explain.
     #[arg(required = true)]
-    pub rule_identifier: String,
+    pub rule_name: String,
 }
 
-pub fn pretty_print_rule<E>(rule: &dyn Rule<E>) {
-    println!("{}", rule.name().bold().underline());
-    println!("{}", format!("{}::{}", rule.code(), rule.tags(),).yellow());
-    println!();
-    println!("{}", rule.body());
+pub fn list_all_rules() -> String {
+    let mut result = "Available rules:".to_owned();
+    for rule in lint::v1::rules() {
+        result.push_str(&format!("\n  - {}", rule.id()));
+    }
+    result
+}
+
+pub fn pretty_print_rule(rule: &dyn lint::v1::Rule) -> String {
+    let mut result = format!("{}", rule.id().bold().underline());
+    result = format!("{}\n{}", result, rule.description());
+    result = format!("{}\n{}", result, format!("{}", rule.tags()).yellow());
+    result = format!("{}\n\n{}", result, rule.explanation());
+    match rule.url() {
+        Some(url) => format!("{}\n{}", result, url.underline().blue()),
+        None => result,
+    }
 }
 
 pub fn explain(args: Args) -> anyhow::Result<()> {
-    let ident = args.rule_identifier;
+    let name = args.rule_name;
+    let lowercase_name = name.to_lowercase();
 
-    let rule = grammar_lint::rules()
+    let rule = lint::v1::rules()
         .into_iter()
-        .find(|rule| rule.name() == ident || rule.code().to_string() == ident);
+        .find(|rule| rule.id().to_lowercase() == lowercase_name);
 
     match rule {
         Some(rule) => {
-            pretty_print_rule(&*rule);
+            println!("{}", pretty_print_rule(&*rule));
         }
         None => {
-            let rule = ast_lint::rules()
-                .into_iter()
-                .find(|rule| rule.name() == ident || rule.code().to_string() == ident);
-
-            match rule {
-                Some(rule) => {
-                    pretty_print_rule(&*rule);
-                }
-                None => {
-                    anyhow::bail!("No rule found with the identifier '{}'", ident);
-                }
-            }
+            println!("{}", list_all_rules());
+            anyhow::bail!("No rule found with the name '{}'", name);
         }
     }
 
diff --git a/src/file.rs b/src/file.rs
index 4b9a087..2f21925 100644
--- a/src/file.rs
+++ b/src/file.rs
@@ -6,19 +6,12 @@ use codespan_reporting::files::SimpleFiles;
 use codespan_reporting::term::termcolor::StandardStream;
 use codespan_reporting::term::Config;
 use indexmap::IndexMap;
-use wdl::core::Concern;
 
 use crate::report::Reporter;
 
 /// A filesystem error.
 #[derive(Debug)]
 pub enum Error {
-    /// A WDL 1.x abstract syntax tree error.
-    AstV1(wdl::ast::v1::Error),
-
-    /// A WDL 1.x grammar error.
-    GrammarV1(wdl::grammar::v1::Error),
-
     /// An invalid file name was provided.
     InvalidFileName(PathBuf),
 
@@ -38,8 +31,6 @@ pub enum Error {
 impl std::fmt::Display for Error {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Error::AstV1(err) => write!(f, "ast error: {err}"),
-            Error::GrammarV1(err) => write!(f, "grammar error: {err}"),
             Error::InvalidFileName(path) => write!(f, "invalid file name: {}", path.display()),
             Error::Io(err) => write!(f, "i/o error: {}", err),
             Error::MissingPath(path) => write!(f, "missing path: {}", path.display()),
@@ -148,129 +139,47 @@ impl Repository {
         Ok(())
     }
 
-    /// Attempts to parse an existing entry into a WDL v1.x abstract syntax
-    /// tree.
-    ///
-    /// # Examples
-    ///
-    /// ```no_run
-    /// use sprocket::file::Repository;
-    ///
-    /// let mut repository = Repository::default();
-    /// repository.load("test.wdl")?;
-    /// let ast = repository.parse("test.wdl")?;
-    ///
-    /// assert!(matches!(ast.tree(), Some(_)));
-    ///
-    /// # Ok::<(), Box<dyn std::error::Error>>(())
-    /// ```
-    pub fn parse(&self, entry: impl AsRef<str>) -> Result<wdl::ast::v1::Result> {
-        let entry = entry.as_ref();
-        let handle = *self
-            .handles
-            .get(entry)
-            .ok_or(Error::MissingEntry(entry.to_owned()))?;
-
-        let file = match self.sources.get(handle) {
-            Ok(result) => result,
-            // SAFETY: this entry will _always_ exist in the inner
-            // [`SimpleFiles`], as we just ensured it existed in the mapping
-            // between entry names and handles.
-            Err(_) => unreachable!(),
-        };
-
-        let mut all_concerns = wdl::core::concern::concerns::Builder::default();
-
-        let (pt, concerns) = wdl::grammar::v1::parse(file.source())
-            .map_err(Error::GrammarV1)?
-            .into_parts();
-
-        if let Some(concerns) = concerns {
-            for concern in concerns.into_inner() {
-                all_concerns = all_concerns.push(concern);
-            }
-        }
-
-        let pt = match pt {
-            Some(pt) => pt,
-            None => {
-                // SAFETY: because `grammar::v1::parse` returns a
-                // `grammar::v1::Result`, we know that either the concerns or the
-                // parse tree must be [`Some`] (else, this would have failed at
-                // `grammar::v1::Result` creation time). That said, we just checked
-                // that `pt` is [`None`]. In this case, it must follow that the
-                // concerns are not empty. As such, this will always unwrap.
-                return Ok(wdl::ast::v1::Result::try_new(None, all_concerns.build()).unwrap());
-            }
-        };
-
-        let (ast, concerns) = wdl::ast::v1::parse(pt).map_err(Error::AstV1)?.into_parts();
-
-        if let Some(concerns) = concerns {
-            for concern in concerns.into_inner() {
-                all_concerns = all_concerns.push(concern);
-            }
-        }
-
-        match ast {
-            Some(ast) => {
-                // SAFETY: the ast is [`Some`], so this will always unwrap.
-                Ok(wdl::ast::v1::Result::try_new(Some(ast), all_concerns.build()).unwrap())
-            }
-            None => {
-                // SAFETY: because `ast::v1::parse` returns a
-                // `ast::v1::Result`, we know that either the concerns or the
-                // parse tree must be [`Some`] (else, this would have failed at
-                // `ast::v1::Result` creation time). That said, we just checked
-                // that `ast` is [`None`]. In this case, it must follow that the
-                // concerns are not empty. As such, this will always unwrap.
-                Ok(wdl::ast::v1::Result::try_new(None, all_concerns.build()).unwrap())
-            }
-        }
-    }
-
-    /// Reports all concerns for all documents in the [`Repository`].
-    ///
-    /// # Examples
-    ///
-    /// ```no_run
-    /// use codespan_reporting::term::termcolor::ColorChoice;
-    /// use codespan_reporting::term::termcolor::StandardStream;
-    /// use codespan_reporting::term::Config;
-    /// use sprocket::file::Repository;
-    ///
-    /// let mut repository = Repository::default();
-    /// repository.load("test.wdl")?;
-    ///
-    /// let config = Config::default();
-    /// let writer = StandardStream::stderr(ColorChoice::Always);
-    /// repository.report_concerns(config, writer);
-    ///
-    /// # Ok::<(), Box<dyn std::error::Error>>(())
-    /// ```
-    pub fn report_concerns(&self, config: Config, writer: StandardStream) -> Result<(bool, bool)> {
-        let mut reporter = Reporter::new(config, writer, &self.sources);
-        let mut reported_error = false;
-        let mut reported_warning = false;
-
-        for (file_name, handle) in self.handles.iter() {
-            let document = self.parse(file_name)?;
-
-            if let Some(concerns) = document.into_concerns() {
-                for concern in concerns.into_inner() {
-                    reporter.report_concern(&concern, *handle);
+    /// Reports all diagnostics for all documents in the [`Repository`].
+    pub fn report_diagnostics(
+        &self,
+        config: Config,
+        writer: StandardStream,
+        lint: bool,
+    ) -> anyhow::Result<(bool, bool)> {
+        let mut reporter = Reporter::new(config, writer);
+        let mut syntax_failure = false;
+        let mut lint_failure = false;
+
+        for (_path, handle) in self.handles.iter() {
+            let file = self.sources.get(*handle).expect("Expected to find file");
+            match wdl::ast::Document::parse(file.source()).into_result() {
+                Ok(document) => {
+                    let validator = wdl::ast::Validator::default();
+                    if let Err(diagnostics) = validator.validate(&document) {
+                        reporter.emit_diagnostics(file, &diagnostics)?;
+                        syntax_failure = true;
+                        continue;
+                    }
 
-                    match concern {
-                        Concern::ParseError(_) | Concern::ValidationFailure(_) => {
-                            reported_error = true
+                    if lint {
+                        let mut linter = wdl::ast::Validator::empty();
+                        linter.add_v1_visitors(
+                            wdl::lint::v1::rules().into_iter().map(|r| r.visitor()),
+                        );
+                        if let Err(diagnostics) = linter.validate(&document) {
+                            reporter.emit_diagnostics(file, &diagnostics)?;
+                            lint_failure = true;
                         }
-                        Concern::LintWarning(_) => reported_warning = true,
                     }
                 }
+                Err(diagnostics) => {
+                    reporter.emit_diagnostics(file, &diagnostics)?;
+                    syntax_failure = true;
+                }
             }
         }
 
-        Ok((reported_error, reported_warning))
+        Ok((syntax_failure, lint_failure))
     }
 }
 
diff --git a/src/main.rs b/src/main.rs
index fef949f..e9d87ee 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -12,8 +12,11 @@ git_testament!(TESTAMENT);
 #[derive(Subcommand)]
 #[allow(clippy::large_enum_variant)]
 enum Commands {
+    /// Checks the syntactic validity of Workflow Description Language files.
+    Check(commands::check::CheckArgs),
+
     /// Lints Workflow Description Language files.
-    Lint(commands::lint::Args),
+    Lint(commands::check::LintArgs),
 
     /// Explains a lint warning.
     Explain(commands::explain::Args),
@@ -53,7 +56,8 @@ pub fn inner() -> anyhow::Result<()> {
     tracing::subscriber::set_global_default(subscriber)?;
 
     match cli.command {
-        Commands::Lint(args) => commands::lint::lint(args),
+        Commands::Check(args) => commands::check::check(args),
+        Commands::Lint(args) => commands::check::lint(args),
         Commands::Explain(args) => commands::explain::explain(args),
     }
 }
diff --git a/src/report.rs b/src/report.rs
index 7197dc4..ff54d7c 100644
--- a/src/report.rs
+++ b/src/report.rs
@@ -1,130 +1,45 @@
 //! Reporting.
 
-use codespan_reporting::diagnostic::Diagnostic;
-use codespan_reporting::diagnostic::Label;
-use codespan_reporting::files::SimpleFiles;
-use codespan_reporting::term;
+use anyhow::Context;
+use anyhow::Result;
+use codespan_reporting::files::SimpleFile;
+use codespan_reporting::term::emit;
 use codespan_reporting::term::termcolor::StandardStream;
 use codespan_reporting::term::Config;
-use codespan_reporting::term::DisplayStyle;
-use wdl::core::concern::Concern;
+use wdl::grammar::Diagnostic;
 
 /// A reporter for Sprocket.
 #[derive(Debug)]
-pub(crate) struct Reporter<'a> {
+pub(crate) struct Reporter {
     /// The configuration.
     config: Config,
 
     /// The stream to write to.
     stream: StandardStream,
-
-    /// The file repository.
-    files: &'a SimpleFiles<String, String>,
 }
 
-impl<'a> Reporter<'a> {
+impl Reporter {
     /// Creates a new [`Reporter`].
-    pub(crate) fn new(
-        config: Config,
-        stream: StandardStream,
-        files: &'a SimpleFiles<String, String>,
-    ) -> Self {
-        Self {
-            config,
-            stream,
-            files,
-        }
+    pub(crate) fn new(config: Config, stream: StandardStream) -> Self {
+        Self { config, stream }
     }
 
-    /// Reports a concern to the terminal.
-    pub(crate) fn report_concern(&mut self, concern: &Concern, handle: usize) {
-        let diagnostic = match concern {
-            Concern::LintWarning(warning) => {
-                let mut diagnostic = Diagnostic::warning()
-                    .with_code(format!(
-                        "{}::{}::{:?}",
-                        warning.code(),
-                        warning.tags(),
-                        warning.level()
-                    ))
-                    .with_message(warning.subject());
-
-                for location in warning.locations() {
-                    // SAFETY: if `report` is called, then the location **must**
-                    // fall within the provided file's contents. As such, it will
-                    // never be [`Location::Unplaced`], and this will always unwrap.
-                    let byte_range = location.byte_range().unwrap();
-
-                    diagnostic = diagnostic.with_labels(vec![
-                        Label::primary(handle, byte_range).with_message(warning.subject()),
-                    ]);
-                }
-
-                let mut notes = match warning.fix() {
-                    Some(fix) => vec![format!("fix: {}", fix)],
-                    None => vec![],
-                };
-                notes.extend(vec![format!(
-                    "see `sprocket explain {}` for more information",
-                    warning.code()
-                )]);
-
-                diagnostic = diagnostic.with_notes(notes);
-
-                diagnostic
-            }
-            Concern::ParseError(error) => {
-                // SAFETY: if `report` is called, then the location **must**
-                // fall within the provided file's contents. As such, it will
-                // never be [`Location::Unplaced`], and this will always unwrap.
-                let byte_range = error.byte_range().unwrap();
-
-                let diagnostic = match &self.config.display_style {
-                    DisplayStyle::Rich => Diagnostic::error()
-                        .with_message("parse error")
-                        .with_labels(vec![
-                            Label::primary(handle, byte_range).with_message(error.message()),
-                        ]),
-                    _ => Diagnostic::error()
-                        .with_message(error.message().to_lowercase())
-                        .with_labels(vec![
-                            Label::primary(handle, byte_range).with_message(error.message()),
-                        ]),
-                };
-
-                diagnostic
-            }
-            Concern::ValidationFailure(failure) => {
-                let mut diagnostic = Diagnostic::error()
-                    .with_code(failure.code().to_string())
-                    .with_message(failure.subject());
-
-                for location in failure.locations() {
-                    // SAFETY: if `report` is called, then the location **must**
-                    // fall within the provided file's contents. As such, it will
-                    // never be [`Location::Unplaced`], and this will always unwrap.
-                    let byte_range = location.byte_range().unwrap();
-
-                    diagnostic = diagnostic.with_labels(vec![
-                        Label::primary(handle, byte_range).with_message(failure.body()),
-                    ]);
-                }
-
-                if let Some(fix) = failure.fix() {
-                    diagnostic = diagnostic.with_notes(vec![format!("fix: {}", fix)]);
-                }
-
-                diagnostic
-            }
-        };
+    /// Reports diagnostics to the terminal.
+    pub(crate) fn emit_diagnostics(
+        &mut self,
+        file: &SimpleFile<String, String>,
+        diagnostics: &[Diagnostic],
+    ) -> Result<()> {
+        for diagnostic in diagnostics.iter() {
+            emit(
+                &mut self.stream,
+                &self.config,
+                file,
+                &diagnostic.to_codespan(),
+            )
+            .context("failed to emit diagnostic")?;
+        }
 
-        // SAFETY: for use on the command line, this should always succeed.
-        term::emit(
-            &mut self.stream.lock(),
-            &self.config,
-            self.files,
-            &diagnostic,
-        )
-        .expect("writing diagnostic to stream failed")
+        Ok(())
     }
 }