Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ParseMode: Convert from enum to configurable struct #50

Merged
merged 3 commits into from
Dec 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions src/expression/minimize.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use super::Expression;
use crate::{LicenseReq, Licensee};
use std::fmt;

Expand Down Expand Up @@ -36,21 +37,22 @@ impl std::error::Error for MinimizeError {
}
}

impl super::Expression {
impl Expression {
/// Given a set of [`Licensee`]s, attempts to find the minimum number that
/// satisfy this [`Expression`]. The list of licensees should be given in
/// priority order, eg, if you wish to accept the `Apache-2.0` license if
/// it is available and the `MIT` if not, putting `Apache-2.0` before `MIT`
/// will cause the ubiquitous `Apache-2.0 OR MIT` expression to minimize to
/// just `Apache-2.0` as only only 1 of the licenses is required, and the
/// `Apache-2.0` has priority.
/// satisfy this [`Expression`].
///
/// The list of licensees should be given in priority order, eg, if you wish
/// to accept the `Apache-2.0` license if it is available, and the `MIT` if
/// not, putting `Apache-2.0` before `MIT` will cause the ubiquitous
/// `Apache-2.0 OR MIT` expression to minimize to just `Apache-2.0` as only
/// 1 of the licenses is required, and `Apache-2.0` has priority.
///
/// # Errors
///
/// This method will fail if more than 64 unique licensees are satisfied by
/// this expression, but such a case is unlikely to say the least in a real
/// world scenario. The list of licensees must also actually satisfy this
/// expression, otherwise it can't be minimized.
/// this expression, but such a case is unlikely in a real world scenario.
/// The list of licensees must also actually satisfy this expression,
/// otherwise it can't be minimized.
///
/// # Example
///
Expand Down
6 changes: 3 additions & 3 deletions src/expression/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl Expression {
/// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception").unwrap();
/// ```
pub fn parse(original: &str) -> Result<Self, ParseError> {
Self::parse_mode(original, ParseMode::Strict)
Self::parse_mode(original, ParseMode::STRICT)
}

/// Parses an expression with the specified `ParseMode`. With
Expand All @@ -33,7 +33,7 @@ impl Expression {
/// ```
/// spdx::Expression::parse_mode(
/// "mit/Apache-2.0 WITH LLVM-exception",
/// spdx::ParseMode::Lax
/// spdx::ParseMode::LAX
/// ).unwrap();
/// ```
pub fn parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError> {
Expand Down Expand Up @@ -130,7 +130,7 @@ impl Expression {
..
}) => {
// Handle GNU licenses differently, as they should *NOT* be used with the `+`
if mode == ParseMode::Strict && id.is_gnu() {
if !mode.allow_postfix_plus_on_gpl && id.is_gnu() {
return Err(ParseError {
original: original.to_owned(),
span: lt.span,
Expand Down
77 changes: 57 additions & 20 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,59 @@ use crate::{
ExceptionId, LicenseId,
};

/// Available modes when parsing SPDX expressions
#[derive(Copy, Clone, PartialEq)]
pub enum ParseMode {
/// Strict SPDX parsing.
/// Parsing configuration for SPDX expression
#[derive(Default, Copy, Clone)]
pub struct ParseMode {
/// The `AND`, `OR`, and `WITH` operators are required to be uppercase in
/// the SPDX spec, but enabling this option allows them to be lowercased
pub allow_lower_case_operators: bool,
/// Allows the use of `/` as a synonym for the `OR` operator. This also
/// allows for not having whitespace between the `/` and the terms on either
/// side
pub allow_slash_as_or_operator: bool,
/// Allows some invalid/imprecise identifiers as synonyms for an actual
/// license identifier. See [`IMPRECISE_NAMES`](crate::identifiers::IMPRECISE_NAMES)
/// for a list of the current synonyms. Note that this list is not
/// comprehensive but can be expanded upon when invalid identifiers are
/// found in the wild.
pub allow_imprecise_license_names: bool,
/// The various GPL licenses diverge from every other license in the SPDX
/// license list by having an `-or-later` variant that used as a suffix on a
/// base license (eg. `GPL-3.0-or-later`) rather than the canonical `GPL-3.0+`.
/// This option just allows GPL licenses to be treated similarly to all of
/// the other SPDX licenses.
pub allow_postfix_plus_on_gpl: bool,
}

impl ParseMode {
/// Strict, specification compliant SPDX parsing.
///
/// 1. Only license identifiers in the SPDX license list, or
/// Document/LicenseRef, are allowed. The license identifiers are also
/// case-sensitive.
/// 1. `WITH`, `AND`, and `OR` are the only valid operators
Strict,
pub const STRICT: Self = Self {
allow_lower_case_operators: false,
allow_slash_as_or_operator: false,
allow_imprecise_license_names: false,
allow_postfix_plus_on_gpl: false,
};

/// Allow non-conforming syntax for crates-io compatibility
///
/// 1. Additional, invalid, identifiers are accepted and mapped to a correct
/// SPDX license identifier. See
/// [identifiers::IMPRECISE_NAMES](../identifiers/constant.IMPRECISE_NAMES.html)
/// for the list of additionally accepted identifiers and the license they
/// SPDX license identifier.
/// See [`IMPRECISE_NAMES`](crate::identifiers::IMPRECISE_NAMES) for the
/// list of additionally accepted identifiers and the license they
/// correspond to.
/// 1. `/` can by used as a synonym for `OR`, and doesn't need to be
/// separated by whitespace from the terms it combines
Lax,
pub const LAX: Self = Self {
allow_lower_case_operators: true,
allow_slash_as_or_operator: true,
allow_imprecise_license_names: true,
allow_postfix_plus_on_gpl: true,
};
}

/// A single token in an SPDX license expression
Expand Down Expand Up @@ -85,7 +120,7 @@ pub struct Lexer<'a> {
inner: &'a str,
original: &'a str,
offset: usize,
lax: bool,
mode: ParseMode,
}

impl<'a> Lexer<'a> {
Expand All @@ -96,7 +131,7 @@ impl<'a> Lexer<'a> {
inner: text,
original: text,
offset: 0,
lax: false,
mode: ParseMode::STRICT,
}
}

Expand All @@ -110,7 +145,7 @@ impl<'a> Lexer<'a> {
inner: text,
original: text,
offset: 0,
lax: mode == ParseMode::Lax,
mode,
}
}

Expand Down Expand Up @@ -197,7 +232,7 @@ impl<'a> Iterator for Lexer<'a> {
}
Some('(') => ok_token(Token::OpenParen),
Some(')') => ok_token(Token::CloseParen),
Some('/') if self.lax => Some(Ok((Token::Or, 1))),
Some('/') if self.mode.allow_slash_as_or_operator => Some(Ok((Token::Or, 1))),
Some(_) => match Lexer::find_text_token(self.inner) {
None => Some(Err(ParseError {
original: self.original.to_owned(),
Expand All @@ -211,9 +246,9 @@ impl<'a> Iterator for Lexer<'a> {
ok_token(Token::And)
} else if m == "OR" {
ok_token(Token::Or)
} else if self.lax && m == "and" {
} else if self.mode.allow_lower_case_operators && m == "and" {
ok_token(Token::And)
} else if self.lax && m == "or" {
} else if self.mode.allow_lower_case_operators && m == "or" {
ok_token(Token::Or)
} else if let Some(lic_id) = crate::license_id(m) {
ok_token(Token::Spdx(lic_id))
Expand All @@ -230,11 +265,13 @@ impl<'a> Iterator for Lexer<'a> {
doc_ref: None,
lic_ref,
})
} else if let Some((lic_id, token_len)) = if self.lax {
crate::imprecise_license_id(self.inner)
} else {
None
} {
} else if let Some((lic_id, token_len)) =
if self.mode.allow_imprecise_license_names {
crate::imprecise_license_id(self.inner)
} else {
None
}
{
Some(Ok((Token::Spdx(lic_id), token_len)))
} else {
Some(Err(ParseError {
Expand Down
6 changes: 3 additions & 3 deletions tests/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ macro_rules! exact {

macro_rules! check {
($le:expr => [$($logical_expr:expr => $is_allowed:expr),+$(,)?]) => {
check_mode!(spdx::ParseMode::Strict, $le => [$($logical_expr => $is_allowed),+])
check_mode!(spdx::ParseMode::STRICT, $le => [$($logical_expr => $is_allowed),+])
};
}

macro_rules! check_lax {
($le:expr => [$($logical_expr:expr => $is_allowed:expr),+$(,)?]) => {
check_mode!(spdx::ParseMode::Lax, $le => [$($logical_expr => $is_allowed),+])
check_mode!(spdx::ParseMode::LAX, $le => [$($logical_expr => $is_allowed),+])
};
}

Expand Down Expand Up @@ -191,7 +191,7 @@ fn gpl_or_later_plus_strict() {

#[test]
fn gpl_or_later_plus_lax() {
spdx::Expression::parse_mode("GPL-2.0+", spdx::ParseMode::Lax).unwrap();
spdx::Expression::parse_mode("GPL-2.0+", spdx::ParseMode::LAX).unwrap();
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions tests/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ fn fails_with_slash() {

#[test]
fn lax_takes_slash() {
let lexed: Vec<_> = Lexer::new_mode("MIT/Apache", spdx::ParseMode::Lax)
let lexed: Vec<_> = Lexer::new_mode("MIT/Apache", spdx::ParseMode::LAX)
.map(|r| r.map(|lt| lt.token).unwrap())
.collect();
assert_eq!(
Expand All @@ -131,7 +131,7 @@ fn lax_takes_slash() {

#[test]
fn fixes_license_names() {
let lexed: Vec<_> = Lexer::new_mode("gpl v2 / bsd 2-clause", spdx::ParseMode::Lax)
let lexed: Vec<_> = Lexer::new_mode("gpl v2 / bsd 2-clause", spdx::ParseMode::LAX)
.map(|r| r.map(|lt| lt.token).unwrap())
.collect();
assert_eq!(
Expand Down