diff --git a/Cargo.lock b/Cargo.lock index cc74e3b4cd..8e78865e4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -981,6 +981,15 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "email_address" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2153bd83ebc09db15bcbdc3e2194d901804952e3dc96967e1cd3b0c5c32d112" +dependencies = [ + "serde", +] + [[package]] name = "encode_unicode" version = "0.3.6" @@ -1800,7 +1809,7 @@ dependencies = [ "cached", "check-if-email-exists", "doc-comment", - "fast_chemail", + "email_address", "futures", "glob", "html5ever", diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 8ca8bb76a9..fec66319fa 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -55,21 +55,21 @@ Or, you can accept all content/MIME types: `--headers "accept=*/*"`. See more info about the Accept header [over at MDN](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept). - ## Unreachable Mail Address We use https://github.com/reacherhq/check-if-email-exists for email checking. You can test your mail address with curl: -``` + +```bash curl -X POST \ 'https://api.reacher.email/v0/check_email' \ -H 'content-type: application/json' \ -H 'authorization: test_api_token' \ -d '{"to_email": "box@domain.test"}' ``` -Some settings on your mail server (such as SPF Policy, DNSBL) may prevent your email from being verified. -If you have an error with checking a working email, you can disable this check using the -[commandline parameter](https://github.com/lycheeverse/lychee#commandline-parameters) `--exclude-mail`. - - +Some settings on your mail server (such as `SPF` Policy, `DNSBL`) may prevent +your email from being verified. 
If you have an error with checking a working +email, you can disable this check using the [commandline +parameter](https://github.com/lycheeverse/lychee#commandline-parameters) +`--exclude-mail`. diff --git a/examples/builder/builder.rs b/examples/builder/builder.rs index 62ec4b1d13..8ff9dd9075 100644 --- a/examples/builder/builder.rs +++ b/examples/builder/builder.rs @@ -9,9 +9,9 @@ use std::{collections::HashSet, time::Duration}; #[allow(clippy::trivial_regex)] async fn main() -> Result<()> { // Excludes - let excludes = Some(RegexSet::new(&[r"example"]).unwrap()); + let excludes = Some(RegexSet::new([r"example"]).unwrap()); // Includes take precedence over excludes - let includes = Some(RegexSet::new(&[r"example.com"]).unwrap()); + let includes = Some(RegexSet::new([r"example.com"]).unwrap()); // Set custom request headers let mut headers = HeaderMap::new(); diff --git a/fixtures/TEST_EMAIL_QUERY_PARAMS.html b/fixtures/TEST_EMAIL_QUERY_PARAMS.html new file mode 100644 index 0000000000..7b2c170c2f --- /dev/null +++ b/fixtures/TEST_EMAIL_QUERY_PARAMS.html @@ -0,0 +1,14 @@ + +
++ Please email + hello@example.org + for any questions. +
+ + diff --git a/fixtures/TEST_EMAIL_QUERY_PARAMS.md b/fixtures/TEST_EMAIL_QUERY_PARAMS.md new file mode 100644 index 0000000000..8bf285f464 --- /dev/null +++ b/fixtures/TEST_EMAIL_QUERY_PARAMS.md @@ -0,0 +1 @@ +Please email [hello@example.org](mailto:hello@example.org?subject=%5BHello%5D) for any questions. diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index f72c2effb8..6978234288 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -127,6 +127,34 @@ mod cli { ) } + #[test] + fn test_email_html_with_subject() -> Result<()> { + let mut cmd = main_command(); + let input = fixtures_path().join("TEST_EMAIL_QUERY_PARAMS.html"); + + cmd.arg("--dump") + .arg(input) + .assert() + .success() + .stdout(contains("hello@example.org?subject=%5BHello%5D")); + + Ok(()) + } + + #[test] + fn test_email_markdown_with_subject() -> Result<()> { + let mut cmd = main_command(); + let input = fixtures_path().join("TEST_EMAIL_QUERY_PARAMS.md"); + + cmd.arg("--dump") + .arg(input) + .assert() + .success() + .stdout(contains("hello@example.org?subject=%5BHello%5D")); + + Ok(()) + } + /// Test that a GitHub link can be checked without specifying the token. #[test] fn test_check_github_no_token() -> Result<()> { diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 5e2232f74d..fe39eceffd 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -18,7 +18,7 @@ version = "0.10.1" [dependencies] check-if-email-exists = "0.9.0" -fast_chemail = "0.9.6" +email_address = "0.2.4" glob = "0.3.0" http = "0.2.8" linkify = "0.9.0" diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 00afb9dc85..31fdeeaedb 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -549,8 +549,13 @@ impl Client { } /// Check a mail address, or equivalently a `mailto` URI. + /// + /// URIs may contain query parameters (e.g. `contact@example.com?subject="Hello"`), + /// which are ignored by this check. 
They are not part of the mail address + /// and are instead passed to a mail client. pub async fn check_mail(&self, uri: &Uri) -> Status { - let input = CheckEmailInput::new(uri.as_str().to_owned()); + let address = uri.url.path().to_string(); + let input = CheckEmailInput::new(address); let result = &(check_email(&input).await); if let Reachable::Invalid = result.is_reachable { diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index 421d43feab..07f6a2b06d 100644 --- a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -304,7 +304,7 @@ mod tests { #[test] fn test_overwrite_false_positives() { let includes = Includes { - regex: RegexSet::new(&[r"http://www.w3.org/1999/xhtml"]).unwrap(), + regex: RegexSet::new([r"http://www.w3.org/1999/xhtml"]).unwrap(), }; let filter = Filter { includes: Some(includes), @@ -316,7 +316,7 @@ mod tests { #[test] fn test_include_regex() { let includes = Includes { - regex: RegexSet::new(&[r"foo.example.com"]).unwrap(), + regex: RegexSet::new([r"foo.example.com"]).unwrap(), }; let filter = Filter { includes: Some(includes), @@ -344,7 +344,7 @@ mod tests { #[test] fn test_exclude_regex() { let excludes = Excludes { - regex: RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap(), + regex: RegexSet::new([r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap(), }; let filter = Filter { excludes: Some(excludes), @@ -361,10 +361,10 @@ mod tests { #[test] fn test_exclude_include_regex() { let includes = Includes { - regex: RegexSet::new(&[r"foo.example.com"]).unwrap(), + regex: RegexSet::new([r"foo.example.com"]).unwrap(), }; let excludes = Excludes { - regex: RegexSet::new(&[r"example.com"]).unwrap(), + regex: RegexSet::new([r"example.com"]).unwrap(), }; let filter = Filter { includes: Some(includes), diff --git a/lychee-lib/src/helpers/path.rs b/lychee-lib/src/helpers/path.rs index 3fbd1df683..b2766162aa 100644 --- a/lychee-lib/src/helpers/path.rs +++ 
b/lychee-lib/src/helpers/path.rs @@ -97,8 +97,8 @@ fn join(base: PathBuf, dst: &Path) -> PathBuf { // // Unfortunately requires real files for `fs::canonicalize`. pub(crate) fn contains(parent: &PathBuf, child: &PathBuf) -> Result