Skip to content

Commit

Permalink
Add support for validating email addresses
Browse files Browse the repository at this point in the history
  • Loading branch information
rushmorem committed Jan 4, 2017
1 parent 0660620 commit b648684
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 28 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "publicsuffix"
description = "A robust and reliable library for parsing domain names"
version = "1.2.0"
version = "1.3.0"
license = "MIT/Apache-2.0"
repository = "https://github.com/rushmorem/publicsuffix"
documentation = "https://docs.rs/publicsuffix"
Expand All @@ -18,6 +18,7 @@ error-chain = "0.7"
idna = "0.1"
regex = "0.2"
url = "1.2"
lazy_static = "0.2"

[dependencies.native-tls]
version = "0.1"
Expand Down
2 changes: 2 additions & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ error_chain! {

InvalidHost { }

InvalidEmail { }

InvalidRule(t: String) {
description("invalid rule")
display("invalid rule: '{}'", t)
Expand Down
99 changes: 78 additions & 21 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
extern crate error_chain;
#[cfg(feature = "remote_list")]
extern crate native_tls;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate idna;
extern crate url;
Expand Down Expand Up @@ -138,6 +140,10 @@ pub enum Host {
Domain(Domain),
}

lazy_static! {
// Pattern that every domain label is checked against: either a run of
// alphanumerics, or a string that starts and ends with an alphanumeric and
// has only alphanumerics or hyphens in between (so no leading or trailing
// hyphen, and no empty label). Kept in a lazily-initialised static so the
// regex is compiled once on first use instead of on every validation call.
static ref LABEL: Regex = Regex::new(r"^([[:alnum:]]+|[[:alnum:]]+[[:alnum:]-]*[[:alnum:]]+)$").unwrap();
}

/// Converts a type into a Url object
pub trait IntoUrl {
fn into_url(self) -> Result<Url>;
Expand Down Expand Up @@ -244,7 +250,7 @@ impl List {
}
}
}
if list.rules.is_empty() || list.icann().is_empty() || list.private().is_empty() {
if list.rules.is_empty() || list.all().is_empty() {
return Err(ErrorKind::InvalidList.into());
}
Ok(list)
Expand Down Expand Up @@ -279,6 +285,15 @@ impl List {
Self::build(res)
}

/// Build the list from a string
///
/// The list doesn't always have to come from a file. You can maintain your own
/// list, say in a DBMS. You can then pull it at runtime and build the list from
/// the resulting String.
///
/// The string is handed unchanged to `Self::build`, and whatever `Result`
/// that call produces is returned as-is.
pub fn from_string(string: String) -> Result<List> {
Self::build(string)
}

/// Pull the list from the official URL
#[cfg(feature = "remote_list")]
pub fn fetch() -> Result<List> {
Expand Down Expand Up @@ -338,26 +353,60 @@ impl List {

/// Extracts Host from a URL
pub fn parse_url<U: IntoUrl>(&self, url: U) -> Result<Host> {
match url.into_url()?.host_str() {
Some(host) => self.parse_host(host),
None => Err(ErrorKind::NoHost.into()),
let url = url.into_url()?;
match url.scheme() {
"mailto" => {
match url.host_str() {
Some(host) => self.parse_email(&format!("{}@{}", url.username(), host)),
None => Err(ErrorKind::InvalidEmail.into()),
}
}
_ => {
match url.host_str() {
Some(host) => self.parse_host(host),
None => Err(ErrorKind::NoHost.into()),
}
}
}
}

/// Extracts Host from an email address
///
/// The address is trimmed and then split on its *last* `@`: everything after
/// it is parsed as the host, everything before it is treated as the local part.
///
/// This method can also be used simply to validate an email address.
/// If it returns an error, the email address is not valid. The local-part
/// checks are intentionally lenient (see the links below); only clearly
/// malformed addresses are rejected.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidEmail` when the address
///
/// * contains no `@`,
/// * has an empty local part (e.g. `@example.com`),
/// * has a local part that starts or ends with a dot,
/// * has a local part longer than 64 characters, or
/// * is longer than 254 characters overall.
///
/// Otherwise any error produced by `parse_host` for the host part is returned.
// https://en.wikipedia.org/wiki/Email_address#Syntax
// https://en.wikipedia.org/wiki/International_email#Email_addresses
// http://girders.org/blog/2013/01/31/dont-rfc-validate-email-addresses/
// https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address
// https://hackernoon.com/the-100-correct-way-to-validate-email-addresses-7c4818f24643#.pgcir4z3e
pub fn parse_email(&self, address: &str) -> Result<Host> {
    let address = address.trim();
    // Split from the right so that only the last `@` separates local part and host.
    let mut parts = address.rsplitn(2, '@');
    let host = match parts.next() {
        Some(host) => host,
        None => return Err(ErrorKind::InvalidEmail.into()),
    };
    // A missing second piece means the address contained no `@` at all.
    let local = match parts.next() {
        Some(local) => local,
        None => return Err(ErrorKind::InvalidEmail.into()),
    };
    // `local` + `@` + `host` is exactly the trimmed address, so its length can be
    // measured directly instead of re-assembling the string.
    if local.is_empty()
        || local.starts_with('.')
        || local.ends_with('.')
        || local.chars().count() > 64
        || address.chars().count() > 254
    {
        return Err(ErrorKind::InvalidEmail.into());
    }
    self.parse_host(host)
}

/// Parses any arbitrary string
///
/// Effectively this means that the string is either a URL or a host.
/// Effectively this means that the string is either a URL, an email address or a host.
pub fn parse_str(&self, string: &str) -> Result<Host> {
if string.contains("//") {
if string.starts_with("//") {
// If a string starts with `//` it might be a protocol
// relative URL. Since we really do not care about the
// protocol anyway, let's just assume it's `https` to
// give it a fair chance with `Url::parse`.
self.parse_url(&format!("https:{}", string))
} else {
self.parse_url(string)
}
if string.contains("://") {
self.parse_url(string)
} else if string.contains("@") {
self.parse_email(string)
} else {
self.parse_host(string)
}
Expand All @@ -366,12 +415,22 @@ impl List {

impl Host {
fn parse(host: &str, list: &List) -> Result<Host> {
if let Ok(ip) = IpAddr::from_str(host) {
return Ok(Host::Ip(ip));
}
let mut host = host.trim();
if let Ok(domain) = Domain::parse(host, list) {
return Ok(Host::Domain(domain));
}
if host.starts_with("[")
&& !host.starts_with("[[")
&& host.ends_with("]")
&& !host.ends_with("]]")
{
host = host
.trim_left_matches("[")
.trim_right_matches("]");
};
if let Ok(ip) = IpAddr::from_str(host) {
return Ok(Host::Ip(ip));
}
Err(ErrorKind::InvalidHost.into())
}

Expand Down Expand Up @@ -422,8 +481,6 @@ impl Domain {
}
Err(_) => { return false; }
};
// all labels must conform to this pattern
let pattern = Regex::new("^([[:alnum:]]+|[[:alnum:]]+[[:alnum:]-]*[[:alnum:]]+)$").unwrap();
let mut labels: Vec<&str> = domain.split('.').collect();
// drop the empty label left by a trailing dot, to support fully qualified domain names
if domain.ends_with(".") { labels.pop(); }
Expand All @@ -436,7 +493,7 @@ impl Domain {
// the tld must not be a number
if i == 0 && label.parse::<f64>().is_ok() { return false; }
// any label must only contain allowed characters
if !pattern.is_match(label) { return false; }
if !LABEL.is_match(label) { return false; }
}
true
}
Expand Down
43 changes: 37 additions & 6 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,23 +216,54 @@ fn list_behaviour() {
pass!()
});

ctx.it("can be parsed from a URL with a domain as hostname", || {
ctx.it("can be parsed from a URL with an IP address as hostname", || {
assert!(list.parse_url("https://127.38.53.247:8080/list/").unwrap().is_ip());
pass!()
});

ctx.it("can be parsed from a standard URL using `parse_str`", || {
ctx.it("can be parsed from a URL using `parse_str`", || {
assert!(list.parse_str("https://127.38.53.247:8080/list/").unwrap().is_ip());
pass!()
});

ctx.it("can be parsed from a protocol-relative URL using `parse_str`", || {
assert!(list.parse_str("//127.38.53.247:8080/list/").unwrap().is_ip());
ctx.it("can be parsed from a non-URL using `parse_str`", || {
assert!(list.parse_str("example.com").unwrap().is_domain());
pass!()
});
});

ctx.it("can be parsed from a non-URL using `parse_str`", || {
assert!(list.parse_str("example.com").unwrap().is_domain());
// Test group for email handling: `parse_email` directly, and the email paths
// reached through `parse_str` and `parse_url`.
rdescribe("a parsed email", |ctx| {
ctx.it("should allow valid email addresses", || {
// Sample addresses covering dotted, plus-tagged and hyphenated local parts,
// a single-character local part, a local part made of special characters,
// a hyphenated domain, a long TLD and a bracketed IPv6 literal as host.
let emails = vec![
"prettyandsimple@example.com",
"very.common@example.com",
"disposable.style.email.with+symbol@example.com",
"other.email-with-dash@example.com",
"x@example.com",
"example-indeed@strange-example.com",
"#!$%&'*+-/=?^_`{}|~@example.org",
"example@s.solutions",
"user@[fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123]",
];
for email in emails {
// Printed so the offending address can be identified when an assertion fails.
println!("{} should be valid", email);
assert!(list.parse_email(email).is_ok());
}
pass!()
});

ctx.it("should allow parsing emails as str", || {
// A bare address handed to `parse_str` is expected to resolve to a domain host.
assert!(list.parse_str("prettyandsimple@example.com").unwrap().is_domain());
pass!()
});

ctx.it("should allow parsing emails as URL", || {
// A `mailto://` URL handed to `parse_url` is expected to resolve to a domain host.
assert!(list.parse_url("mailto://prettyandsimple@example.com").unwrap().is_domain());
pass!()
});

ctx.it("should allow parsing IDN email addresses", || {
// Non-ASCII local part and internationalised domain name.
assert!(list.parse_email("用户@例子.广告").is_ok());
pass!()
});
});
Expand Down

0 comments on commit b648684

Please sign in to comment.