Skip to content

Commit 9cd6467

Browse files
authored
Merge pull request #537 from o0Ignition0o/40_tests_left
Pass "new" tests
2 parents 622d260 + 4464840 commit 9cd6467

8 files changed

+1660
-372
lines changed

src/host.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ pub(crate) enum HostInternal {
2424
Ipv6(Ipv6Addr),
2525
}
2626

27-
impl<S> From<Host<S>> for HostInternal {
28-
fn from(host: Host<S>) -> HostInternal {
27+
impl From<Host<String>> for HostInternal {
28+
fn from(host: Host<String>) -> HostInternal {
2929
match host {
30+
Host::Domain(ref s) if s.is_empty() => HostInternal::None,
3031
Host::Domain(_) => HostInternal::Domain,
3132
Host::Ipv4(address) => HostInternal::Ipv4(address),
3233
Host::Ipv6(address) => HostInternal::Ipv6(address),

src/lib.rs

+134-20
Original file line numberDiff line numberDiff line change
@@ -456,13 +456,15 @@ impl Url {
456456

457457
if self.slice(self.scheme_end + 1..).starts_with("//") {
458458
// URL with authority
459-
match self.byte_at(self.username_end) {
460-
b':' => {
461-
assert!(self.host_start >= self.username_end + 2);
462-
assert_eq!(self.byte_at(self.host_start - 1), b'@');
459+
if self.username_end != self.serialization.len() as u32 {
460+
match self.byte_at(self.username_end) {
461+
b':' => {
462+
assert!(self.host_start >= self.username_end + 2);
463+
assert_eq!(self.byte_at(self.host_start - 1), b'@');
464+
}
465+
b'@' => assert!(self.host_start == self.username_end + 1),
466+
_ => assert_eq!(self.username_end, self.scheme_end + 3),
463467
}
464-
b'@' => assert!(self.host_start == self.username_end + 1),
465-
_ => assert_eq!(self.username_end, self.scheme_end + 3),
466468
}
467469
assert!(self.host_start >= self.username_end);
468470
assert!(self.host_end >= self.host_start);
@@ -490,7 +492,10 @@ impl Url {
490492
Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
491493
);
492494
}
493-
assert_eq!(self.byte_at(self.path_start), b'/');
495+
assert!(
496+
self.path_start as usize == self.serialization.len()
497+
|| matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
498+
);
494499
} else {
495500
// Anarchist URL (no authority)
496501
assert_eq!(self.username_end, self.scheme_end + 1);
@@ -501,11 +506,11 @@ impl Url {
501506
assert_eq!(self.path_start, self.scheme_end + 1);
502507
}
503508
if let Some(start) = self.query_start {
504-
assert!(start > self.path_start);
509+
assert!(start >= self.path_start);
505510
assert_eq!(self.byte_at(start), b'?');
506511
}
507512
if let Some(start) = self.fragment_start {
508-
assert!(start > self.path_start);
513+
assert!(start >= self.path_start);
509514
assert_eq!(self.byte_at(start), b'#');
510515
}
511516
if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
@@ -685,7 +690,7 @@ impl Url {
685690
/// ```
686691
#[inline]
687692
pub fn cannot_be_a_base(&self) -> bool {
688-
!self.slice(self.path_start..).starts_with('/')
693+
!self.slice(self.scheme_end + 1..).starts_with('/')
689694
}
690695

691696
/// Return the username for this URL (typically the empty string)
@@ -745,7 +750,10 @@ impl Url {
745750
pub fn password(&self) -> Option<&str> {
746751
// This ':' is not the one marking a port number since a host cannot be empty.
747752
// (Except for file: URLs, which do not have port numbers.)
748-
if self.has_authority() && self.byte_at(self.username_end) == b':' {
753+
if self.has_authority()
754+
&& self.username_end != self.serialization.len() as u32
755+
&& self.byte_at(self.username_end) == b':'
756+
{
749757
debug_assert!(self.byte_at(self.host_start - 1) == b'@');
750758
Some(self.slice(self.username_end + 1..self.host_start - 1))
751759
} else {
@@ -1226,7 +1234,7 @@ impl Url {
12261234
if let Some(input) = fragment {
12271235
self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
12281236
self.serialization.push('#');
1229-
self.mutate(|parser| parser.parse_fragment(parser::Input::new(input)))
1237+
self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
12301238
} else {
12311239
self.fragment_start = None
12321240
}
@@ -1284,7 +1292,12 @@ impl Url {
12841292
let scheme_type = SchemeType::from(self.scheme());
12851293
let scheme_end = self.scheme_end;
12861294
self.mutate(|parser| {
1287-
parser.parse_query(scheme_type, scheme_end, parser::Input::new(input))
1295+
let vfn = parser.violation_fn;
1296+
parser.parse_query(
1297+
scheme_type,
1298+
scheme_end,
1299+
parser::Input::trim_tab_and_newlines(input, vfn),
1300+
)
12881301
});
12891302
}
12901303

@@ -1625,14 +1638,34 @@ impl Url {
16251638
if host == "" && SchemeType::from(self.scheme()).is_special() {
16261639
return Err(ParseError::EmptyHost);
16271640
}
1641+
let mut host_substr = host;
1642+
// Otherwise, if c is U+003A (:) and the [] flag is unset, then
1643+
if !host.starts_with('[') || !host.ends_with(']') {
1644+
match host.find(':') {
1645+
Some(0) => {
1646+
// If buffer is the empty string, validation error, return failure.
1647+
return Err(ParseError::InvalidDomainCharacter);
1648+
}
1649+
// Let host be the result of host parsing buffer
1650+
Some(colon_index) => {
1651+
host_substr = &host[..colon_index];
1652+
}
1653+
None => {}
1654+
}
1655+
}
16281656
if SchemeType::from(self.scheme()).is_special() {
1629-
self.set_host_internal(Host::parse(host)?, None)
1657+
self.set_host_internal(Host::parse(host_substr)?, None);
16301658
} else {
1631-
self.set_host_internal(Host::parse_opaque(host)?, None)
1659+
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
16321660
}
16331661
} else if self.has_host() {
1634-
if SchemeType::from(self.scheme()).is_special() {
1662+
let scheme_type = SchemeType::from(self.scheme());
1663+
if scheme_type.is_special() {
16351664
return Err(ParseError::EmptyHost);
1665+
} else {
1666+
if self.serialization.len() == self.path_start as usize {
1667+
self.serialization.push('/');
1668+
}
16361669
}
16371670
debug_assert!(self.byte_at(self.scheme_end) == b':');
16381671
debug_assert!(self.byte_at(self.path_start) == b'/');
@@ -1935,14 +1968,28 @@ impl Url {
19351968
///
19361969
/// # fn run() -> Result<(), ParseError> {
19371970
/// let mut url = Url::parse("https://example.net")?;
1938-
/// let result = url.set_scheme("foo");
1939-
/// assert_eq!(url.as_str(), "foo://example.net/");
1971+
/// let result = url.set_scheme("http");
1972+
/// assert_eq!(url.as_str(), "http://example.net/");
19401973
/// assert!(result.is_ok());
19411974
/// # Ok(())
19421975
/// # }
19431976
/// # run().unwrap();
19441977
/// ```
1978+
/// Change the URL’s scheme from `foo` to `bar`:
19451979
///
1980+
/// ```
1981+
/// use url::Url;
1982+
/// # use url::ParseError;
1983+
///
1984+
/// # fn run() -> Result<(), ParseError> {
1985+
/// let mut url = Url::parse("foo://example.net")?;
1986+
/// let result = url.set_scheme("bar");
1987+
/// assert_eq!(url.as_str(), "bar://example.net");
1988+
/// assert!(result.is_ok());
1989+
/// # Ok(())
1990+
/// # }
1991+
/// # run().unwrap();
1992+
/// ```
19461993
///
19471994
/// Cannot change URL’s scheme from `https` to `foõ`:
19481995
///
@@ -1975,14 +2022,55 @@ impl Url {
19752022
/// # }
19762023
/// # run().unwrap();
19772024
/// ```
2025+
/// Cannot change the URL’s scheme from `foo` to `https`:
2026+
///
2027+
/// ```
2028+
/// use url::Url;
2029+
/// # use url::ParseError;
2030+
///
2031+
/// # fn run() -> Result<(), ParseError> {
2032+
/// let mut url = Url::parse("foo://example.net")?;
2033+
/// let result = url.set_scheme("https");
2034+
/// assert_eq!(url.as_str(), "foo://example.net");
2035+
/// assert!(result.is_err());
2036+
/// # Ok(())
2037+
/// # }
2038+
/// # run().unwrap();
2039+
/// ```
2040+
/// Cannot change the URL’s scheme from `http` to `foo`:
2041+
///
2042+
/// ```
2043+
/// use url::Url;
2044+
/// # use url::ParseError;
2045+
///
2046+
/// # fn run() -> Result<(), ParseError> {
2047+
/// let mut url = Url::parse("http://example.net")?;
2048+
/// let result = url.set_scheme("foo");
2049+
/// assert_eq!(url.as_str(), "http://example.net/");
2050+
/// assert!(result.is_err());
2051+
/// # Ok(())
2052+
/// # }
2053+
/// # run().unwrap();
2054+
/// ```
19782055
pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
19792056
let mut parser = Parser::for_setter(String::new());
19802057
let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
1981-
if !remaining.is_empty()
1982-
|| (!self.has_host() && SchemeType::from(&parser.serialization).is_special())
2058+
let new_scheme_type = SchemeType::from(&parser.serialization);
2059+
let old_scheme_type = SchemeType::from(self.scheme());
2060+
// If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2061+
if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2062+
// If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2063+
(!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2064+
// If url includes credentials or has a non-null port, and buffer is "file", then return.
2065+
// If url’s scheme is "file" and its host is an empty host or null, then return.
2066+
(new_scheme_type.is_file() && self.has_authority())
19832067
{
19842068
return Err(());
19852069
}
2070+
2071+
if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2072+
return Err(());
2073+
}
19862074
let old_scheme_end = self.scheme_end;
19872075
let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
19882076
let adjust = |index: &mut u32| {
@@ -2004,6 +2092,14 @@ impl Url {
20042092

20052093
parser.serialization.push_str(self.slice(old_scheme_end..));
20062094
self.serialization = parser.serialization;
2095+
2096+
// Re-apply the current port so that it gets removed
2097+
// if it is the scheme's default.
2098+
// We don't mind this failing silently
2099+
// if there was no port in the first place.
2100+
let previous_port = self.port();
2101+
let _ = self.set_port(previous_port);
2102+
20072103
Ok(())
20082104
}
20092105

@@ -2408,6 +2504,7 @@ fn path_to_file_url_segments_windows(
24082504
}
24092505
let mut components = path.components();
24102506

2507+
let host_start = serialization.len() + 1;
24112508
let host_end;
24122509
let host_internal;
24132510
match components.next() {
@@ -2434,15 +2531,24 @@ fn path_to_file_url_segments_windows(
24342531
_ => return Err(()),
24352532
}
24362533

2534+
let mut path_only_has_prefix = true;
24372535
for component in components {
24382536
if component == Component::RootDir {
24392537
continue;
24402538
}
2539+
path_only_has_prefix = false;
24412540
// FIXME: somehow work with non-unicode?
24422541
let component = component.as_os_str().to_str().ok_or(())?;
24432542
serialization.push('/');
24442543
serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
24452544
}
2545+
// A path that consists only of a Windows drive letter must end with a slash.
2546+
if serialization.len() > host_start
2547+
&& parser::is_windows_drive_letter(&serialization[host_start..])
2548+
&& path_only_has_prefix
2549+
{
2550+
serialization.push('/');
2551+
}
24462552
Ok((host_end, host_internal))
24472553
}
24482554

@@ -2467,6 +2573,14 @@ fn file_url_segments_to_pathbuf(
24672573
bytes.push(b'/');
24682574
bytes.extend(percent_decode(segment.as_bytes()));
24692575
}
2576+
// A path that ends in a Windows drive letter (e.g. `C:` or `C|`) must end with a slash.
2577+
if bytes.len() > 2 {
2578+
if matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
2579+
&& matches!(bytes[bytes.len() - 1], b':' | b'|')
2580+
{
2581+
bytes.push(b'/');
2582+
}
2583+
}
24702584
let os_str = OsStr::from_bytes(&bytes);
24712585
let path = PathBuf::from(os_str);
24722586
debug_assert!(

0 commit comments

Comments
 (0)