@@ -456,13 +456,15 @@ impl Url {
456
456
457
457
if self . slice ( self . scheme_end + 1 ..) . starts_with ( "//" ) {
458
458
// URL with authority
459
- match self . byte_at ( self . username_end ) {
460
- b':' => {
461
- assert ! ( self . host_start >= self . username_end + 2 ) ;
462
- assert_eq ! ( self . byte_at( self . host_start - 1 ) , b'@' ) ;
459
+ if self . username_end != self . serialization . len ( ) as u32 {
460
+ match self . byte_at ( self . username_end ) {
461
+ b':' => {
462
+ assert ! ( self . host_start >= self . username_end + 2 ) ;
463
+ assert_eq ! ( self . byte_at( self . host_start - 1 ) , b'@' ) ;
464
+ }
465
+ b'@' => assert ! ( self . host_start == self . username_end + 1 ) ,
466
+ _ => assert_eq ! ( self . username_end, self . scheme_end + 3 ) ,
463
467
}
464
- b'@' => assert ! ( self . host_start == self . username_end + 1 ) ,
465
- _ => assert_eq ! ( self . username_end, self . scheme_end + 3 ) ,
466
468
}
467
469
assert ! ( self . host_start >= self . username_end) ;
468
470
assert ! ( self . host_end >= self . host_start) ;
@@ -490,7 +492,10 @@ impl Url {
490
492
Some ( port_str. parse:: <u16 >( ) . expect( "Couldn't parse port?" ) )
491
493
) ;
492
494
}
493
- assert_eq ! ( self . byte_at( self . path_start) , b'/' ) ;
495
+ assert ! (
496
+ self . path_start as usize == self . serialization. len( )
497
+ || matches!( self . byte_at( self . path_start) , b'/' | b'#' | b'?' )
498
+ ) ;
494
499
} else {
495
500
// Anarchist URL (no authority)
496
501
assert_eq ! ( self . username_end, self . scheme_end + 1 ) ;
@@ -501,11 +506,11 @@ impl Url {
501
506
assert_eq ! ( self . path_start, self . scheme_end + 1 ) ;
502
507
}
503
508
if let Some ( start) = self . query_start {
504
- assert ! ( start > self . path_start) ;
509
+ assert ! ( start >= self . path_start) ;
505
510
assert_eq ! ( self . byte_at( start) , b'?' ) ;
506
511
}
507
512
if let Some ( start) = self . fragment_start {
508
- assert ! ( start > self . path_start) ;
513
+ assert ! ( start >= self . path_start) ;
509
514
assert_eq ! ( self . byte_at( start) , b'#' ) ;
510
515
}
511
516
if let ( Some ( query_start) , Some ( fragment_start) ) = ( self . query_start , self . fragment_start ) {
@@ -685,7 +690,7 @@ impl Url {
685
690
/// ```
686
691
#[ inline]
687
692
pub fn cannot_be_a_base ( & self ) -> bool {
688
- !self . slice ( self . path_start ..) . starts_with ( '/' )
693
+ !self . slice ( self . scheme_end + 1 ..) . starts_with ( '/' )
689
694
}
690
695
691
696
/// Return the username for this URL (typically the empty string)
@@ -745,7 +750,10 @@ impl Url {
745
750
pub fn password ( & self ) -> Option < & str > {
746
751
// This ':' is not the one marking a port number since a host can not be empty.
747
752
// (Except for file: URLs, which do not have port numbers.)
748
- if self . has_authority ( ) && self . byte_at ( self . username_end ) == b':' {
753
+ if self . has_authority ( )
754
+ && self . username_end != self . serialization . len ( ) as u32
755
+ && self . byte_at ( self . username_end ) == b':'
756
+ {
749
757
debug_assert ! ( self . byte_at( self . host_start - 1 ) == b'@' ) ;
750
758
Some ( self . slice ( self . username_end + 1 ..self . host_start - 1 ) )
751
759
} else {
@@ -1226,7 +1234,7 @@ impl Url {
1226
1234
if let Some ( input) = fragment {
1227
1235
self . fragment_start = Some ( to_u32 ( self . serialization . len ( ) ) . unwrap ( ) ) ;
1228
1236
self . serialization . push ( '#' ) ;
1229
- self . mutate ( |parser| parser. parse_fragment ( parser:: Input :: new ( input) ) )
1237
+ self . mutate ( |parser| parser. parse_fragment ( parser:: Input :: no_trim ( input) ) )
1230
1238
} else {
1231
1239
self . fragment_start = None
1232
1240
}
@@ -1284,7 +1292,12 @@ impl Url {
1284
1292
let scheme_type = SchemeType :: from ( self . scheme ( ) ) ;
1285
1293
let scheme_end = self . scheme_end ;
1286
1294
self . mutate ( |parser| {
1287
- parser. parse_query ( scheme_type, scheme_end, parser:: Input :: new ( input) )
1295
+ let vfn = parser. violation_fn ;
1296
+ parser. parse_query (
1297
+ scheme_type,
1298
+ scheme_end,
1299
+ parser:: Input :: trim_tab_and_newlines ( input, vfn) ,
1300
+ )
1288
1301
} ) ;
1289
1302
}
1290
1303
@@ -1625,14 +1638,34 @@ impl Url {
1625
1638
if host == "" && SchemeType :: from ( self . scheme ( ) ) . is_special ( ) {
1626
1639
return Err ( ParseError :: EmptyHost ) ;
1627
1640
}
1641
+ let mut host_substr = host;
1642
+ // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1643
+ if !host. starts_with ( '[' ) || !host. ends_with ( ']' ) {
1644
+ match host. find ( ':' ) {
1645
+ Some ( 0 ) => {
1646
+ // If buffer is the empty string, validation error, return failure.
1647
+ return Err ( ParseError :: InvalidDomainCharacter ) ;
1648
+ }
1649
+ // Let host be the result of host parsing buffer
1650
+ Some ( colon_index) => {
1651
+ host_substr = & host[ ..colon_index] ;
1652
+ }
1653
+ None => { }
1654
+ }
1655
+ }
1628
1656
if SchemeType :: from ( self . scheme ( ) ) . is_special ( ) {
1629
- self . set_host_internal ( Host :: parse ( host ) ?, None )
1657
+ self . set_host_internal ( Host :: parse ( host_substr ) ?, None ) ;
1630
1658
} else {
1631
- self . set_host_internal ( Host :: parse_opaque ( host ) ?, None )
1659
+ self . set_host_internal ( Host :: parse_opaque ( host_substr ) ?, None ) ;
1632
1660
}
1633
1661
} else if self . has_host ( ) {
1634
- if SchemeType :: from ( self . scheme ( ) ) . is_special ( ) {
1662
+ let scheme_type = SchemeType :: from ( self . scheme ( ) ) ;
1663
+ if scheme_type. is_special ( ) {
1635
1664
return Err ( ParseError :: EmptyHost ) ;
1665
+ } else {
1666
+ if self . serialization . len ( ) == self . path_start as usize {
1667
+ self . serialization . push ( '/' ) ;
1668
+ }
1636
1669
}
1637
1670
debug_assert ! ( self . byte_at( self . scheme_end) == b':' ) ;
1638
1671
debug_assert ! ( self . byte_at( self . path_start) == b'/' ) ;
@@ -1935,14 +1968,28 @@ impl Url {
1935
1968
///
1936
1969
/// # fn run() -> Result<(), ParseError> {
1937
1970
/// let mut url = Url::parse("https://example.net")?;
1938
- /// let result = url.set_scheme("foo ");
1939
- /// assert_eq!(url.as_str(), "foo ://example.net/");
1971
+ /// let result = url.set_scheme("http ");
1972
+ /// assert_eq!(url.as_str(), "http ://example.net/");
1940
1973
/// assert!(result.is_ok());
1941
1974
/// # Ok(())
1942
1975
/// # }
1943
1976
/// # run().unwrap();
1944
1977
/// ```
1978
+ /// Change the URL’s scheme from `foo` to `bar`:
1945
1979
///
1980
+ /// ```
1981
+ /// use url::Url;
1982
+ /// # use url::ParseError;
1983
+ ///
1984
+ /// # fn run() -> Result<(), ParseError> {
1985
+ /// let mut url = Url::parse("foo://example.net")?;
1986
+ /// let result = url.set_scheme("bar");
1987
+ /// assert_eq!(url.as_str(), "bar://example.net");
1988
+ /// assert!(result.is_ok());
1989
+ /// # Ok(())
1990
+ /// # }
1991
+ /// # run().unwrap();
1992
+ /// ```
1946
1993
///
1947
1994
/// Cannot change URL’s scheme from `https` to `foõ`:
1948
1995
///
@@ -1975,14 +2022,55 @@ impl Url {
1975
2022
/// # }
1976
2023
/// # run().unwrap();
1977
2024
/// ```
2025
+ /// Cannot change the URL’s scheme from `foo` to `https`:
2026
+ ///
2027
+ /// ```
2028
+ /// use url::Url;
2029
+ /// # use url::ParseError;
2030
+ ///
2031
+ /// # fn run() -> Result<(), ParseError> {
2032
+ /// let mut url = Url::parse("foo://example.net")?;
2033
+ /// let result = url.set_scheme("https");
2034
+ /// assert_eq!(url.as_str(), "foo://example.net");
2035
+ /// assert!(result.is_err());
2036
+ /// # Ok(())
2037
+ /// # }
2038
+ /// # run().unwrap();
2039
+ /// ```
2040
+ /// Cannot change the URL’s scheme from `http` to `foo`:
2041
+ ///
2042
+ /// ```
2043
+ /// use url::Url;
2044
+ /// # use url::ParseError;
2045
+ ///
2046
+ /// # fn run() -> Result<(), ParseError> {
2047
+ /// let mut url = Url::parse("http://example.net")?;
2048
+ /// let result = url.set_scheme("foo");
2049
+ /// assert_eq!(url.as_str(), "http://example.net/");
2050
+ /// assert!(result.is_err());
2051
+ /// # Ok(())
2052
+ /// # }
2053
+ /// # run().unwrap();
2054
+ /// ```
1978
2055
pub fn set_scheme ( & mut self , scheme : & str ) -> Result < ( ) , ( ) > {
1979
2056
let mut parser = Parser :: for_setter ( String :: new ( ) ) ;
1980
2057
let remaining = parser. parse_scheme ( parser:: Input :: new ( scheme) ) ?;
1981
- if !remaining. is_empty ( )
1982
- || ( !self . has_host ( ) && SchemeType :: from ( & parser. serialization ) . is_special ( ) )
2058
+ let new_scheme_type = SchemeType :: from ( & parser. serialization ) ;
2059
+ let old_scheme_type = SchemeType :: from ( self . scheme ( ) ) ;
2060
+ // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2061
+ if ( new_scheme_type. is_special ( ) && !old_scheme_type. is_special ( ) ) ||
2062
+ // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2063
+ ( !new_scheme_type. is_special ( ) && old_scheme_type. is_special ( ) ) ||
2064
+ // If url includes credentials or has a non-null port, and buffer is "file", then return.
2065
+ // If url’s scheme is "file" and its host is an empty host or null, then return.
2066
+ ( new_scheme_type. is_file ( ) && self . has_authority ( ) )
1983
2067
{
1984
2068
return Err ( ( ) ) ;
1985
2069
}
2070
+
2071
+ if !remaining. is_empty ( ) || ( !self . has_host ( ) && new_scheme_type. is_special ( ) ) {
2072
+ return Err ( ( ) ) ;
2073
+ }
1986
2074
let old_scheme_end = self . scheme_end ;
1987
2075
let new_scheme_end = to_u32 ( parser. serialization . len ( ) ) . unwrap ( ) ;
1988
2076
let adjust = |index : & mut u32 | {
@@ -2004,6 +2092,14 @@ impl Url {
2004
2092
2005
2093
parser. serialization . push_str ( self . slice ( old_scheme_end..) ) ;
2006
2094
self . serialization = parser. serialization ;
2095
+
2096
+ // Update the port so it can be removed
2097
+ // if it is the scheme's default.
2098
+ // We don't mind it silently failing
2099
+ // if there was no port in the first place.
2100
+ let previous_port = self . port ( ) ;
2101
+ let _ = self . set_port ( previous_port) ;
2102
+
2007
2103
Ok ( ( ) )
2008
2104
}
2009
2105
@@ -2408,6 +2504,7 @@ fn path_to_file_url_segments_windows(
2408
2504
}
2409
2505
let mut components = path. components ( ) ;
2410
2506
2507
+ let host_start = serialization. len ( ) + 1 ;
2411
2508
let host_end;
2412
2509
let host_internal;
2413
2510
match components. next ( ) {
@@ -2434,15 +2531,24 @@ fn path_to_file_url_segments_windows(
2434
2531
_ => return Err ( ( ) ) ,
2435
2532
}
2436
2533
2534
+ let mut path_only_has_prefix = true ;
2437
2535
for component in components {
2438
2536
if component == Component :: RootDir {
2439
2537
continue ;
2440
2538
}
2539
+ path_only_has_prefix = false ;
2441
2540
// FIXME: somehow work with non-unicode?
2442
2541
let component = component. as_os_str ( ) . to_str ( ) . ok_or ( ( ) ) ?;
2443
2542
serialization. push ( '/' ) ;
2444
2543
serialization. extend ( percent_encode ( component. as_bytes ( ) , PATH_SEGMENT ) ) ;
2445
2544
}
2545
+ // A windows drive letter must end with a slash.
2546
+ if serialization. len ( ) > host_start
2547
+ && parser:: is_windows_drive_letter ( & serialization[ host_start..] )
2548
+ && path_only_has_prefix
2549
+ {
2550
+ serialization. push ( '/' ) ;
2551
+ }
2446
2552
Ok ( ( host_end, host_internal) )
2447
2553
}
2448
2554
@@ -2467,6 +2573,14 @@ fn file_url_segments_to_pathbuf(
2467
2573
bytes. push ( b'/' ) ;
2468
2574
bytes. extend ( percent_decode ( segment. as_bytes ( ) ) ) ;
2469
2575
}
2576
+ // A windows drive letter must end with a slash.
2577
+ if bytes. len ( ) > 2 {
2578
+ if matches ! ( bytes[ bytes. len( ) - 2 ] , b'a' ..=b'z' | b'A' ..=b'Z' )
2579
+ && matches ! ( bytes[ bytes. len( ) - 1 ] , b':' | b'|' )
2580
+ {
2581
+ bytes. push ( b'/' ) ;
2582
+ }
2583
+ }
2470
2584
let os_str = OsStr :: from_bytes ( & bytes) ;
2471
2585
let path = PathBuf :: from ( os_str) ;
2472
2586
debug_assert ! (
0 commit comments