@@ -7,8 +7,8 @@ use crate::instance::PyNativeType;
7
7
use crate :: object:: PyObject ;
8
8
use crate :: types:: PyAny ;
9
9
use crate :: AsPyPointer ;
10
+ use crate :: IntoPy ;
10
11
use crate :: Python ;
11
- use crate :: { exceptions, IntoPy } ;
12
12
use crate :: { ffi, FromPy } ;
13
13
use std:: borrow:: Cow ;
14
14
use std:: ops:: Index ;
@@ -59,37 +59,38 @@ impl PyString {
59
59
}
60
60
61
61
/// Get the Python string as a byte slice.
62
+ ///
63
+ /// Returns a `UnicodeEncodeError` if the string cannot be encoded as UTF-8
64
+ /// (for example, because it contains unpaired surrogates).
62
65
#[ inline]
63
- pub fn as_bytes ( & self ) -> & [ u8 ] {
66
+ pub fn as_bytes ( & self ) -> PyResult < & [ u8 ] > {
64
67
unsafe {
65
68
let mut size: ffi:: Py_ssize_t = 0 ;
66
69
let data = ffi:: PyUnicode_AsUTF8AndSize ( self . 0 . as_ptr ( ) , & mut size) as * const u8 ;
67
- // PyUnicode_AsUTF8AndSize would return null if the pointer did not reference a valid
68
- // unicode object, but because we have a valid PyString, assume success
69
- debug_assert ! ( !data. is_null( ) ) ;
70
- std:: slice:: from_raw_parts ( data, size as usize )
70
+ if data. is_null ( ) {
71
+ Err ( PyErr :: fetch ( self . py ( ) ) )
72
+ } else {
73
+ Ok ( std:: slice:: from_raw_parts ( data, size as usize ) )
74
+ }
71
75
}
72
76
}
73
77
74
78
/// Convert the `PyString` into a Rust string.
75
- ///
76
- /// Returns a `UnicodeDecodeError` if the input is not valid unicode
77
- /// (containing unpaired surrogates).
78
79
pub fn to_string ( & self ) -> PyResult < Cow < str > > {
79
- match std:: str:: from_utf8 ( self . as_bytes ( ) ) {
80
- Ok ( s) => Ok ( Cow :: Borrowed ( s) ) ,
81
- Err ( e) => Err ( PyErr :: from_instance (
82
- exceptions:: UnicodeDecodeError :: new_utf8 ( self . py ( ) , self . as_bytes ( ) , e) ?,
83
- ) ) ,
84
- }
80
+ let bytes = self . as_bytes ( ) ?;
81
+ let string = std:: str:: from_utf8 ( bytes) ?;
82
+ Ok ( Cow :: Borrowed ( string) )
85
83
}
86
84
87
85
/// Convert the `PyString` into a Rust string.
88
86
///
89
87
/// Unpaired surrogates and invalid UTF-8 sequences are
90
88
/// replaced with U+FFFD REPLACEMENT CHARACTER.
91
89
pub fn to_string_lossy ( & self ) -> Cow < str > {
92
- String :: from_utf8_lossy ( self . as_bytes ( ) )
90
+ // TODO: Handle the error of `as_bytes` instead of panicking in `unwrap`
91
+ // see https://github.com/PyO3/pyo3/pull/634
92
+ let bytes = self . as_bytes ( ) . unwrap ( ) ;
93
+ String :: from_utf8_lossy ( bytes)
93
94
}
94
95
}
95
96
@@ -273,7 +274,16 @@ mod test {
273
274
let s = "ascii 🐈" ;
274
275
let obj: PyObject = PyString :: new ( py, s) . into ( ) ;
275
276
let py_string = <PyString as PyTryFrom >:: try_from ( obj. as_ref ( py) ) . unwrap ( ) ;
276
- assert_eq ! ( s. as_bytes( ) , py_string. as_bytes( ) ) ;
277
+ assert_eq ! ( s. as_bytes( ) , py_string. as_bytes( ) . unwrap( ) ) ;
278
+ }
279
+
280
+ #[ test]
281
+ fn test_as_bytes_surrogate ( ) {
282
+ let gil = Python :: acquire_gil ( ) ;
283
+ let py = gil. python ( ) ;
284
+ let obj: PyObject = py. eval ( r#"'\ud800'"# , None , None ) . unwrap ( ) . into ( ) ;
285
+ let py_string = <PyString as PyTryFrom >:: try_from ( obj. as_ref ( py) ) . unwrap ( ) ;
286
+ assert ! ( py_string. as_bytes( ) . is_err( ) ) ;
277
287
}
278
288
279
289
#[ test]
0 commit comments