From 2a35d596b33221301fbf1cfac48ce85639f7f6b9 Mon Sep 17 00:00:00 2001 From: YOUNGSUK_KIM Date: Sat, 16 Nov 2019 00:13:56 -0500 Subject: [PATCH 01/11] read_wide_str --- src/librustc_mir/interpret/memory.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index e929b0855834e..4c8017de0ad46 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -791,6 +791,21 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { self.get_raw(ptr.alloc_id)?.read_c_str(self, ptr) } + /// Reads a 0x00-terminated sequence of bytes from memory. Returns them as a slice. + /// Needed for reading wide-strings in Windows-OS + pub fn read_wide_str(&self, ptr: Scalar) -> InterpResult<'tcx, &[u8]> { + let widestr_u8_initbyte = self.read_bytes(ptr, Size::from_bytes(1))?; + let mut widestr_len = 0; // length in bytes + unsafe { + let mut tracker = &widestr_u8_initbyte[0] as *const u8; + while !(*tracker == 0 && *tracker.add(1) == 0) { + tracker = tracker.add(2); + widestr_len += 2; + } + } + self.read_bytes(ptr, Size::from_bytes(widestr_len)) + } + /// Writes the given stream of bytes into memory. /// /// Performs appropriate bounds checks. From bfd460e6c0ce41a47f4e35878717690a3a67840f Mon Sep 17 00:00:00 2001 From: YOUNGSUK_KIM Date: Sat, 16 Nov 2019 00:30:41 -0500 Subject: [PATCH 02/11] fix typo in comment --- src/librustc_mir/interpret/memory.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 4c8017de0ad46..68843c4a0fdce 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -791,7 +791,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { self.get_raw(ptr.alloc_id)?.read_c_str(self, ptr) } - /// Reads a 0x00-terminated sequence of bytes from memory. Returns them as a slice. + /// Reads a 0x0000-terminated sequence of bytes from memory. Returns them as a slice. /// Needed for reading wide-strings in Windows-OS pub fn read_wide_str(&self, ptr: Scalar) -> InterpResult<'tcx, &[u8]> { let widestr_u8_initbyte = self.read_bytes(ptr, Size::from_bytes(1))?; From 4d127edd23db033db901596fc6996cd80711c703 Mon Sep 17 00:00:00 2001 From: YOUNGSUK_KIM Date: Sat, 16 Nov 2019 09:55:11 -0500 Subject: [PATCH 03/11] remove trailing whitespaces --- src/librustc_mir/interpret/memory.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 68843c4a0fdce..125222e4965fc 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -792,13 +792,13 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { } /// Reads a 0x0000-terminated sequence of bytes from memory. Returns them as a slice. - /// Needed for reading wide-strings in Windows-OS + /// Needed for reading wide-strings in Windows-OS pub fn read_wide_str(&self, ptr: Scalar) -> InterpResult<'tcx, &[u8]> { let widestr_u8_initbyte = self.read_bytes(ptr, Size::from_bytes(1))?; let mut widestr_len = 0; // length in bytes unsafe { let mut tracker = &widestr_u8_initbyte[0] as *const u8; - while !(*tracker == 0 && *tracker.add(1) == 0) { + while !(*tracker == 0 && *tracker.add(1) == 0) { tracker = tracker.add(2); widestr_len += 2; } From ead89fbcf2fde07560e252a7bf6af1a5aed5f5fc Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Sat, 16 Nov 2019 18:32:57 -0500 Subject: [PATCH 04/11] Add safety comment for function read_wide_str --- src/librustc_mir/interpret/memory.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 125222e4965fc..3858ec35f6687 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -796,6 +796,9 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { pub fn read_wide_str(&self, ptr: Scalar) -> InterpResult<'tcx, &[u8]> { let widestr_u8_initbyte = self.read_bytes(ptr, Size::from_bytes(1))?; let mut widestr_len = 0; // length in bytes + // The below unsafe block uses a raw-pointer(*const u8) to find the exact length + // of the wide_string, which terminates with a double null byte. It doesn't write anything to Memory. + // Each character in a wide_string takes up two bytes. unsafe { let mut tracker = &widestr_u8_initbyte[0] as *const u8; while !(*tracker == 0 && *tracker.add(1) == 0) { From f1002cbd401cce9778f7509b5d05fa165cb812bc Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Sat, 16 Nov 2019 18:45:06 -0500 Subject: [PATCH 05/11] Fix line that had more than 100 chars in it --- src/librustc_mir/interpret/memory.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 3858ec35f6687..99378b173a45c 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -796,9 +796,10 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { pub fn read_wide_str(&self, ptr: Scalar) -> InterpResult<'tcx, &[u8]> { let widestr_u8_initbyte = self.read_bytes(ptr, Size::from_bytes(1))?; let mut widestr_len = 0; // length in bytes - // The below unsafe block uses a raw-pointer(*const u8) to find the exact length - // of the wide_string, which terminates with a double null byte. It doesn't write anything to Memory. - // Each character in a wide_string takes up two bytes. + // The below unsafe block uses a raw-pointer(*const u8) to find the length + // of the wide-string, which terminates with a double null byte. + // The below unsafe block doesn't write anything to Memory. + // Each character in a wide-string takes up two bytes. unsafe { let mut tracker = &widestr_u8_initbyte[0] as *const u8; while !(*tracker == 0 && *tracker.add(1) == 0) { From bc670b951dc7df635a9130dcd55420a523912544 Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Sat, 16 Nov 2019 19:18:06 -0500 Subject: [PATCH 06/11] remove trailing whitespace in comment --- src/librustc_mir/interpret/memory.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 99378b173a45c..b8038c21e9bca 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -797,7 +797,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { let widestr_u8_initbyte = self.read_bytes(ptr, Size::from_bytes(1))?; let mut widestr_len = 0; // length in bytes // The below unsafe block uses a raw-pointer(*const u8) to find the length - // of the wide-string, which terminates with a double null byte. + // of the wide-string, which terminates with a double null byte. // The below unsafe block doesn't write anything to Memory. // Each character in a wide-string takes up two bytes. unsafe { From d4ff7362c7843f5b98e51f48e487772a0a4a47d4 Mon Sep 17 00:00:00 2001 From: YOUNGSUK_KIM Date: Sat, 23 Nov 2019 08:54:41 -0500 Subject: [PATCH 07/11] WIP : add 'read_wide_str' API to Allocation --- src/librustc/mir/interpret/allocation.rs | 27 ++++++++++++++++++++++++ src/librustc_mir/interpret/memory.rs | 18 ++++------------ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs index aa8ac4902a894..25219b2817b74 100644 --- a/src/librustc/mir/interpret/allocation.rs +++ b/src/librustc/mir/interpret/allocation.rs @@ -324,6 +324,33 @@ impl<'tcx, Tag: Copy, Extra: AllocationExtra> Allocation { }) } + /// Reads bytes until a `0x00` is encountered. Will error if the end of the allocation + /// is reached before a `0x00` is found. + /// + /// Most likely, you want to call `Memory::read_wide_str` instead of this method. + pub fn read_wide_str( + &self, + cx: &impl HasDataLayout, + ptr : Pointer + ) -> InterpResult<'tcx, &[u8]> + { + assert_eq!(ptr.offset.bytes() as usize as u64, ptr.offset.bytes()); + let offset = ptr.offset.bytes() as usize; + Ok(match self.bytes[offset..self.bytes.len() - 1].iter().step_by(2) + .zip(self.bytes[(offset+1)..].iter().step_by(2)) + .position(|&(l,r)| l == 0 && r == 0) { + Some(size) => { + let size_with_null = Size::from_bytes((size + 2) as u64); + // Go through `get_bytes` for checks and AllocationExtra hooks. + // We read the null, so we include it in the request, but we want it removed + // from the result, so we do subslicing. + &self.get_bytes(cx, ptr, size_with_null)?[..size] + } + // This includes the case where `offset` is out-of-bounds to begin with. + None => throw_unsup!(UnterminatedCString(ptr.erase_tag())), + }) + } + /// Validates that `ptr.offset` and `ptr.offset + size` do not point to the middle of a /// relocation. If `allow_ptr_and_undef` is `false`, also enforces that the memory in the /// given range contains neither relocations nor undef bytes. diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index b8038c21e9bca..2fcf5d58fad1d 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -793,21 +793,11 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { /// Reads a 0x0000-terminated sequence of bytes from memory. Returns them as a slice. /// Needed for reading wide-strings in Windows-OS + /// + /// Performs appropriate bounds checks. pub fn read_wide_str(&self, ptr: Scalar) -> InterpResult<'tcx, &[u8]> { - let widestr_u8_initbyte = self.read_bytes(ptr, Size::from_bytes(1))?; - let mut widestr_len = 0; // length in bytes - // The below unsafe block uses a raw-pointer(*const u8) to find the length - // of the wide-string, which terminates with a double null byte. - // The below unsafe block doesn't write anything to Memory. - // Each character in a wide-string takes up two bytes. - unsafe { - let mut tracker = &widestr_u8_initbyte[0] as *const u8; - while !(*tracker == 0 && *tracker.add(1) == 0) { - tracker = tracker.add(2); - widestr_len += 2; - } - } - self.read_bytes(ptr, Size::from_bytes(widestr_len)) + let ptr = self.force_ptr(ptr)?; // We need to read at least 1 byte, so we *need* a ptr. + self.get_raw(ptr.alloc_id)?.read_wide_str(self, ptr) } /// Writes the given stream of bytes into memory. From fd9a974c6e73a93869284ed06d55afd09f5aebcf Mon Sep 17 00:00:00 2001 From: YOUNGSUK_KIM Date: Sat, 23 Nov 2019 09:15:25 -0500 Subject: [PATCH 08/11] remove trailing whitespaces --- src/librustc/mir/interpret/allocation.rs | 2 +- src/librustc_mir/interpret/memory.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs index 25219b2817b74..f7a21598ab635 100644 --- a/src/librustc/mir/interpret/allocation.rs +++ b/src/librustc/mir/interpret/allocation.rs @@ -324,7 +324,7 @@ impl<'tcx, Tag: Copy, Extra: AllocationExtra> Allocation { }) } - /// Reads bytes until a `0x00` is encountered. Will error if the end of the allocation + /// Reads bytes until a `0x00` is encountered. Will error if the end of the allocation /// is reached before a `0x00` is found. /// /// Most likely, you want to call `Memory::read_wide_str` instead of this method. diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 2fcf5d58fad1d..dc81416a44d75 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -793,7 +793,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { /// Reads a 0x0000-terminated sequence of bytes from memory. Returns them as a slice. /// Needed for reading wide-strings in Windows-OS - /// + /// /// Performs appropriate bounds checks. pub fn read_wide_str(&self, ptr: Scalar) -> InterpResult<'tcx, &[u8]> { let ptr = self.force_ptr(ptr)?; // We need to read at least 1 byte, so we *need* a ptr. From 72f62521ea71804653ff4f9596c60b081ecffc15 Mon Sep 17 00:00:00 2001 From: YOUNGSUK_KIM Date: Mon, 25 Nov 2019 09:44:57 -0500 Subject: [PATCH 09/11] Fix type error --- src/librustc/mir/interpret/allocation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs index f7a21598ab635..402e6602f8376 100644 --- a/src/librustc/mir/interpret/allocation.rs +++ b/src/librustc/mir/interpret/allocation.rs @@ -338,7 +338,7 @@ impl<'tcx, Tag: Copy, Extra: AllocationExtra> Allocation { let offset = ptr.offset.bytes() as usize; Ok(match self.bytes[offset..self.bytes.len() - 1].iter().step_by(2) .zip(self.bytes[(offset+1)..].iter().step_by(2)) - .position(|&(l,r)| l == 0 && r == 0) { + .position(|(&l, &r)| l == 0 && r == 0) { Some(size) => { let size_with_null = Size::from_bytes((size + 2) as u64); // Go through `get_bytes` for checks and AllocationExtra hooks. From 14e40dd1dd42bed633cde59c07f095f2c13ea601 Mon Sep 17 00:00:00 2001 From: JOE1994 Date: Wed, 4 Dec 2019 00:22:32 -0500 Subject: [PATCH 10/11] post-review commit --- src/librustc/mir/interpret/allocation.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs index 402e6602f8376..b60fe8bbb7521 100644 --- a/src/librustc/mir/interpret/allocation.rs +++ b/src/librustc/mir/interpret/allocation.rs @@ -324,8 +324,8 @@ impl<'tcx, Tag: Copy, Extra: AllocationExtra> Allocation { }) } - /// Reads bytes until a `0x00` is encountered. Will error if the end of the allocation - /// is reached before a `0x00` is found. + /// Reads bytes until a `0x0000` is encountered. Will error if the end of the allocation + /// is reached before a `0x0000` is found. /// /// Most likely, you want to call `Memory::read_wide_str` instead of this method. pub fn read_wide_str( @@ -336,7 +336,9 @@ impl<'tcx, Tag: Copy, Extra: AllocationExtra> Allocation { { assert_eq!(ptr.offset.bytes() as usize as u64, ptr.offset.bytes()); let offset = ptr.offset.bytes() as usize; - Ok(match self.bytes[offset..self.bytes.len() - 1].iter().step_by(2) + // The iterator below yields pairs of adjacent bytes, in order to find 0x0000. + Ok(match + self.bytes[offset..].iter().step_by(2) .zip(self.bytes[(offset+1)..].iter().step_by(2)) .position(|(&l, &r)| l == 0 && r == 0) { Some(size) => { From 883a33bcaad75fdb5715299ebc1c90d1f8f5e6c3 Mon Sep 17 00:00:00 2001 From: JOE1994 Date: Wed, 4 Dec 2019 00:33:36 -0500 Subject: [PATCH 11/11] remove trailing whitespace --- src/librustc/mir/interpret/allocation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs index b60fe8bbb7521..529016cbe95e0 100644 --- a/src/librustc/mir/interpret/allocation.rs +++ b/src/librustc/mir/interpret/allocation.rs @@ -337,7 +337,7 @@ impl<'tcx, Tag: Copy, Extra: AllocationExtra> Allocation { assert_eq!(ptr.offset.bytes() as usize as u64, ptr.offset.bytes()); let offset = ptr.offset.bytes() as usize; // The iterator below yields pairs of adjacent bytes, in order to find 0x0000. - Ok(match + Ok(match self.bytes[offset..].iter().step_by(2) .zip(self.bytes[(offset+1)..].iter().step_by(2)) .position(|(&l, &r)| l == 0 && r == 0) {