From 020a61a74dda8053854ad75f86fd46553b4f2b30 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 2 Jul 2024 13:00:54 -0400 Subject: [PATCH 01/18] add ZipEntry API in src/unstable/read.rs - also use git dep for zstd to pull in removal of R: BufRead bound - move new methods into unstable/read.rs - support getting aes verification info from a raw ZipEntry - flesh out other methods from ZipFile -> ZipEntry - now create a streaming abstraction - move some methods around --- Cargo.toml | 3 +- src/aes.rs | 8 +- src/crc32.rs | 108 +++++++ src/read.rs | 336 +++++++++++++++----- src/types.rs | 13 +- src/unstable.rs | 2 + src/unstable/read.rs | 714 +++++++++++++++++++++++++++++++++++++++++++ src/write.rs | 32 +- src/zipcrypto.rs | 2 +- 9 files changed, 1119 insertions(+), 99 deletions(-) mode change 100644 => 100755 src/unstable.rs create mode 100644 src/unstable/read.rs diff --git a/Cargo.toml b/Cargo.toml index b161f1155..f55bcabfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,8 @@ time = { workspace = true, optional = true, features = [ "std", ] } zeroize = { version = "1.8.1", optional = true, features = ["zeroize_derive"] } -zstd = { version = "0.13.1", optional = true, default-features = false } +# zstd = { version = "0.13.1", optional = true, default-features = false } +zstd = { git = "https://github.com/gyscos/zstd-rs", rev = "a3738d680542e34b5529207ee30491ed7b69ed71", optional = true, default-features = false } zopfli = { version = "0.8.1", optional = true } deflate64 = { version = "0.1.8", optional = true } lzma-rs = { version = "0.3.0", default-features = false, optional = true } diff --git a/src/aes.rs b/src/aes.rs index 6c9eb371f..c86a8e3a2 100644 --- a/src/aes.rs +++ b/src/aes.rs @@ -66,7 +66,7 @@ pub struct AesReader { data_length: u64, } -impl AesReader { +impl AesReader { pub const fn new(reader: R, aes_mode: AesMode, compressed_size: u64) -> AesReader { let data_length = compressed_size - (PWD_VERIFY_LENGTH + AUTH_CODE_LENGTH + aes_mode.salt_length()) as u64; @@ -77,7 +77,9 @@ impl AesReader { data_length, } } +} +impl AesReader { /// Read the AES header bytes and validate the password. /// /// Even if the validation succeeds, there is still a 1 in 65536 chance that an incorrect @@ -150,7 +152,7 @@ impl AesReader { /// There is a 1 in 65536 chance that an invalid password passes that check. /// After the data has been read and decrypted an HMAC will be checked and provide a final means /// to check if either the password is invalid or if the data has been changed. -pub struct AesReaderValid { +pub struct AesReaderValid { reader: R, data_remaining: u64, cipher: Cipher, @@ -214,7 +216,7 @@ impl Read for AesReaderValid { } } -impl AesReaderValid { +impl AesReaderValid { /// Consumes this decoder, returning the underlying reader. pub fn into_inner(self) -> R { self.reader diff --git a/src/crc32.rs b/src/crc32.rs index 4b2beb62e..c17857c1c 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -83,6 +83,114 @@ impl Read for Crc32Reader { } } +pub(crate) mod non_crypto { + use std::io; + use std::io::prelude::*; + + use crc32fast::Hasher; + + /// Reader that validates the CRC32 when it reaches the EOF. + pub struct Crc32Reader { + inner: R, + hasher: Hasher, + check: u32, + } + + impl Crc32Reader { + /// Get a new Crc32Reader which checks the inner reader against checksum. + pub(crate) fn new(inner: R, checksum: u32) -> Self { + Crc32Reader { + inner, + hasher: Hasher::new(), + check: checksum, + } + } + + fn check_matches(&self) -> Result<(), &'static str> { + let res = self.hasher.clone().finalize(); + if self.check == res { + Ok(()) + } else { + /* TODO: make this into our own Crc32Error error type! */ + Err("Invalid checksum") + } + } + + pub fn into_inner(self) -> R { + self.inner + } + } + + impl Read for Crc32Reader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + /* We want to make sure we only check the hash when the input stream is exhausted. */ + if buf.is_empty() { + /* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we + * still want to "pull" from the source in case it surfaces an i/o error. This will + * always return a count of Ok(0) if successful. */ + return self.inner.read(buf); + } + + let count = self.inner.read(buf)?; + if count == 0 { + return self + .check_matches() + .map(|()| 0) + /* TODO: use io::Error::other for MSRV >=1.74 */ + .map_err(|e| io::Error::new(io::ErrorKind::Other, e)); + } + self.hasher.update(&buf[..count]); + Ok(count) + } + } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn test_empty_reader() { + let data: &[u8] = b""; + let mut buf = [0; 1]; + + let mut reader = Crc32Reader::new(data, 0); + assert_eq!(reader.read(&mut buf).unwrap(), 0); + + let mut reader = Crc32Reader::new(data, 1); + assert!(reader + .read(&mut buf) + .unwrap_err() + .to_string() + .contains("Invalid checksum")); + } + + #[test] + fn test_byte_by_byte() { + let data: &[u8] = b"1234"; + let mut buf = [0; 1]; + + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 0); + // Can keep reading 0 bytes after the end + assert_eq!(reader.read(&mut buf).unwrap(), 0); + } + + #[test] + fn test_zero_read() { + let data: &[u8] = b"1234"; + let mut buf = [0; 5]; + + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); + assert_eq!(reader.read(&mut buf).unwrap(), 4); + } + } +} + #[cfg(test)] mod test { use super::*; diff --git a/src/read.rs b/src/read.rs index 830a58514..ff23f88a3 100644 --- a/src/read.rs +++ b/src/read.rs @@ -10,7 +10,7 @@ use crate::read::zip_archive::{Shared, SharedBuilder}; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CentralDirectoryEnd, ZIP64_ENTRY_THR}; use crate::types::{ - AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, + AesMode, AesModeInfo, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, ZipLocalEntryBlock, }; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; @@ -21,7 +21,6 @@ use std::fs::create_dir_all; use std::io::{self, copy, prelude::*, sink, SeekFrom}; use std::mem; use std::mem::size_of; -use std::ops::Deref; use std::path::{Path, PathBuf}; use std::rc::Rc; use std::sync::{Arc, OnceLock}; @@ -128,9 +127,15 @@ use crate::read::lzma::LzmaDecoder; #[cfg(feature = "xz")] use crate::read::xz::XzDecoder; use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive}; -use crate::spec::is_dir; use crate::types::ffi::S_IFLNK; use crate::unstable::{path_to_string, LittleEndianReadExt}; + +use crate::crc32::non_crypto::Crc32Reader as NewCrc32Reader; +use crate::unstable::read::{ + construct_decompressing_reader, find_entry_content_range, CryptoEntryReader, CryptoVariant, + ZipEntry, +}; + pub use zip_archive::ZipArchive; #[allow(clippy::large_enum_variant)] @@ -351,9 +356,9 @@ pub(crate) fn find_content<'a>( Ok((reader as &mut dyn Read).take(data.compressed_size)) } -fn find_data_start( +pub(crate) fn find_data_start( data: &ZipFileData, - reader: &mut (impl Read + Seek + Sized), + reader: &mut (impl Read + Seek), ) -> Result { // Go to start of data. reader.seek(io::SeekFrom::Start(data.header_start))?; @@ -388,7 +393,7 @@ pub(crate) fn make_crypto_reader<'a>( using_data_descriptor: bool, reader: io::Take<&'a mut dyn Read>, password: Option<&[u8]>, - aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>, + aes_info: Option, #[cfg(feature = "aes-crypto")] compressed_size: u64, ) -> ZipResult> { #[allow(deprecated)] @@ -406,7 +411,14 @@ pub(crate) fn make_crypto_reader<'a>( )) } #[cfg(feature = "aes-crypto")] - (Some(password), Some((aes_mode, vendor_version, _))) => CryptoReader::Aes { + ( + Some(password), + Some(AesModeInfo { + aes_mode, + vendor_version, + .. + }), + ) => CryptoReader::Aes { reader: AesReader::new(reader, aes_mode, compressed_size).validate(password)?, vendor_version, }, @@ -547,6 +559,75 @@ impl ZipArchive { } Some(total) } + + const fn zip64_cde_len() -> usize { + mem::size_of::() + + mem::size_of::() + } + + const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) { + if a > b { + (b, a) + } else { + (a, b) + } + } + + /// Number of files contained in this zip. + pub fn len(&self) -> usize { + self.shared.files.len() + } + + /// Whether this zip archive contains no files + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes. + /// + /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size + /// of that prepended data. + pub fn offset(&self) -> u64 { + self.shared.offset + } + + /// Get the comment of the zip archive. + pub fn comment(&self) -> &[u8] { + &self.comment + } + + /// Returns an iterator over all the file and directory names in this archive. + pub fn file_names(&self) -> impl Iterator { + self.shared.files.keys().map(|s| s.as_ref()) + } + + /// Get the index of a file entry by name, if it's present. + #[inline(always)] + pub fn index_for_name(&self, name: &str) -> Option { + self.shared.files.get_index_of(name) + } + + /// Get the index of a file entry by path, if it's present. + #[inline(always)] + pub fn index_for_path>(&self, path: T) -> Option { + self.index_for_name(&path_to_string(path)) + } + + #[inline(always)] + fn index_for_name_err(&self, name: impl AsRef) -> Result { + self.index_for_name(name.as_ref()) + .map(Ok) + .unwrap_or(Err(ZipError::FileNotFound)) + } + + /// Get the name of a file entry, if it's present. + #[inline(always)] + pub fn name_for_index(&self, index: usize) -> Option<&str> { + self.shared + .files + .get_index(index) + .map(|(name, _)| name.as_ref()) + } } impl ZipArchive { @@ -664,19 +745,6 @@ impl ZipArchive { }) } - const fn zip64_cde_len() -> usize { - mem::size_of::() - + mem::size_of::() - } - - const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) { - if a > b { - (b, a) - } else { - (a, b) - } - } - fn get_directory_info_zip64( config: &Config, reader: &mut R, @@ -939,7 +1007,7 @@ impl ZipArchive { let limit_reader = find_content(data, &mut self.reader)?; match data.aes_mode { None => Ok(None), - Some((aes_mode, _, _)) => { + Some(AesModeInfo { aes_mode, .. }) => { let (verification_value, salt) = AesReader::new(limit_reader, aes_mode, data.compressed_size) .get_verification_value_and_salt()?; @@ -1006,6 +1074,8 @@ impl ZipArchive { } let symlink_target = if file.is_symlink() && (cfg!(unix) || cfg!(windows)) { let mut target = Vec::with_capacity(file.size() as usize); + /* FIXME: this is broken: needs to be .read_to_end(), otherwise it writes into an + * empty slice. */ file.read_exact(&mut target)?; Some(target) } else { @@ -1090,34 +1160,6 @@ impl ZipArchive { Ok(()) } - /// Number of files contained in this zip. - pub fn len(&self) -> usize { - self.shared.files.len() - } - - /// Whether this zip archive contains no files - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes. - /// - /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size - /// of that prepended data. - pub fn offset(&self) -> u64 { - self.shared.offset - } - - /// Get the comment of the zip archive. - pub fn comment(&self) -> &[u8] { - &self.comment - } - - /// Returns an iterator over all the file and directory names in this archive. - pub fn file_names(&self) -> impl Iterator { - self.shared.files.keys().map(|s| s.as_ref()) - } - /// Search for a file entry by name, decrypt with given password /// /// # Warning @@ -1140,35 +1182,12 @@ impl ZipArchive { self.by_name_with_optional_password(name, None) } - /// Get the index of a file entry by name, if it's present. - #[inline(always)] - pub fn index_for_name(&self, name: &str) -> Option { - self.shared.files.get_index_of(name) - } - - /// Get the index of a file entry by path, if it's present. - #[inline(always)] - pub fn index_for_path>(&self, path: T) -> Option { - self.index_for_name(&path_to_string(path)) - } - - /// Get the name of a file entry, if it's present. - #[inline(always)] - pub fn name_for_index(&self, index: usize) -> Option<&str> { - self.shared - .files - .get_index(index) - .map(|(name, _)| name.as_ref()) - } - fn by_name_with_optional_password<'a>( &'a mut self, name: &str, password: Option<&[u8]>, ) -> ZipResult> { - let Some(index) = self.shared.files.get_index_of(name) else { - return Err(ZipError::FileNotFound); - }; + let index = self.index_for_name_err(name)?; self.by_index_with_optional_password(index, password) } @@ -1257,6 +1276,146 @@ impl ZipArchive { } } +impl ZipArchive +where + R: Read + Seek, +{ + /// Search for a file entry by name + pub fn by_name_new( + &mut self, + name: impl AsRef, + ) -> ZipResult> { + let index = self.index_for_name_err(name)?; + self.by_index_new(index) + } + + /// Get a contained file by index + pub fn by_index_new(&mut self, file_number: usize) -> ZipResult> { + let Self { + ref mut reader, + ref shared, + .. + } = self; + let (_, data) = shared + .files + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + let content = find_entry_content_range(data, reader)?; + let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; + let crc32_reader = NewCrc32Reader::new(entry_reader, data.crc32); + Ok(ZipEntry { + data, + reader: crc32_reader, + }) + } + + /// Get a contained file by name without decompressing it + pub fn by_name_raw_new( + &mut self, + name: impl AsRef, + ) -> ZipResult> { + let index = self.index_for_name_err(name)?; + self.by_index_raw_new(index) + } + + /// Get a contained file by index without decompressing it + pub fn by_index_raw_new( + &mut self, + file_number: usize, + ) -> ZipResult> { + let Self { + ref mut reader, + ref shared, + .. + } = self; + let (_, data) = shared + .files + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + let content = find_entry_content_range(data, reader)?; + Ok(ZipEntry { + data, + reader: content, + }) + } + + /// Search for a file entry by name, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. + pub fn by_name_decrypt_new( + &mut self, + name: impl AsRef, + password: &[u8], + ) -> ZipResult> { + let index = self.index_for_name_err(name)?; + self.by_index_decrypt_new(index, password) + } + + /// Get a contained file by index, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. + pub fn by_index_decrypt_new( + &mut self, + file_number: usize, + password: &[u8], + ) -> ZipResult> { + let Self { + ref mut reader, + ref shared, + .. + } = self; + let (_, data) = shared + .files + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + + let content = find_entry_content_range(data, reader)?; + + let final_reader = if data.encrypted { + let crypto_variant = CryptoVariant::from_data(data)?; + let is_ae2_encrypted = crypto_variant.is_ae2_encrypted(); + let crypto_reader = crypto_variant.make_crypto_reader(password, content)?; + let entry_reader = + construct_decompressing_reader(&data.compression_method, crypto_reader)?; + if is_ae2_encrypted { + /* Ae2 voids crc checking: https://www.winzip.com/en/support/aes-encryption/ */ + CryptoEntryReader::Ae2Encrypted(entry_reader) + } else { + CryptoEntryReader::NonAe2Encrypted(NewCrc32Reader::new(entry_reader, data.crc32)) + } + } else { + /* Not encrypted, so do the same as in .by_index_new(): */ + let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; + CryptoEntryReader::Unencrypted(NewCrc32Reader::new(entry_reader, data.crc32)) + }; + + Ok(ZipEntry { + data, + reader: final_reader, + }) + } +} + /// Holds the AES information of a file in the zip archive #[derive(Debug)] #[cfg(feature = "aes-crypto")] @@ -1304,7 +1463,7 @@ fn read_variable_length_byte_field(reader: &mut R, len: usize) -> io::R } /// Parse a central directory entry to collect the information for the file. -fn central_header_to_zip_file_inner( +pub(crate) fn central_header_to_zip_file_inner( reader: &mut R, archive_offset: u64, central_header_start: u64, @@ -1489,9 +1648,27 @@ pub(crate) fn parse_single_extra_field( _ => return Err(ZipError::InvalidArchive("Invalid AES vendor version")), }; match aes_mode { - 0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version, compression_method)), - 0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version, compression_method)), - 0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version, compression_method)), + 0x01 => { + file.aes_mode = Some(AesModeInfo { + aes_mode: AesMode::Aes128, + vendor_version, + compression_method, + }) + } + 0x02 => { + file.aes_mode = Some(AesModeInfo { + aes_mode: AesMode::Aes192, + vendor_version, + compression_method, + }) + } + 0x03 => { + file.aes_mode = Some(AesModeInfo { + aes_mode: AesMode::Aes256, + vendor_version, + compression_method, + }) + } _ => return Err(ZipError::InvalidArchive("Invalid AES encryption strength")), }; file.compression_method = compression_method; @@ -1647,9 +1824,10 @@ impl<'a> ZipFile<'a> { pub fn last_modified(&self) -> Option { self.data.last_modified_time } + /// Returns whether the file is actually a directory pub fn is_dir(&self) -> bool { - is_dir(self.name()) + self.data.is_dir() } /// Returns whether the file is actually a symbolic link @@ -1675,7 +1853,7 @@ impl<'a> ZipFile<'a> { /// Get the extra data of the zip header for this file pub fn extra_data(&self) -> Option<&[u8]> { - self.data.extra_field.as_ref().map(|v| v.deref().deref()) + self.data.extra_field.as_deref().map(|v| v.as_ref()) } /// Get the starting offset of the data of the compressed file diff --git a/src/types.rs b/src/types.rs index 91031a08c..fd5d7f7b0 100644 --- a/src/types.rs +++ b/src/types.rs @@ -420,9 +420,16 @@ impl TryFrom for OffsetDateTime { pub const MIN_VERSION: u8 = 10; pub const DEFAULT_VERSION: u8 = 45; +#[derive(Debug, Copy, Clone)] +pub struct AesModeInfo { + pub aes_mode: AesMode, + pub vendor_version: AesVendorVersion, + pub compression_method: CompressionMethod, +} + /// Structure representing a ZIP file. #[derive(Debug, Clone, Default)] -pub struct ZipFileData { +pub(crate) struct ZipFileData { /// Compatibility of the file attribute information pub system: System, /// Specification version @@ -470,7 +477,7 @@ pub struct ZipFileData { /// Reserve local ZIP64 extra field pub large_file: bool, /// AES mode if applicable - pub aes_mode: Option<(AesMode, AesVendorVersion, CompressionMethod)>, + pub aes_mode: Option, /// Specifies where in the extra data the AES metadata starts pub aes_extra_data_start: u64, @@ -621,7 +628,7 @@ impl ZipFileData { extra_data_start: Option, aes_extra_data_start: u64, compression_method: crate::compression::CompressionMethod, - aes_mode: Option<(AesMode, AesVendorVersion, CompressionMethod)>, + aes_mode: Option, extra_field: &[u8], ) -> Self where diff --git a/src/unstable.rs b/src/unstable.rs old mode 100644 new mode 100755 index 81cf18ea5..3c6a725cf --- a/src/unstable.rs +++ b/src/unstable.rs @@ -5,6 +5,8 @@ use std::io; use std::io::{Read, Write}; use std::path::{Component, Path, MAIN_SEPARATOR}; +pub mod read; + /// Provides high level API for reading from a stream. pub mod stream { pub use crate::read::stream::*; diff --git a/src/unstable/read.rs b/src/unstable/read.rs new file mode 100644 index 000000000..dc969e69a --- /dev/null +++ b/src/unstable/read.rs @@ -0,0 +1,714 @@ +//! Alternate implementation of [`crate::read`]. + +use crate::compression::CompressionMethod; +use crate::crc32::non_crypto::Crc32Reader; +use crate::extra_fields::ExtraField; +use crate::read::find_data_start; +use crate::result::{ZipError, ZipResult}; +use crate::types::ffi::S_IFLNK; +use crate::types::{AesModeInfo, AesVendorVersion, DateTime, ZipFileData}; +use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; + +#[cfg(feature = "lzma")] +use crate::read::lzma::LzmaDecoder; +#[cfg(feature = "xz")] +use crate::read::xz::XzDecoder; +#[cfg(feature = "aes-crypto")] +use crate::{ + aes::{AesReader, AesReaderValid}, + read::AesInfo, +}; + +#[cfg(feature = "bzip2")] +use bzip2::read::BzDecoder; +#[cfg(feature = "deflate64")] +use deflate64::Deflate64Decoder; +#[cfg(feature = "deflate-flate2")] +use flate2::read::DeflateDecoder; +#[cfg(feature = "zstd")] +use zstd::stream::read::Decoder as ZstdDecoder; + +use std::io::{self, Read, Seek}; +use std::path::PathBuf; + +pub(crate) enum EntryReader { + Stored(R), + #[cfg(feature = "_deflate-any")] + Deflated(DeflateDecoder), + #[cfg(feature = "deflate64")] + Deflate64(Deflate64Decoder>), + #[cfg(feature = "bzip2")] + Bzip2(BzDecoder), + #[cfg(feature = "zstd")] + Zstd(ZstdDecoder<'static, io::BufReader>), + #[cfg(feature = "lzma")] + /* According to clippy, this is >30x larger than the other variants, so we box it to avoid + * unnecessary large stack allocations. */ + Lzma(Box>), + #[cfg(feature = "xz")] + Xz(XzDecoder), +} + +impl Read for EntryReader +where + R: Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Self::Stored(r) => r.read(buf), + #[cfg(feature = "_deflate-any")] + Self::Deflated(r) => r.read(buf), + #[cfg(feature = "deflate64")] + Self::Deflate64(r) => r.read(buf), + #[cfg(feature = "bzip2")] + Self::Bzip2(r) => r.read(buf), + #[cfg(feature = "zstd")] + Self::Zstd(r) => r.read(buf), + #[cfg(feature = "lzma")] + Self::Lzma(r) => r.read(buf), + #[cfg(feature = "xz")] + Self::Xz(r) => r.read(buf), + } + } +} + +/// A struct for reading a zip file +pub struct ZipEntry<'a, R> { + pub(crate) data: &'a ZipFileData, + pub(crate) reader: R, +} + +impl<'a, R> Read for ZipEntry<'a, R> +where + R: Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.reader.read(buf) + } +} + +impl<'a, R> ZipEntry<'a, R> +where + R: Read, +{ + /// Returns the verification value and salt for the AES encryption of the file + /// + /// # Returns + /// + /// - None if the file is not encrypted with AES + #[cfg(feature = "aes-crypto")] + pub fn get_aes_verification_key_and_salt(self) -> ZipResult> { + let Self { data, reader } = self; + if let Some(AesModeInfo { aes_mode, .. }) = data.aes_mode { + let (verification_value, salt) = AesReader::new(reader, aes_mode, data.compressed_size) + .get_verification_value_and_salt()?; + let aes_info = AesInfo { + aes_mode, + verification_value, + salt, + }; + Ok(Some(aes_info)) + } else { + Ok(None) + } + } +} + +mod sealed_data { + use super::ZipFileData; + + #[doc(hidden)] + #[allow(private_interfaces)] + pub trait ArchiveData { + fn data(&self) -> &ZipFileData; + } +} + +pub trait ArchiveEntry: Read + sealed_data::ArchiveData { + /// Get the version of the file + fn version_made_by(&self) -> (u8, u8) { + ( + self.data().version_made_by / 10, + self.data().version_made_by % 10, + ) + } + + /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. + /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. + /// + /// You can use the [`ZipFile::enclosed_name`] method to validate the name + /// as a safe path. + fn name(&self) -> &str { + &self.data().file_name + } + + /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. + fn name_raw(&self) -> &[u8] { + &self.data().file_name_raw + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [`ParentDir`]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. + /// + /// [`ParentDir`]: `Component::ParentDir` + fn mangled_name(&self) -> PathBuf { + self.data().file_name_sanitized() + } + + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. + fn enclosed_name(&self) -> Option { + self.data().enclosed_name() + } + + /// Get the comment of the file + fn comment(&self) -> &str { + &self.data().file_comment + } + + /// Get the compression method used to store the file + fn compression(&self) -> CompressionMethod { + self.data().compression_method + } + + /// Get the size of the file, in bytes, in the archive + fn compressed_size(&self) -> u64 { + self.data().compressed_size + } + + /// Get the size of the file, in bytes, when uncompressed + fn size(&self) -> u64 { + self.data().uncompressed_size + } + + /// Get the time the file was last modified + fn last_modified(&self) -> Option { + self.data().last_modified_time + } + + /// Returns whether the file is actually a directory + fn is_dir(&self) -> bool { + self.data().is_dir() + } + + /// Returns whether the file is actually a symbolic link + fn is_symlink(&self) -> bool { + self.unix_mode() + .is_some_and(|mode| mode & S_IFLNK == S_IFLNK) + } + + /// Returns whether the file is a normal file (i.e. not a directory or symlink) + fn is_file(&self) -> bool { + !self.is_dir() && !self.is_symlink() + } + + /// Get unix mode for the file + fn unix_mode(&self) -> Option { + self.data().unix_mode() + } + + /// Get the CRC32 hash of the original file + fn crc32(&self) -> u32 { + self.data().crc32 + } + + /// Get the extra data of the zip header for this file + fn extra_data(&self) -> Option<&[u8]> { + self.data().extra_field.as_deref().map(|v| v.as_ref()) + } + + /// Get the starting offset of the data of the compressed file + fn data_start(&self) -> u64 { + *self.data().data_start.get().unwrap() + } + + /// Get the starting offset of the zip header for this file + fn header_start(&self) -> u64 { + self.data().header_start + } + /// Get the starting offset of the zip header in the central directory for this file + fn central_header_start(&self) -> u64 { + self.data().central_header_start + } + + /// iterate through all extra fields + fn extra_data_fields(&self) -> impl Iterator { + self.data().extra_fields.iter() + } +} + +impl<'a, R> sealed_data::ArchiveData for ZipEntry<'a, R> { + #[allow(private_interfaces)] + fn data(&self) -> &ZipFileData { + self.data + } +} + +impl<'a, R> ArchiveEntry for ZipEntry<'a, R> where R: Read {} + +pub(crate) fn find_entry_content_range( + data: &ZipFileData, + mut reader: R, +) -> Result, ZipError> +where + R: Read + Seek, +{ + // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! + let data_start = match data.data_start.get() { + Some(data_start) => *data_start, + None => find_data_start(data, &mut reader)?, + }; + + reader.seek(io::SeekFrom::Start(data_start))?; + Ok(reader.take(data.compressed_size)) +} + +pub(crate) fn construct_decompressing_reader( + compression_method: &CompressionMethod, + reader: R, +) -> Result, ZipError> +where + /* TODO: this really shouldn't be required upon construction (especially since the reader + * doesn't need to be mutable, indicating the Read capability isn't used), but multiple of our + * constituent constructors require it. We should be able to make upstream PRs to fix these. */ + R: Read, +{ + match compression_method { + &CompressionMethod::Stored => Ok(EntryReader::Stored(reader)), + #[cfg(feature = "_deflate-any")] + &CompressionMethod::Deflated => { + let deflate_reader = DeflateDecoder::new(reader); + Ok(EntryReader::Deflated(deflate_reader)) + } + #[cfg(feature = "deflate64")] + &CompressionMethod::Deflate64 => { + let deflate64_reader = Deflate64Decoder::new(reader); + Ok(EntryReader::Deflate64(deflate64_reader)) + } + #[cfg(feature = "bzip2")] + &CompressionMethod::Bzip2 => { + let bzip2_reader = BzDecoder::new(reader); + Ok(EntryReader::Bzip2(bzip2_reader)) + } + #[cfg(feature = "zstd")] + &CompressionMethod::Zstd => { + let zstd_reader = ZstdDecoder::new(reader).unwrap(); + Ok(EntryReader::Zstd(zstd_reader)) + } + #[cfg(feature = "lzma")] + &CompressionMethod::Lzma => { + let reader = LzmaDecoder::new(reader); + Ok(EntryReader::Lzma(Box::new(reader))) + } + #[cfg(feature = "xz")] + &CompressionMethod::Xz => { + let reader = XzDecoder::new(reader); + Ok(EntryReader::Xz(reader)) + } + /* TODO: make this into its own EntryReadError error type! */ + _ => Err(ZipError::UnsupportedArchive( + "Compression method not supported", + )), + } +} + +pub(crate) enum CryptoReader { + ZipCrypto(ZipCryptoReaderValid), + #[cfg(feature = "aes-crypto")] + Aes(AesReaderValid), +} + +impl Read for CryptoReader +where + R: Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + CryptoReader::ZipCrypto(r) => r.read(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes(r) => r.read(buf), + } + } +} + +pub(crate) enum CryptoVariant { + Crc32(u32), + DateTime(DateTime), + #[cfg(feature = "aes-crypto")] + Aes { + info: AesModeInfo, + compressed_size: u64, + }, +} + +impl CryptoVariant { + pub fn from_data(data: &ZipFileData) -> Result { + assert!( + data.encrypted, + "should never enter this method except on encrypted entries" + ); + #[allow(deprecated)] + if let CompressionMethod::Unsupported(_) = data.compression_method { + /* TODO: make this into its own EntryReadError error type! */ + return Err(ZipError::UnsupportedArchive( + "Compression method not supported", + )); + } + if let Some(info) = data.aes_mode { + #[cfg(not(feature = "aes-crypto"))] + /* TODO: make this into its own EntryReadError error type! */ + return Err(ZipError::UnsupportedArchive( + "AES encrypted files cannot be decrypted without the aes-crypto feature.", + )); + #[cfg(feature = "aes-crypto")] + return Ok(Self::Aes { + info, + compressed_size: data.compressed_size, + }); + } + if let Some(last_modified_time) = data.last_modified_time { + /* TODO: use let chains once stabilized! */ + if data.using_data_descriptor { + return Ok(Self::DateTime(last_modified_time)); + } + } + Ok(Self::Crc32(data.crc32)) + } + + /// Returns `true` if the data is encrypted using AE2. + pub const fn is_ae2_encrypted(&self) -> bool { + match self { + #[cfg(feature = "aes-crypto")] + Self::Aes { + info: + AesModeInfo { + vendor_version: AesVendorVersion::Ae2, + .. + }, + .. + } => true, + _ => false, + } + } + + pub fn make_crypto_reader( + self, + password: &[u8], + reader: R, + ) -> Result, ZipError> + where + R: Read, + { + match self { + Self::Aes { + info: AesModeInfo { aes_mode, .. }, + compressed_size, + } => { + assert!( + cfg!(feature = "aes-crypto"), + "should never get here unless aes support was enabled" + ); + let aes_reader = + AesReader::new(reader, aes_mode, compressed_size).validate(password)?; + Ok(CryptoReader::Aes(aes_reader)) + } + Self::DateTime(last_modified_time) => { + let validator = ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart()); + let zc_reader = ZipCryptoReader::new(reader, password).validate(validator)?; + Ok(CryptoReader::ZipCrypto(zc_reader)) + } + Self::Crc32(crc32) => { + let validator = ZipCryptoValidator::PkzipCrc32(crc32); + let zc_reader = ZipCryptoReader::new(reader, password).validate(validator)?; + Ok(CryptoReader::ZipCrypto(zc_reader)) + } + } + } +} + +pub(crate) enum CryptoEntryReader { + Unencrypted(Crc32Reader>), + Ae2Encrypted(EntryReader>), + NonAe2Encrypted(Crc32Reader>>), +} + +impl Read for CryptoEntryReader +where + R: Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Self::Unencrypted(r) => r.read(buf), + Self::Ae2Encrypted(r) => r.read(buf), + Self::NonAe2Encrypted(r) => r.read(buf), + } + } +} + +pub mod streaming { + use super::{ + construct_decompressing_reader, sealed_data, ArchiveEntry, Crc32Reader, ZipError, + ZipFileData, ZipResult, + }; + + use crate::read::{central_header_to_zip_file_inner, parse_extra_field}; + use crate::spec::{self, FixedSizeBlock}; + use crate::types::{ZipCentralEntryBlock, ZipLocalEntryBlock}; + + use std::io::{self, Read}; + use std::mem; + use std::ops; + + pub struct StreamingArchive { + reader: R, + remaining_before_next_entry: u64, + first_metadata_block: Option>, + } + + impl StreamingArchive { + pub const fn new(reader: R) -> Self { + Self { + reader, + remaining_before_next_entry: 0, + first_metadata_block: None, + } + } + + pub fn into_inner(self) -> R { + let Self { reader, .. } = self; + reader + } + } + + impl StreamingArchive + where + R: Read, + { + pub fn next_entry(&mut self) -> ZipResult>> { + // We can't use the typical ::parse() method, as we follow separate code paths depending + // on the "magic" value (since the magic value will be from the central directory header + // if we've finished iterating over all the actual files). + /* TODO: smallvec? */ + let Self { + ref mut reader, + ref mut remaining_before_next_entry, + ref mut first_metadata_block, + } = self; + if *remaining_before_next_entry > 0 { + io::copy( + &mut (reader as &mut dyn Read).take(*remaining_before_next_entry), + &mut io::sink(), + )?; + *remaining_before_next_entry = 0; + } + + let mut block = [0u8; mem::size_of::()]; + reader.read_exact(&mut block)?; + let block: Box<[u8]> = block.into(); + + let signature = spec::Magic::from_first_le_bytes(&block); + + match signature { + spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (), + spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => { + assert!( + first_metadata_block.is_none(), + "metadata block should never be set except exactly once" + ); + *first_metadata_block = Some(block); + return Ok(None); + } + _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), + } + + let block = ZipLocalEntryBlock::interpret(&block)?; + + let mut data = ZipFileData::from_local_block(block, reader)?; + + match parse_extra_field(&mut data) { + /* FIXME: check for the right error type here instead of accepting any old i/o + * error. */ + Ok(..) | Err(ZipError::Io(..)) => {} + Err(e) => return Err(e), + } + + let limit_reader = + DrainWrapper::new(data.compressed_size, remaining_before_next_entry, reader); + let entry_reader = + construct_decompressing_reader(&data.compression_method, limit_reader)?; + let crc32_reader = Crc32Reader::new(entry_reader, data.crc32); + Ok(Some(StreamingZipEntry { + data, + reader: crc32_reader, + })) + } + + pub fn next_metadata_entry(&mut self) -> ZipResult> { + let Self { + ref mut reader, + ref mut remaining_before_next_entry, + ref mut first_metadata_block, + } = self; + if *remaining_before_next_entry > 0 { + io::copy( + &mut (reader as &mut dyn Read).take(*remaining_before_next_entry), + &mut io::sink(), + )?; + *remaining_before_next_entry = 0; + } + + // Parse central header + let block = match first_metadata_block.take() { + None => match ZipCentralEntryBlock::parse(reader) { + Ok(block) => block, + Err(ZipError::Io(e)) if e.kind() == io::ErrorKind::UnexpectedEof => { + return Ok(None); + } + Err(e) => return Err(e), + }, + Some(block) => ZipCentralEntryBlock::parse(&mut io::Cursor::new(block))?, + }; + + // Give archive_offset and central_header_start dummy value 0, since + // they are not used in the output. + let archive_offset = 0; + let central_header_start = 0; + + let data = central_header_to_zip_file_inner( + reader, + archive_offset, + central_header_start, + block, + )?; + Ok(Some(ZipStreamFileMetadata(data))) + } + } + + struct DrainWrapper<'a, R> { + full_extent: usize, + current_progress: usize, + remaining_to_notify: &'a mut u64, + inner: R, + } + + impl<'a, R> DrainWrapper<'a, R> { + pub fn new(extent: u64, remaining_to_notify: &'a mut u64, inner: R) -> Self { + Self { + full_extent: extent.try_into().unwrap(), + current_progress: 0, + remaining_to_notify, + inner, + } + } + + fn remaining(&self) -> usize { + debug_assert!(self.current_progress <= self.full_extent); + self.full_extent - self.current_progress + } + } + + impl<'a, R> Read for DrainWrapper<'a, R> + where + R: Read, + { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let to_read = self.remaining().min(buf.len()); + /* If the input is exhausted, or `buf` was empty, just forward any error. */ + if to_read == 0 { + return self.inner.read(&mut []); + } + + let count = self.inner.read(&mut buf[..to_read])?; + if count == 0 { + /* `to_read` was >0, so this was unexpected: */ + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "failed to read expected number of bytes for zip entry from stream", + )); + } + + debug_assert!(count <= to_read); + self.current_progress += count; + Ok(count) + } + } + + impl<'a, R> ops::Drop for DrainWrapper<'a, R> { + fn drop(&mut self) { + assert_eq!( + 0, *self.remaining_to_notify, + "remaining should always be zero before drop is called" + ); + *self.remaining_to_notify = self.remaining().try_into().unwrap(); + } + } + + /// A struct for reading a zip file from a stream. + pub struct StreamingZipEntry { + pub(crate) data: ZipFileData, + pub(crate) reader: R, + } + + impl Read for StreamingZipEntry + where + R: Read, + { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.reader.read(buf) + } + } + + impl sealed_data::ArchiveData for StreamingZipEntry { + #[allow(private_interfaces)] + fn data(&self) -> &ZipFileData { + &self.data + } + } + + impl ArchiveEntry for StreamingZipEntry where R: Read {} + + /// Additional metadata for the file. + #[derive(Debug)] + pub struct ZipStreamFileMetadata(ZipFileData); + + impl sealed_data::ArchiveData for ZipStreamFileMetadata { + #[allow(private_interfaces)] + fn data(&self) -> &ZipFileData { + let Self(data) = self; + data + } + } + + impl Read for ZipStreamFileMetadata { + fn read(&mut self, _buf: &mut [u8]) -> io::Result { + Ok(0) + } + } + + impl ArchiveEntry for ZipStreamFileMetadata {} +} diff --git a/src/write.rs b/src/write.rs index a8a31a53f..2c9cf53b3 100644 --- a/src/write.rs +++ b/src/write.rs @@ -3,16 +3,17 @@ #[cfg(feature = "aes-crypto")] use crate::aes::AesWriter; use crate::compression::CompressionMethod; -use crate::read::{ - find_content, parse_single_extra_field, Config, ZipArchive, ZipFile, ZipFileReader, -}; +use crate::read::ZipFile; +use crate::read::{parse_single_extra_field, Config, ZipArchive}; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CDEBlock}; #[cfg(feature = "aes-crypto")] use crate::types::AesMode; use crate::types::{ - ffi, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, + ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, + MIN_VERSION, }; +use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; @@ -683,10 +684,8 @@ impl ZipWriter { compressed_size, uncompressed_size, }; - let mut reader = BufReader::new(ZipFileReader::Raw(find_content( - src_data, - self.inner.get_plain(), - )?)); + let mut reader = + BufReader::new(find_entry_content_range(src_data, self.inner.get_plain())?); let mut copy = Vec::with_capacity(compressed_size as usize); reader.read_to_end(&mut copy)?; drop(reader); @@ -902,7 +901,11 @@ impl ZipWriter { #[cfg(feature = "aes-crypto")] Some(EncryptWith::Aes { mode, .. }) => ( CompressionMethod::Aes, - Some((mode, AesVendorVersion::Ae2, options.compression_method)), + Some(AesModeInfo { + aes_mode: mode, + vendor_version: AesVendorVersion::Ae2, + compression_method: options.compression_method, + }), ), _ => (options.compression_method, None), }; @@ -1058,7 +1061,7 @@ impl ZipWriter { // unencrypted contents. // // C.f. https://www.winzip.com/en/support/aes-encryption/#crc-faq - aes_mode.1 = if self.stats.bytes_written < 20 { + aes_mode.vendor_version = if self.stats.bytes_written < 20 { crc = false; AesVendorVersion::Ae2 } else { @@ -1247,7 +1250,7 @@ impl ZipWriter { /// Add a new file using the already compressed data from a ZIP file being read and renames it, this /// allows faster copies of the `ZipFile` since there is no need to decompress and compress it again. /// Any `ZipFile` metadata is copied and not checked, for example the file CRC. - + /// /// ```no_run /// use std::fs::File; /// use std::io::{Read, Seek, Write}; @@ -1818,7 +1821,12 @@ fn update_aes_extra_data( writer: &mut W, file: &mut ZipFileData, ) -> ZipResult<()> { - let Some((aes_mode, version, compression_method)) = file.aes_mode else { + let Some(AesModeInfo { + aes_mode, + vendor_version: version, + compression_method, + }) = file.aes_mode + else { return Ok(()); }; diff --git a/src/zipcrypto.rs b/src/zipcrypto.rs index bd81b5c43..5561ac3d5 100644 --- a/src/zipcrypto.rs +++ b/src/zipcrypto.rs @@ -202,7 +202,7 @@ impl std::io::Read for ZipCryptoReaderValid { } } -impl ZipCryptoReaderValid { +impl ZipCryptoReaderValid { /// Consumes this decoder, returning the underlying reader. pub fn into_inner(self) -> R { self.reader.file From 3abac52d1999350bb2959ac1e6ee60bc4650b7d9 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:40:14 -0400 Subject: [PATCH 02/18] remove read_zipfile_from_stream() --- benches/read_metadata.rs | 31 ++++++-- examples/stdin_info.rs | 7 +- fuzz/fuzz_targets/fuzz_read.rs | 24 +++++-- src/read.rs | 76 ++------------------ src/read/stream.rs | 127 ++++----------------------------- src/unstable/read.rs | 19 ++++- src/write.rs | 2 +- 7 files changed, 88 insertions(+), 198 deletions(-) mode change 100644 => 100755 fuzz/fuzz_targets/fuzz_read.rs diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 73f2b26ed..1c11d7302 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -7,7 +7,14 @@ use bencher::Bencher; use getrandom::getrandom; use tempdir::TempDir; use zip::write::SimpleFileOptions; -use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter}; +use zip::{ + result::ZipResult, + unstable::{ + read::streaming::{StreamingZipEntry, ZipStreamFileMetadata}, + stream::{ZipStreamReader, ZipStreamVisitor}, + }, + CompressionMethod, ZipArchive, ZipWriter, +}; const FILE_COUNT: usize = 15_000; const FILE_SIZE: usize = 1024; @@ -106,12 +113,24 @@ fn parse_stream_archive(bench: &mut Bencher) { let out = dir.path().join("bench-out.zip"); fs::write(&out, &bytes).unwrap(); + struct V; + impl ZipStreamVisitor for V { + fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { + Ok(()) + } + + fn visit_additional_metadata( + &mut self, + _metadata: &ZipStreamFileMetadata, + ) -> ZipResult<()> { + Ok(()) + } + } + bench.iter(|| { - let mut f = fs::File::open(&out).unwrap(); - while zip::read::read_zipfile_from_stream(&mut f) - .unwrap() - .is_some() - {} + let f = fs::File::open(&out).unwrap(); + let archive = ZipStreamReader::new(f); + archive.visit(&mut V).unwrap(); }); bench.bytes = bytes.len() as u64; } diff --git a/examples/stdin_info.rs b/examples/stdin_info.rs index a609916a0..25f8f895b 100644 --- a/examples/stdin_info.rs +++ b/examples/stdin_info.rs @@ -1,16 +1,19 @@ use std::io::{self, Read}; +use zip::unstable::read::{streaming::StreamingArchive, ArchiveEntry}; + fn main() { std::process::exit(real_main()); } fn real_main() -> i32 { let stdin = io::stdin(); - let mut stdin_handle = stdin.lock(); + let stdin_handle = stdin.lock(); let mut buf = [0u8; 16]; + let mut archive = StreamingArchive::new(stdin_handle); loop { - match zip::read::read_zipfile_from_stream(&mut stdin_handle) { + match archive.next_entry() { Ok(Some(mut file)) => { println!( "{}: {} bytes ({} bytes packed)", diff --git a/fuzz/fuzz_targets/fuzz_read.rs b/fuzz/fuzz_targets/fuzz_read.rs old mode 100644 new mode 100755 index 78fe670ec..4a4327528 --- a/fuzz/fuzz_targets/fuzz_read.rs +++ b/fuzz/fuzz_targets/fuzz_read.rs @@ -3,7 +3,10 @@ use libfuzzer_sys::fuzz_target; use std::io::{Read, Seek, SeekFrom}; use tikv_jemallocator::Jemalloc; -use zip::read::read_zipfile_from_stream; +use zip::unstable::{ + read::streaming::{StreamingArchive, StreamingZipEntry, ZipStreamFileMetadata}, + stream::ZipStreamVisitor, +}; const MAX_BYTES_TO_READ: u64 = 1 << 24; @@ -18,11 +21,24 @@ fn decompress_all(data: &[u8]) -> Result<(), Box> { let mut file = zip.by_index(i)?.take(MAX_BYTES_TO_READ); std::io::copy(&mut file, &mut std::io::sink())?; } + let mut reader = zip.into_inner(); - reader.seek(SeekFrom::Start(0))?; - while let Ok(Some(mut file)) = read_zipfile_from_stream(&mut reader) { - std::io::copy(&mut file, &mut std::io::sink())?; + reader.rewind()?; + + struct V; + impl ZipStreamVisitor for V { + fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { + std::io::copy(&mut file, &mut std::io::sink())? + } + + fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()> { + Ok(()) + } } + + let archive = StreamingArchive::new(reader)?; + archive.visit(&mut V)?; + Ok(()) } diff --git a/src/read.rs b/src/read.rs index ff23f88a3..7821b3ec2 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1917,71 +1917,6 @@ impl<'a> Drop for ZipFile<'a> { } } -/// Read ZipFile structures from a non-seekable reader. -/// -/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions -/// as some information will be missing when reading this manner. -/// -/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is -/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory -/// is encountered. No more files should be read after this. -/// -/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after -/// the structure is done. -/// -/// Missing fields are: -/// * `comment`: set to an empty string -/// * `data_start`: set to 0 -/// * `external_attributes`: `unix_mode()`: will return None -pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { - // We can't use the typical ::parse() method, as we follow separate code paths depending on the - // "magic" value (since the magic value will be from the central directory header if we've - // finished iterating over all the actual files). - /* TODO: smallvec? */ - let mut block = [0u8; mem::size_of::()]; - reader.read_exact(&mut block)?; - let block: Box<[u8]> = block.into(); - - let signature = spec::Magic::from_first_le_bytes(&block); - - match signature { - spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (), - spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), - _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), - } - - let block = ZipLocalEntryBlock::interpret(&block)?; - - let mut result = ZipFileData::from_local_block(block, reader)?; - - match parse_extra_field(&mut result) { - Ok(..) | Err(ZipError::Io(..)) => {} - Err(e) => return Err(e), - } - - let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); - - let result_crc32 = result.crc32; - let result_compression_method = result.compression_method; - let crypto_reader = make_crypto_reader( - result_compression_method, - result_crc32, - result.last_modified_time, - result.using_data_descriptor, - limit_reader, - None, - None, - #[cfg(feature = "aes-crypto")] - result.compressed_size, - )?; - - Ok(Some(ZipFile { - data: Cow::Owned(result), - crypto_reader: None, - reader: make_reader(result_compression_method, result_crc32, crypto_reader)?, - })) -} - #[cfg(test)] mod test { use crate::result::ZipResult; @@ -2034,16 +1969,13 @@ mod test { #[test] fn zip_read_streaming() { - use super::read_zipfile_from_stream; + use crate::unstable::read::streaming::StreamingArchive; let mut v = Vec::new(); v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); - let mut reader = Cursor::new(v); - loop { - if read_zipfile_from_stream(&mut reader).unwrap().is_none() { - break; - } - } + let reader = Cursor::new(v); + let mut archive = StreamingArchive::new(reader); + while archive.next_entry().unwrap().is_some() {} } #[test] diff --git a/src/read/stream.rs b/src/read/stream.rs index 7fb76e70c..c6c27aca4 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,50 +1,33 @@ use std::fs; use std::io::{self, Read}; -use std::path::{Path, PathBuf}; +use std::path::Path; -use super::{ - central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError, - ZipFile, ZipFileData, ZipResult, +use super::{ZipError, ZipResult}; +use crate::unstable::read::{ + streaming::{StreamingArchive, StreamingZipEntry, ZipStreamFileMetadata}, + ArchiveEntry, }; -use crate::spec::FixedSizeBlock; /// Stream decoder for zip. #[derive(Debug)] -pub struct ZipStreamReader(R); +pub struct ZipStreamReader(StreamingArchive); impl ZipStreamReader { /// Create a new ZipStreamReader pub const fn new(reader: R) -> Self { - Self(reader) + Self(StreamingArchive::new(reader)) } } impl ZipStreamReader { - fn parse_central_directory(&mut self) -> ZipResult { - // Give archive_offset and central_header_start dummy value 0, since - // they are not used in the output. - let archive_offset = 0; - let central_header_start = 0; - - // Parse central header - let block = ZipCentralEntryBlock::parse(&mut self.0)?; - let file = central_header_to_zip_file_inner( - &mut self.0, - archive_offset, - central_header_start, - block, - )?; - Ok(ZipStreamFileMetadata(file)) - } - /// Iterate over the stream and extract all file and their /// metadata. pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { - while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { + while let Some(mut file) = self.0.next_entry()? { visitor.visit_file(&mut file)?; } - while let Ok(metadata) = self.parse_central_directory() { + while let Some(metadata) = self.0.next_metadata_entry()? { visitor.visit_additional_metadata(&metadata)?; } @@ -59,7 +42,7 @@ impl ZipStreamReader { pub fn extract>(self, directory: P) -> ZipResult<()> { struct Extractor<'a>(&'a Path); impl ZipStreamVisitor for Extractor<'_> { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; @@ -113,7 +96,7 @@ pub trait ZipStreamVisitor { /// - `comment`: set to an empty string /// - `data_start`: set to 0 /// - `external_attributes`: `unix_mode()`: will return None - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; + fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()>; /// This function is guranteed to be called after all `visit_file`s. /// @@ -121,86 +104,6 @@ pub trait ZipStreamVisitor { fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; } -/// Additional metadata for the file. -#[derive(Debug)] -pub struct ZipStreamFileMetadata(ZipFileData); - -impl ZipStreamFileMetadata { - /// Get the name of the file - /// - /// # Warnings - /// - /// It is dangerous to use this name directly when extracting an archive. - /// It may contain an absolute path (`/etc/shadow`), or break out of the - /// current directory (`../runtime`). Carelessly writing to these paths - /// allows an attacker to craft a ZIP archive that will overwrite critical - /// files. - /// - /// You can use the [`ZipFile::enclosed_name`] method to validate the name - /// as a safe path. - pub fn name(&self) -> &str { - &self.0.file_name - } - - /// Get the name of the file, in the raw (internal) byte representation. - /// - /// The encoding of this data is currently undefined. - pub fn name_raw(&self) -> &[u8] { - &self.0.file_name_raw - } - - /// Rewrite the path, ignoring any path components with special meaning. - /// - /// - Absolute paths are made relative - /// - [std::path::Component::ParentDir]s are ignored - /// - Truncates the filename at a NULL byte - /// - /// This is appropriate if you need to be able to extract *something* from - /// any archive, but will easily misrepresent trivial paths like - /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, - /// [`ZipFile::enclosed_name`] is the better option in most scenarios. - pub fn mangled_name(&self) -> PathBuf { - self.0.file_name_sanitized() - } - - /// Ensure the file path is safe to use as a [`Path`]. - /// - /// - It can't contain NULL bytes - /// - It can't resolve to a path outside the current directory - /// > `foo/../bar` is fine, `foo/../../bar` is not. - /// - It can't be an absolute path - /// - /// This will read well-formed ZIP files correctly, and is resistant - /// to path-based exploits. It is recommended over - /// [`ZipFile::mangled_name`]. - pub fn enclosed_name(&self) -> Option { - self.0.enclosed_name() - } - - /// Returns whether the file is actually a directory - pub fn is_dir(&self) -> bool { - self.name() - .chars() - .next_back() - .map_or(false, |c| c == '/' || c == '\\') - } - - /// Returns whether the file is a regular file - pub fn is_file(&self) -> bool { - !self.is_dir() - } - - /// Get the comment of the file - pub fn comment(&self) -> &str { - &self.0.file_comment - } - - /// Get unix mode for the file - pub const fn unix_mode(&self) -> Option { - self.0.unix_mode() - } -} - #[cfg(test)] mod test { use super::*; @@ -208,7 +111,7 @@ mod test { struct DummyVisitor; impl ZipStreamVisitor for DummyVisitor { - fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { Ok(()) } @@ -224,7 +127,7 @@ mod test { #[derive(Default, Debug, Eq, PartialEq)] struct CounterVisitor(u64, u64); impl ZipStreamVisitor for CounterVisitor { - fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { self.0 += 1; Ok(()) } @@ -267,7 +170,7 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { if file.is_file() { self.filenames.insert(file.name().into()); } @@ -304,7 +207,7 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { let full_name = file.enclosed_name().unwrap(); let file_name = full_name.file_name().unwrap().to_str().unwrap(); assert!( diff --git a/src/unstable/read.rs b/src/unstable/read.rs index dc969e69a..b602531c6 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -484,6 +484,7 @@ pub mod streaming { use std::mem; use std::ops; + #[derive(Debug)] pub struct StreamingArchive { reader: R, remaining_before_next_entry: u64, @@ -591,7 +592,23 @@ pub mod streaming { } Err(e) => return Err(e), }, - Some(block) => ZipCentralEntryBlock::parse(&mut io::Cursor::new(block))?, + Some(block) => { + assert_eq!(block.len(), mem::size_of::()); + assert!( + mem::size_of::() + < mem::size_of::() + ); + + let mut remaining_block = [0u8; mem::size_of::() + - mem::size_of::()]; + reader.read_exact(remaining_block.as_mut())?; + + let mut joined_block = [0u8; mem::size_of::()]; + joined_block[..block.len()].copy_from_slice(&block); + joined_block[block.len()..].copy_from_slice(&remaining_block); + + ZipCentralEntryBlock::interpret(&joined_block)? + } }; // Give archive_offset and central_header_start dummy value 0, since diff --git a/src/write.rs b/src/write.rs index 2c9cf53b3..c3df64a2a 100644 --- a/src/write.rs +++ b/src/write.rs @@ -13,7 +13,7 @@ use crate::types::{ ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, }; -use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; +use crate::unstable::read::find_entry_content_range; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; From b463f025188a54b6015b51e63d084572de3bffc0 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:51:21 -0400 Subject: [PATCH 03/18] fix up some public methods to use new impls --- src/read.rs | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/src/read.rs b/src/read.rs index 7821b3ec2..d5191257a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -132,8 +132,8 @@ use crate::unstable::{path_to_string, LittleEndianReadExt}; use crate::crc32::non_crypto::Crc32Reader as NewCrc32Reader; use crate::unstable::read::{ - construct_decompressing_reader, find_entry_content_range, CryptoEntryReader, CryptoVariant, - ZipEntry, + construct_decompressing_reader, find_entry_content_range, ArchiveEntry, CryptoEntryReader, + CryptoVariant, ZipEntry, }; pub use zip_archive::ZipArchive; @@ -998,27 +998,8 @@ impl ZipArchive { &mut self, file_number: usize, ) -> ZipResult> { - let (_, data) = self - .shared - .files - .get_index(file_number) - .ok_or(ZipError::FileNotFound)?; - - let limit_reader = find_content(data, &mut self.reader)?; - match data.aes_mode { - None => Ok(None), - Some(AesModeInfo { aes_mode, .. }) => { - let (verification_value, salt) = - AesReader::new(limit_reader, aes_mode, data.compressed_size) - .get_verification_value_and_salt()?; - let aes_info = AesInfo { - aes_mode, - verification_value, - salt, - }; - Ok(Some(aes_info)) - } - } + let entry = self.by_index_raw_new(file_number)?; + entry.get_aes_verification_key_and_salt() } /// Read a ZIP archive, collecting the files it contains. @@ -1061,7 +1042,7 @@ impl ZipArchive { #[cfg(unix)] let mut files_by_unix_mode = Vec::new(); for i in 0..self.len() { - let mut file = self.by_index(i)?; + let mut file = self.by_index_new(i)?; let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; @@ -1117,7 +1098,7 @@ impl ZipArchive { } continue; } - let mut file = self.by_index(i)?; + let mut file = self.by_index_new(i)?; let mut outfile = fs::File::create(&outpath)?; io::copy(&mut file, &mut outfile)?; #[cfg(unix)] From f0faf2446375fc8e28cbc5f046c40649c3ece739 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:25:20 -0400 Subject: [PATCH 04/18] remove the old ZipFile API entirely --- benches/merge_archive.rs | 4 +- examples/extract.rs | 2 + examples/file_info.rs | 2 + src/crc32.rs | 187 ++------ src/read.rs | 734 ++------------------------------ src/unstable/read.rs | 2 +- src/write.rs | 36 +- tests/aes_encryption.rs | 5 +- tests/deflate64.rs | 2 +- tests/end_to_end.rs | 9 +- tests/lzma.rs | 2 +- tests/xz.rs | 2 +- tests/zip64_large.rs | 2 + tests/zip_crypto.rs | 1 + tests/zip_extended_timestamp.rs | 2 +- 15 files changed, 98 insertions(+), 894 deletions(-) mode change 100644 => 100755 tests/aes_encryption.rs mode change 100644 => 100755 tests/deflate64.rs mode change 100644 => 100755 tests/lzma.rs mode change 100644 => 100755 tests/xz.rs diff --git a/benches/merge_archive.rs b/benches/merge_archive.rs index ff07e1a6c..698e0a80c 100644 --- a/benches/merge_archive.rs +++ b/benches/merge_archive.rs @@ -41,8 +41,8 @@ fn perform_raw_copy_file( mut target: ZipWriter, ) -> ZipResult> { for i in 0..src.len() { - let entry = src.by_index(i)?; - target.raw_copy_file(entry)?; + let entry = src.by_index_raw(i)?; + target.copy_file(entry)?; } Ok(target) } diff --git a/examples/extract.rs b/examples/extract.rs index 57cfba0d7..aaa652663 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -1,6 +1,8 @@ use std::fs; use std::io; +use zip::unstable::read::ArchiveEntry; + fn main() { std::process::exit(real_main()); } diff --git a/examples/file_info.rs b/examples/file_info.rs index 84878c583..c29c356a4 100644 --- a/examples/file_info.rs +++ b/examples/file_info.rs @@ -1,6 +1,8 @@ use std::fs; use std::io::BufReader; +use zip::unstable::read::ArchiveEntry; + fn main() { std::process::exit(real_main()); } diff --git a/src/crc32.rs b/src/crc32.rs index c17857c1c..770b148b0 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -10,25 +10,26 @@ pub struct Crc32Reader { inner: R, hasher: Hasher, check: u32, - /// Signals if `inner` stores aes encrypted data. - /// AE-2 encrypted data doesn't use crc and sets the value to 0. - enabled: bool, } impl Crc32Reader { /// Get a new Crc32Reader which checks the inner reader against checksum. - /// The check is disabled if `ae2_encrypted == true`. - pub(crate) fn new(inner: R, checksum: u32, ae2_encrypted: bool) -> Crc32Reader { + pub(crate) fn new(inner: R, checksum: u32) -> Self { Crc32Reader { inner, hasher: Hasher::new(), check: checksum, - enabled: !ae2_encrypted, } } - fn check_matches(&self) -> bool { - self.check == self.hasher.clone().finalize() + fn check_matches(&self) -> Result<(), &'static str> { + let res = self.hasher.clone().finalize(); + if self.check == res { + Ok(()) + } else { + /* TODO: make this into our own Crc32Error error type! */ + Err("Invalid checksum") + } } pub fn into_inner(self) -> R { @@ -36,158 +37,26 @@ impl Crc32Reader { } } -#[cold] -fn invalid_checksum() -> io::Error { - io::Error::new(io::ErrorKind::InvalidData, "Invalid checksum") -} - impl Read for Crc32Reader { fn read(&mut self, buf: &mut [u8]) -> io::Result { - let count = self.inner.read(buf)?; - - if self.enabled { - if count == 0 && !buf.is_empty() && !self.check_matches() { - return Err(invalid_checksum()); - } - self.hasher.update(&buf[..count]); - } - Ok(count) - } - - fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - let start = buf.len(); - let n = self.inner.read_to_end(buf)?; - - if self.enabled { - self.hasher.update(&buf[start..]); - if !self.check_matches() { - return Err(invalid_checksum()); - } - } - - Ok(n) - } - - fn read_to_string(&mut self, buf: &mut String) -> io::Result { - let start = buf.len(); - let n = self.inner.read_to_string(buf)?; - - if self.enabled { - self.hasher.update(&buf.as_bytes()[start..]); - if !self.check_matches() { - return Err(invalid_checksum()); - } - } - - Ok(n) - } -} - -pub(crate) mod non_crypto { - use std::io; - use std::io::prelude::*; - - use crc32fast::Hasher; - - /// Reader that validates the CRC32 when it reaches the EOF. - pub struct Crc32Reader { - inner: R, - hasher: Hasher, - check: u32, - } - - impl Crc32Reader { - /// Get a new Crc32Reader which checks the inner reader against checksum. - pub(crate) fn new(inner: R, checksum: u32) -> Self { - Crc32Reader { - inner, - hasher: Hasher::new(), - check: checksum, - } + /* We want to make sure we only check the hash when the input stream is exhausted. */ + if buf.is_empty() { + /* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we + * still want to "pull" from the source in case it surfaces an i/o error. This will + * always return a count of Ok(0) if successful. */ + return self.inner.read(buf); } - fn check_matches(&self) -> Result<(), &'static str> { - let res = self.hasher.clone().finalize(); - if self.check == res { - Ok(()) - } else { - /* TODO: make this into our own Crc32Error error type! */ - Err("Invalid checksum") - } - } - - pub fn into_inner(self) -> R { - self.inner - } - } - - impl Read for Crc32Reader { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - /* We want to make sure we only check the hash when the input stream is exhausted. */ - if buf.is_empty() { - /* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we - * still want to "pull" from the source in case it surfaces an i/o error. This will - * always return a count of Ok(0) if successful. */ - return self.inner.read(buf); - } - - let count = self.inner.read(buf)?; - if count == 0 { - return self - .check_matches() - .map(|()| 0) - /* TODO: use io::Error::other for MSRV >=1.74 */ - .map_err(|e| io::Error::new(io::ErrorKind::Other, e)); - } - self.hasher.update(&buf[..count]); - Ok(count) - } - } - - #[cfg(test)] - mod test { - use super::*; - - #[test] - fn test_empty_reader() { - let data: &[u8] = b""; - let mut buf = [0; 1]; - - let mut reader = Crc32Reader::new(data, 0); - assert_eq!(reader.read(&mut buf).unwrap(), 0); - - let mut reader = Crc32Reader::new(data, 1); - assert!(reader - .read(&mut buf) - .unwrap_err() - .to_string() - .contains("Invalid checksum")); - } - - #[test] - fn test_byte_by_byte() { - let data: &[u8] = b"1234"; - let mut buf = [0; 1]; - - let mut reader = Crc32Reader::new(data, 0x9be3e0a3); - assert_eq!(reader.read(&mut buf).unwrap(), 1); - assert_eq!(reader.read(&mut buf).unwrap(), 1); - assert_eq!(reader.read(&mut buf).unwrap(), 1); - assert_eq!(reader.read(&mut buf).unwrap(), 1); - assert_eq!(reader.read(&mut buf).unwrap(), 0); - // Can keep reading 0 bytes after the end - assert_eq!(reader.read(&mut buf).unwrap(), 0); - } - - #[test] - fn test_zero_read() { - let data: &[u8] = b"1234"; - let mut buf = [0; 5]; - - let mut reader = Crc32Reader::new(data, 0x9be3e0a3); - assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); - assert_eq!(reader.read(&mut buf).unwrap(), 4); + let count = self.inner.read(buf)?; + if count == 0 { + return self + .check_matches() + .map(|()| 0) + /* TODO: use io::Error::other for MSRV >=1.74 */ + .map_err(|e| io::Error::new(io::ErrorKind::Other, e)); } + self.hasher.update(&buf[..count]); + Ok(count) } } @@ -200,10 +69,10 @@ mod test { let data: &[u8] = b""; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0, false); + let mut reader = Crc32Reader::new(data, 0); assert_eq!(reader.read(&mut buf).unwrap(), 0); - let mut reader = Crc32Reader::new(data, 1, false); + let mut reader = Crc32Reader::new(data, 1); assert!(reader .read(&mut buf) .unwrap_err() @@ -216,7 +85,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); @@ -231,7 +100,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 5]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); assert_eq!(reader.read(&mut buf).unwrap(), 4); } diff --git a/src/read.rs b/src/read.rs index d5191257a..7e9bc76e7 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,10 +1,7 @@ //! Types for reading ZIP archives -#[cfg(feature = "aes-crypto")] -use crate::aes::{AesReader, AesReaderValid}; use crate::compression::CompressionMethod; use crate::cp437::FromCp437; -use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; use crate::result::{ZipError, ZipResult}; @@ -13,30 +10,16 @@ use crate::types::{ AesMode, AesModeInfo, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, ZipLocalEntryBlock, }; -use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; -use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; -use std::io::{self, copy, prelude::*, sink, SeekFrom}; +use std::io::{self, prelude::*, SeekFrom}; use std::mem; use std::mem::size_of; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::rc::Rc; use std::sync::{Arc, OnceLock}; -#[cfg(feature = "deflate-flate2")] -use flate2::read::DeflateDecoder; - -#[cfg(feature = "deflate64")] -use deflate64::Deflate64Decoder; - -#[cfg(feature = "bzip2")] -use bzip2::read::BzDecoder; - -#[cfg(feature = "zstd")] -use zstd::stream::read::Decoder as ZstdDecoder; - mod config; pub use config::*; @@ -122,15 +105,10 @@ pub(crate) mod zip_archive { #[cfg(feature = "aes-crypto")] use crate::aes::PWD_VERIFY_LENGTH; use crate::extra_fields::UnicodeExtraField; -#[cfg(feature = "lzma")] -use crate::read::lzma::LzmaDecoder; -#[cfg(feature = "xz")] -use crate::read::xz::XzDecoder; -use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive}; -use crate::types::ffi::S_IFLNK; +use crate::result::ZipError::InvalidArchive; use crate::unstable::{path_to_string, LittleEndianReadExt}; -use crate::crc32::non_crypto::Crc32Reader as NewCrc32Reader; +use crate::crc32::Crc32Reader; use crate::unstable::read::{ construct_decompressing_reader, find_entry_content_range, ArchiveEntry, CryptoEntryReader, CryptoVariant, ZipEntry, @@ -138,224 +116,6 @@ use crate::unstable::read::{ pub use zip_archive::ZipArchive; -#[allow(clippy::large_enum_variant)] -pub(crate) enum CryptoReader<'a> { - Plaintext(io::Take<&'a mut dyn Read>), - ZipCrypto(ZipCryptoReaderValid>), - #[cfg(feature = "aes-crypto")] - Aes { - reader: AesReaderValid>, - vendor_version: AesVendorVersion, - }, -} - -impl<'a> Read for CryptoReader<'a> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - match self { - CryptoReader::Plaintext(r) => r.read(buf), - CryptoReader::ZipCrypto(r) => r.read(buf), - #[cfg(feature = "aes-crypto")] - CryptoReader::Aes { reader: r, .. } => r.read(buf), - } - } - - fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - match self { - CryptoReader::Plaintext(r) => r.read_to_end(buf), - CryptoReader::ZipCrypto(r) => r.read_to_end(buf), - #[cfg(feature = "aes-crypto")] - CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf), - } - } - - fn read_to_string(&mut self, buf: &mut String) -> io::Result { - match self { - CryptoReader::Plaintext(r) => r.read_to_string(buf), - CryptoReader::ZipCrypto(r) => r.read_to_string(buf), - #[cfg(feature = "aes-crypto")] - CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf), - } - } -} - -impl<'a> CryptoReader<'a> { - /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> io::Take<&'a mut dyn Read> { - match self { - CryptoReader::Plaintext(r) => r, - CryptoReader::ZipCrypto(r) => r.into_inner(), - #[cfg(feature = "aes-crypto")] - CryptoReader::Aes { reader: r, .. } => r.into_inner(), - } - } - - /// Returns `true` if the data is encrypted using AE2. - pub const fn is_ae2_encrypted(&self) -> bool { - #[cfg(feature = "aes-crypto")] - return matches!( - self, - CryptoReader::Aes { - vendor_version: AesVendorVersion::Ae2, - .. - } - ); - #[cfg(not(feature = "aes-crypto"))] - false - } -} - -pub(crate) enum ZipFileReader<'a> { - NoReader, - Raw(io::Take<&'a mut dyn Read>), - Stored(Crc32Reader>), - #[cfg(feature = "_deflate-any")] - Deflated(Crc32Reader>>), - #[cfg(feature = "deflate64")] - Deflate64(Crc32Reader>>>), - #[cfg(feature = "bzip2")] - Bzip2(Crc32Reader>>), - #[cfg(feature = "zstd")] - Zstd(Crc32Reader>>>), - #[cfg(feature = "lzma")] - Lzma(Crc32Reader>>>), - #[cfg(feature = "xz")] - Xz(Crc32Reader>>), -} - -impl<'a> Read for ZipFileReader<'a> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), - ZipFileReader::Raw(r) => r.read(buf), - ZipFileReader::Stored(r) => r.read(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read(buf), - } - } - - fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { - match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), - ZipFileReader::Raw(r) => r.read_exact(buf), - ZipFileReader::Stored(r) => r.read_exact(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read_exact(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read_exact(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read_exact(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read_exact(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read_exact(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read_exact(buf), - } - } - - fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), - ZipFileReader::Raw(r) => r.read_to_end(buf), - ZipFileReader::Stored(r) => r.read_to_end(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read_to_end(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read_to_end(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read_to_end(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read_to_end(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read_to_end(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read_to_end(buf), - } - } - - fn read_to_string(&mut self, buf: &mut String) -> io::Result { - match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), - ZipFileReader::Raw(r) => r.read_to_string(buf), - ZipFileReader::Stored(r) => r.read_to_string(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read_to_string(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read_to_string(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read_to_string(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read_to_string(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read_to_string(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read_to_string(buf), - } - } -} - -impl<'a> ZipFileReader<'a> { - /// Consumes this decoder, returning the underlying reader. - pub fn drain(self) { - let mut inner = match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), - ZipFileReader::Raw(r) => r, - ZipFileReader::Stored(r) => r.into_inner().into_inner(), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.into_inner().into_inner().into_inner().into_inner(), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => { - // Lzma reader owns its buffer rather than mutably borrowing it, so we have to drop - // it separately - if let Ok(mut remaining) = r.into_inner().finish() { - let _ = copy(&mut remaining, &mut sink()); - } - return; - } - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(), - }; - let _ = copy(&mut inner, &mut sink()); - } -} - -/// A struct for reading a zip file -pub struct ZipFile<'a> { - pub(crate) data: Cow<'a, ZipFileData>, - pub(crate) crypto_reader: Option>, - pub(crate) reader: ZipFileReader<'a>, -} - -pub(crate) fn find_content<'a>( - data: &ZipFileData, - reader: &'a mut (impl Read + Seek), -) -> ZipResult> { - // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! - let data_start = match data.data_start.get() { - Some(data_start) => *data_start, - None => find_data_start(data, reader)?, - }; - - reader.seek(io::SeekFrom::Start(data_start))?; - Ok((reader as &mut dyn Read).take(data.compressed_size)) -} - pub(crate) fn find_data_start( data: &ZipFileData, reader: &mut (impl Read + Seek), @@ -385,131 +145,6 @@ pub(crate) fn find_data_start( Ok(data_start) } -#[allow(clippy::too_many_arguments)] -pub(crate) fn make_crypto_reader<'a>( - compression_method: CompressionMethod, - crc32: u32, - mut last_modified_time: Option, - using_data_descriptor: bool, - reader: io::Take<&'a mut dyn Read>, - password: Option<&[u8]>, - aes_info: Option, - #[cfg(feature = "aes-crypto")] compressed_size: u64, -) -> ZipResult> { - #[allow(deprecated)] - { - if let CompressionMethod::Unsupported(_) = compression_method { - return unsupported_zip_error("Compression method not supported"); - } - } - - let reader = match (password, aes_info) { - #[cfg(not(feature = "aes-crypto"))] - (Some(_), Some(_)) => { - return Err(ZipError::UnsupportedArchive( - "AES encrypted files cannot be decrypted without the aes-crypto feature.", - )) - } - #[cfg(feature = "aes-crypto")] - ( - Some(password), - Some(AesModeInfo { - aes_mode, - vendor_version, - .. - }), - ) => CryptoReader::Aes { - reader: AesReader::new(reader, aes_mode, compressed_size).validate(password)?, - vendor_version, - }, - (Some(password), None) => { - if !using_data_descriptor { - last_modified_time = None; - } - let validator = if let Some(last_modified_time) = last_modified_time { - ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart()) - } else { - ZipCryptoValidator::PkzipCrc32(crc32) - }; - CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?) - } - (None, Some(_)) => return Err(InvalidPassword), - (None, None) => CryptoReader::Plaintext(reader), - }; - Ok(reader) -} - -pub(crate) fn make_reader( - compression_method: CompressionMethod, - crc32: u32, - reader: CryptoReader, -) -> ZipResult { - let ae2_encrypted = reader.is_ae2_encrypted(); - - match compression_method { - CompressionMethod::Stored => Ok(ZipFileReader::Stored(Crc32Reader::new( - reader, - crc32, - ae2_encrypted, - ))), - #[cfg(feature = "_deflate-any")] - CompressionMethod::Deflated => { - let deflate_reader = DeflateDecoder::new(reader); - Ok(ZipFileReader::Deflated(Crc32Reader::new( - deflate_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "deflate64")] - CompressionMethod::Deflate64 => { - let deflate64_reader = Deflate64Decoder::new(reader); - Ok(ZipFileReader::Deflate64(Crc32Reader::new( - deflate64_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "bzip2")] - CompressionMethod::Bzip2 => { - let bzip2_reader = BzDecoder::new(reader); - Ok(ZipFileReader::Bzip2(Crc32Reader::new( - bzip2_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "zstd")] - CompressionMethod::Zstd => { - let zstd_reader = ZstdDecoder::new(reader).unwrap(); - Ok(ZipFileReader::Zstd(Crc32Reader::new( - zstd_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "lzma")] - CompressionMethod::Lzma => { - let reader = LzmaDecoder::new(reader); - Ok(ZipFileReader::Lzma(Crc32Reader::new( - Box::new(reader), - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "xz")] - CompressionMethod::Xz => { - let reader = XzDecoder::new(reader); - Ok(ZipFileReader::Xz(Crc32Reader::new( - reader, - crc32, - ae2_encrypted, - ))) - } - _ => Err(UnsupportedArchive("Compression method not supported")), - } -} - #[derive(Debug)] pub(crate) struct CentralDirectoryInfo { pub(crate) archive_offset: u64, @@ -998,7 +633,7 @@ impl ZipArchive { &mut self, file_number: usize, ) -> ZipResult> { - let entry = self.by_index_raw_new(file_number)?; + let entry = self.by_index_raw(file_number)?; entry.get_aes_verification_key_and_salt() } @@ -1042,7 +677,7 @@ impl ZipArchive { #[cfg(unix)] let mut files_by_unix_mode = Vec::new(); for i in 0..self.len() { - let mut file = self.by_index_new(i)?; + let mut file = self.by_index(i)?; let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; @@ -1098,7 +733,7 @@ impl ZipArchive { } continue; } - let mut file = self.by_index_new(i)?; + let mut file = self.by_index(i)?; let mut outfile = fs::File::create(&outpath)?; io::copy(&mut file, &mut outfile)?; #[cfg(unix)] @@ -1141,114 +776,6 @@ impl ZipArchive { Ok(()) } - /// Search for a file entry by name, decrypt with given password - /// - /// # Warning - /// - /// The implementation of the cryptographic algorithms has not - /// gone through a correctness review, and you should assume it is insecure: - /// passwords used with this API may be compromised. - /// - /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us - /// to check for a 1/256 chance that the password is correct. - /// There are many passwords out there that will also pass the validity checks - /// we are able to perform. This is a weakness of the ZipCrypto algorithm, - /// due to its fairly primitive approach to cryptography. - pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult { - self.by_name_with_optional_password(name, Some(password)) - } - - /// Search for a file entry by name - pub fn by_name(&mut self, name: &str) -> ZipResult { - self.by_name_with_optional_password(name, None) - } - - fn by_name_with_optional_password<'a>( - &'a mut self, - name: &str, - password: Option<&[u8]>, - ) -> ZipResult> { - let index = self.index_for_name_err(name)?; - self.by_index_with_optional_password(index, password) - } - - /// Get a contained file by index, decrypt with given password - /// - /// # Warning - /// - /// The implementation of the cryptographic algorithms has not - /// gone through a correctness review, and you should assume it is insecure: - /// passwords used with this API may be compromised. - /// - /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us - /// to check for a 1/256 chance that the password is correct. - /// There are many passwords out there that will also pass the validity checks - /// we are able to perform. This is a weakness of the ZipCrypto algorithm, - /// due to its fairly primitive approach to cryptography. - pub fn by_index_decrypt( - &mut self, - file_number: usize, - password: &[u8], - ) -> ZipResult> { - self.by_index_with_optional_password(file_number, Some(password)) - } - - /// Get a contained file by index - pub fn by_index(&mut self, file_number: usize) -> ZipResult> { - self.by_index_with_optional_password(file_number, None) - } - - /// Get a contained file by index without decompressing it - pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult> { - let reader = &mut self.reader; - let (_, data) = self - .shared - .files - .get_index(file_number) - .ok_or(ZipError::FileNotFound)?; - Ok(ZipFile { - crypto_reader: None, - reader: ZipFileReader::Raw(find_content(data, reader)?), - data: Cow::Borrowed(data), - }) - } - - fn by_index_with_optional_password( - &mut self, - file_number: usize, - mut password: Option<&[u8]>, - ) -> ZipResult> { - let (_, data) = self - .shared - .files - .get_index(file_number) - .ok_or(ZipError::FileNotFound)?; - - match (password, data.encrypted) { - (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)), - (Some(_), false) => password = None, //Password supplied, but none needed! Discard. - _ => {} - } - let limit_reader = find_content(data, &mut self.reader)?; - - let crypto_reader = make_crypto_reader( - data.compression_method, - data.crc32, - data.last_modified_time, - data.using_data_descriptor, - limit_reader, - password, - data.aes_mode, - #[cfg(feature = "aes-crypto")] - data.compressed_size, - )?; - Ok(ZipFile { - crypto_reader: Some(crypto_reader), - reader: ZipFileReader::NoReader, - data: Cow::Borrowed(data), - }) - } - /// Unwrap and return the inner reader object /// /// The position of the reader is undefined. @@ -1262,16 +789,13 @@ where R: Read + Seek, { /// Search for a file entry by name - pub fn by_name_new( - &mut self, - name: impl AsRef, - ) -> ZipResult> { + pub fn by_name(&mut self, name: impl AsRef) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index_new(index) + self.by_index(index) } /// Get a contained file by index - pub fn by_index_new(&mut self, file_number: usize) -> ZipResult> { + pub fn by_index(&mut self, file_number: usize) -> ZipResult> { let Self { ref mut reader, ref shared, @@ -1281,9 +805,15 @@ where .files .get_index(file_number) .ok_or(ZipError::FileNotFound)?; + + /* Don't allow users to read out an encrypted entry without providing a password. */ + if data.encrypted { + return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)); + } + let content = find_entry_content_range(data, reader)?; let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; - let crc32_reader = NewCrc32Reader::new(entry_reader, data.crc32); + let crc32_reader = Crc32Reader::new(entry_reader, data.crc32); Ok(ZipEntry { data, reader: crc32_reader, @@ -1291,19 +821,16 @@ where } /// Get a contained file by name without decompressing it - pub fn by_name_raw_new( + pub fn by_name_raw( &mut self, name: impl AsRef, ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index_raw_new(index) + self.by_index_raw(index) } /// Get a contained file by index without decompressing it - pub fn by_index_raw_new( - &mut self, - file_number: usize, - ) -> ZipResult> { + pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult> { let Self { ref mut reader, ref shared, @@ -1333,13 +860,13 @@ where /// There are many passwords out there that will also pass the validity checks /// we are able to perform. This is a weakness of the ZipCrypto algorithm, /// due to its fairly primitive approach to cryptography. - pub fn by_name_decrypt_new( + pub fn by_name_decrypt( &mut self, name: impl AsRef, password: &[u8], ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index_decrypt_new(index, password) + self.by_index_decrypt(index, password) } /// Get a contained file by index, decrypt with given password @@ -1355,7 +882,7 @@ where /// There are many passwords out there that will also pass the validity checks /// we are able to perform. This is a weakness of the ZipCrypto algorithm, /// due to its fairly primitive approach to cryptography. - pub fn by_index_decrypt_new( + pub fn by_index_decrypt( &mut self, file_number: usize, password: &[u8], @@ -1382,12 +909,12 @@ where /* Ae2 voids crc checking: https://www.winzip.com/en/support/aes-encryption/ */ CryptoEntryReader::Ae2Encrypted(entry_reader) } else { - CryptoEntryReader::NonAe2Encrypted(NewCrc32Reader::new(entry_reader, data.crc32)) + CryptoEntryReader::NonAe2Encrypted(Crc32Reader::new(entry_reader, data.crc32)) } } else { - /* Not encrypted, so do the same as in .by_index_new(): */ + /* Not encrypted, so do the same as in .by_index(): */ let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; - CryptoEntryReader::Unencrypted(NewCrc32Reader::new(entry_reader, data.crc32)) + CryptoEntryReader::Unencrypted(Crc32Reader::new(entry_reader, data.crc32)) }; Ok(ZipEntry { @@ -1690,217 +1217,10 @@ pub(crate) fn parse_single_extra_field( Ok(false) } -/// Methods for retrieving information on zip files -impl<'a> ZipFile<'a> { - fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a>> { - if let ZipFileReader::NoReader = self.reader { - let data = &self.data; - let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); - self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)?; - } - Ok(&mut self.reader) - } - - pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read { - if let ZipFileReader::NoReader = self.reader { - let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); - self.reader = ZipFileReader::Raw(crypto_reader.into_inner()) - } - &mut self.reader - } - - /// Get the version of the file - pub fn version_made_by(&self) -> (u8, u8) { - ( - self.data.version_made_by / 10, - self.data.version_made_by % 10, - ) - } - - /// Get the name of the file - /// - /// # Warnings - /// - /// It is dangerous to use this name directly when extracting an archive. - /// It may contain an absolute path (`/etc/shadow`), or break out of the - /// current directory (`../runtime`). Carelessly writing to these paths - /// allows an attacker to craft a ZIP archive that will overwrite critical - /// files. - /// - /// You can use the [`ZipFile::enclosed_name`] method to validate the name - /// as a safe path. - pub fn name(&self) -> &str { - &self.data.file_name - } - - /// Get the name of the file, in the raw (internal) byte representation. - /// - /// The encoding of this data is currently undefined. - pub fn name_raw(&self) -> &[u8] { - &self.data.file_name_raw - } - - /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte, - /// removes a leading '/' and removes '..' parts. - #[deprecated( - since = "0.5.7", - note = "by stripping `..`s from the path, the meaning of paths can change. - `mangled_name` can be used if this behaviour is desirable" - )] - pub fn sanitized_name(&self) -> PathBuf { - self.mangled_name() - } - - /// Rewrite the path, ignoring any path components with special meaning. - /// - /// - Absolute paths are made relative - /// - [`ParentDir`]s are ignored - /// - Truncates the filename at a NULL byte - /// - /// This is appropriate if you need to be able to extract *something* from - /// any archive, but will easily misrepresent trivial paths like - /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, - /// [`ZipFile::enclosed_name`] is the better option in most scenarios. - /// - /// [`ParentDir`]: `Component::ParentDir` - pub fn mangled_name(&self) -> PathBuf { - self.data.file_name_sanitized() - } - - /// Ensure the file path is safe to use as a [`Path`]. - /// - /// - It can't contain NULL bytes - /// - It can't resolve to a path outside the current directory - /// > `foo/../bar` is fine, `foo/../../bar` is not. - /// - It can't be an absolute path - /// - /// This will read well-formed ZIP files correctly, and is resistant - /// to path-based exploits. It is recommended over - /// [`ZipFile::mangled_name`]. - pub fn enclosed_name(&self) -> Option { - self.data.enclosed_name() - } - - /// Get the comment of the file - pub fn comment(&self) -> &str { - &self.data.file_comment - } - - /// Get the compression method used to store the file - pub fn compression(&self) -> CompressionMethod { - self.data.compression_method - } - - /// Get the size of the file, in bytes, in the archive - pub fn compressed_size(&self) -> u64 { - self.data.compressed_size - } - - /// Get the size of the file, in bytes, when uncompressed - pub fn size(&self) -> u64 { - self.data.uncompressed_size - } - - /// Get the time the file was last modified - pub fn last_modified(&self) -> Option { - self.data.last_modified_time - } - - /// Returns whether the file is actually a directory - pub fn is_dir(&self) -> bool { - self.data.is_dir() - } - - /// Returns whether the file is actually a symbolic link - pub fn is_symlink(&self) -> bool { - self.unix_mode() - .is_some_and(|mode| mode & S_IFLNK == S_IFLNK) - } - - /// Returns whether the file is a normal file (i.e. not a directory or symlink) - pub fn is_file(&self) -> bool { - !self.is_dir() && !self.is_symlink() - } - - /// Get unix mode for the file - pub fn unix_mode(&self) -> Option { - self.data.unix_mode() - } - - /// Get the CRC32 hash of the original file - pub fn crc32(&self) -> u32 { - self.data.crc32 - } - - /// Get the extra data of the zip header for this file - pub fn extra_data(&self) -> Option<&[u8]> { - self.data.extra_field.as_deref().map(|v| v.as_ref()) - } - - /// Get the starting offset of the data of the compressed file - pub fn data_start(&self) -> u64 { - *self.data.data_start.get().unwrap() - } - - /// Get the starting offset of the zip header for this file - pub fn header_start(&self) -> u64 { - self.data.header_start - } - /// Get the starting offset of the zip header in the central directory for this file - pub fn central_header_start(&self) -> u64 { - self.data.central_header_start - } - - /// iterate through all extra fields - pub fn extra_data_fields(&self) -> impl Iterator { - self.data.extra_fields.iter() - } -} - -impl<'a> Read for ZipFile<'a> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.get_reader()?.read(buf) - } - - fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { - self.get_reader()?.read_exact(buf) - } - - fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - self.get_reader()?.read_to_end(buf) - } - - fn read_to_string(&mut self, buf: &mut String) -> io::Result { - self.get_reader()?.read_to_string(buf) - } -} - -impl<'a> Drop for ZipFile<'a> { - fn drop(&mut self) { - // self.data is Owned, this reader is constructed by a streaming reader. - // In this case, we want to exhaust the reader so that the next file is accessible. - if let Cow::Owned(_) = self.data { - // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. - match &mut self.reader { - ZipFileReader::NoReader => { - let innerreader = self.crypto_reader.take(); - let _ = copy( - &mut innerreader.expect("Invalid reader state").into_inner(), - &mut sink(), - ); - } - reader => { - let innerreader = std::mem::replace(reader, ZipFileReader::NoReader); - innerreader.drain(); - } - }; - } - } -} - #[cfg(test)] mod test { use crate::result::ZipResult; + use crate::unstable::read::ArchiveEntry; use crate::write::SimpleFileOptions; use crate::CompressionMethod::Stored; use crate::{ZipArchive, ZipWriter}; diff --git a/src/unstable/read.rs b/src/unstable/read.rs index b602531c6..ef39a4504 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -1,7 +1,7 @@ //! Alternate implementation of [`crate::read`]. use crate::compression::CompressionMethod; -use crate::crc32::non_crypto::Crc32Reader; +use crate::crc32::Crc32Reader; use crate::extra_fields::ExtraField; use crate::read::find_data_start; use crate::result::{ZipError, ZipResult}; diff --git a/src/write.rs b/src/write.rs index c3df64a2a..bf87cbf9c 100644 --- a/src/write.rs +++ b/src/write.rs @@ -3,7 +3,6 @@ #[cfg(feature = "aes-crypto")] use crate::aes::AesWriter; use crate::compression::CompressionMethod; -use crate::read::ZipFile; use crate::read::{parse_single_extra_field, Config, ZipArchive}; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CDEBlock}; @@ -13,7 +12,7 @@ use crate::types::{ ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, }; -use crate::unstable::read::find_entry_content_range; +use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; @@ -1169,7 +1168,7 @@ impl ZipWriter { /// This method extracts file metadata from the `source` archive, then simply performs a single /// big [`io::copy()`](io::copy) to transfer all the actual file contents without any /// decompression or decryption. This is more performant than the equivalent operation of - /// calling [`Self::raw_copy_file()`] for each entry from the `source` archive in sequence. + /// calling [`Self::copy_file()`] for each entry from the `source` archive in sequence. /// ///``` /// # fn main() -> Result<(), zip::result::ZipError> { @@ -1210,7 +1209,7 @@ impl ZipWriter { self.finish_file()?; /* Ensure we accept the file contents on faith (and avoid overwriting the data). - * See raw_copy_file_rename(). */ + * See copy_file_rename(). */ self.writing_to_file = true; self.writing_raw = true; @@ -1265,15 +1264,19 @@ impl ZipWriter { /// W: Write + Seek, /// { /// // Retrieve file entry by name - /// let file = src.by_name("src_file.txt")?; + /// let file = src.by_name_raw("src_file.txt")?; /// /// // Copy and rename the previously obtained file entry to the destination zip archive - /// dst.raw_copy_file_rename(file, "new_name.txt")?; + /// dst.copy_file_rename(file, "new_name.txt")?; /// /// Ok(()) /// } /// ``` - pub fn raw_copy_file_rename(&mut self, mut file: ZipFile, name: S) -> ZipResult<()> + pub fn copy_file_rename( + &mut self, + mut file: ZipEntry<'_, impl Read>, + name: S, + ) -> ZipResult<()> where S: Into> + ToOwned, SToOwned: Into>, @@ -1300,22 +1303,22 @@ impl ZipWriter { self.writing_to_file = true; self.writing_raw = true; - io::copy(file.get_raw_reader(), self)?; + io::copy(&mut file, self)?; Ok(()) } - /// Like `raw_copy_file_to_path`, but uses Path arguments. + /// Like `copy_file_rename`, but uses Path arguments. /// /// This function ensures that the '/' path separator is used and normalizes `.` and `..`. It /// ignores any `..` or Windows drive letter that would produce a path outside the ZIP file's /// root. - pub fn raw_copy_file_to_path>( + pub fn copy_file_to_path>( &mut self, - file: ZipFile, + file: ZipEntry<'_, impl Read>, path: P, ) -> ZipResult<()> { - self.raw_copy_file_rename(file, path_to_string(path)) + self.copy_file_rename(file, path_to_string(path)) } /// Add a new file using the already compressed data from a ZIP file being read, this allows faster @@ -1333,17 +1336,17 @@ impl ZipWriter { /// W: Write + Seek, /// { /// // Retrieve file entry by name - /// let file = src.by_name("src_file.txt")?; + /// let file = src.by_name_raw("src_file.txt")?; /// /// // Copy the previously obtained file entry to the destination zip archive - /// dst.raw_copy_file(file)?; + /// dst.copy_file(file)?; /// /// Ok(()) /// } /// ``` - pub fn raw_copy_file(&mut self, file: ZipFile) -> ZipResult<()> { + pub fn copy_file(&mut self, file: ZipEntry<'_, impl Read>) -> ZipResult<()> { let name = file.name().to_owned(); - self.raw_copy_file_rename(file, name) + self.copy_file_rename(file, name) } /// Add a directory entry. @@ -1979,6 +1982,7 @@ mod test { use crate::compression::CompressionMethod; use crate::result::ZipResult; use crate::types::DateTime; + use crate::unstable::read::ArchiveEntry; use crate::write::EncryptWith::ZipCrypto; use crate::write::SimpleFileOptions; use crate::zipcrypto::ZipCryptoKeys; diff --git a/tests/aes_encryption.rs b/tests/aes_encryption.rs old mode 100644 new mode 100755 index c135914d0..e605b4eda --- a/tests/aes_encryption.rs +++ b/tests/aes_encryption.rs @@ -1,7 +1,10 @@ #![cfg(feature = "aes-crypto")] use std::io::{self, Read, Write}; -use zip::{result::ZipError, write::SimpleFileOptions, AesMode, CompressionMethod, ZipArchive}; +use zip::{ + result::ZipError, unstable::read::ArchiveEntry, write::SimpleFileOptions, AesMode, + CompressionMethod, ZipArchive, +}; const SECRET_CONTENT: &str = "Lorem ipsum dolor sit amet"; diff --git a/tests/deflate64.rs b/tests/deflate64.rs old mode 100644 new mode 100755 index b0cd95a95..4989f3a91 --- a/tests/deflate64.rs +++ b/tests/deflate64.rs @@ -1,7 +1,7 @@ #![cfg(feature = "deflate64")] use std::io::{self, Read}; -use zip::ZipArchive; +use zip::{unstable::read::ArchiveEntry, ZipArchive}; #[test] fn decompress_deflate64() { diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index 7943a2ac3..97b969421 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -2,6 +2,7 @@ use std::collections::HashSet; use std::io::prelude::*; use std::io::Cursor; use zip::result::ZipResult; +use zip::unstable::read::ArchiveEntry; use zip::unstable::LittleEndianWriteExt; use zip::write::ExtendedFileOptions; use zip::write::FileOptions; @@ -46,18 +47,18 @@ fn copy() { { let file = src_archive - .by_name(ENTRY_NAME) + .by_name_raw(ENTRY_NAME) .expect("Missing expected file"); - zip.raw_copy_file(file).expect("Couldn't copy file"); + zip.copy_file(file).expect("Couldn't copy file"); } { let file = src_archive - .by_name(ENTRY_NAME) + .by_name_raw(ENTRY_NAME) .expect("Missing expected file"); - zip.raw_copy_file_rename(file, COPY_ENTRY_NAME) + zip.copy_file_rename(file, COPY_ENTRY_NAME) .expect("Couldn't copy and rename file"); } } diff --git a/tests/lzma.rs b/tests/lzma.rs old mode 100644 new mode 100755 index 01a14a2e0..8ab2166e6 --- a/tests/lzma.rs +++ b/tests/lzma.rs @@ -1,7 +1,7 @@ #![cfg(feature = "lzma")] use std::io::{self, Read}; -use zip::ZipArchive; +use zip::{unstable::read::ArchiveEntry, ZipArchive}; #[test] fn decompress_lzma() { diff --git a/tests/xz.rs b/tests/xz.rs old mode 100644 new mode 100755 index 110b40859..94881fac9 --- a/tests/xz.rs +++ b/tests/xz.rs @@ -1,7 +1,7 @@ #![cfg(feature = "xz")] use std::io::{self, Read}; -use zip::ZipArchive; +use zip::{unstable::read::ArchiveEntry, ZipArchive}; #[test] fn decompress_xz() -> io::Result<()> { diff --git a/tests/zip64_large.rs b/tests/zip64_large.rs index 468ef198f..9864ee7ff 100644 --- a/tests/zip64_large.rs +++ b/tests/zip64_large.rs @@ -189,6 +189,8 @@ impl Read for Zip64File { #[test] fn zip64_large() { + use zip::unstable::read::ArchiveEntry; + let zipfile = Zip64File::new(); let mut archive = zip::ZipArchive::new(zipfile).unwrap(); let mut buf = [0u8; 32]; diff --git a/tests/zip_crypto.rs b/tests/zip_crypto.rs index 4c4cc8b29..12f091724 100644 --- a/tests/zip_crypto.rs +++ b/tests/zip_crypto.rs @@ -34,6 +34,7 @@ const ZIP_CRYPTO_FILE: &[u8] = &[ use std::io::Cursor; use zip::result::ZipError; +use zip::unstable::read::ArchiveEntry; #[test] fn encrypting_file() { diff --git a/tests/zip_extended_timestamp.rs b/tests/zip_extended_timestamp.rs index 9657028f9..58fb2953f 100644 --- a/tests/zip_extended_timestamp.rs +++ b/tests/zip_extended_timestamp.rs @@ -1,5 +1,5 @@ use std::io; -use zip::ZipArchive; +use zip::{unstable::read::ArchiveEntry, ZipArchive}; #[test] fn test_extended_timestamp() { From bf63d19b8d3b2a88f2ba681e853aad3d867c8138 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:37:14 -0400 Subject: [PATCH 05/18] remove a few more dyns --- src/read.rs | 6 ++---- src/unstable/read.rs | 4 ++-- src/write.rs | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/read.rs b/src/read.rs index 7e9bc76e7..9c786aef5 100644 --- a/src/read.rs +++ b/src/read.rs @@ -316,10 +316,8 @@ impl ZipArchive { self.reader.rewind()?; /* Find the end of the file data. */ let length_to_read = self.shared.dir_start; - /* Produce a Read that reads bytes up until the start of the central directory header. - * This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying - * handle, which it really shouldn't need to anyway. */ - let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read); + /* Produce a Read that reads bytes up until the start of the central directory header. */ + let mut limited_raw = self.reader.by_ref().take(length_to_read); /* Copy over file data from source archive directly. */ io::copy(&mut limited_raw, &mut w)?; diff --git a/src/unstable/read.rs b/src/unstable/read.rs index ef39a4504..fe2576a25 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -522,7 +522,7 @@ pub mod streaming { } = self; if *remaining_before_next_entry > 0 { io::copy( - &mut (reader as &mut dyn Read).take(*remaining_before_next_entry), + &mut reader.by_ref().take(*remaining_before_next_entry), &mut io::sink(), )?; *remaining_before_next_entry = 0; @@ -577,7 +577,7 @@ pub mod streaming { } = self; if *remaining_before_next_entry > 0 { io::copy( - &mut (reader as &mut dyn Read).take(*remaining_before_next_entry), + &mut reader.by_ref().take(*remaining_before_next_entry), &mut io::sink(), )?; *remaining_before_next_entry = 0; diff --git a/src/write.rs b/src/write.rs index bf87cbf9c..9faa9ac94 100644 --- a/src/write.rs +++ b/src/write.rs @@ -85,7 +85,7 @@ impl Write for MaybeEncrypted { } } -enum GenericZipWriter { +enum GenericZipWriter { Closed, Storer(MaybeEncrypted), #[cfg(feature = "deflate-flate2")] From 3960e345bb74d428a9c5ec3ef95c49db916a908d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:40:51 -0400 Subject: [PATCH 06/18] make AesModeInfo pub(crate) to make its intention clear --- src/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types.rs b/src/types.rs index fd5d7f7b0..800d337fe 100644 --- a/src/types.rs +++ b/src/types.rs @@ -421,7 +421,7 @@ pub const MIN_VERSION: u8 = 10; pub const DEFAULT_VERSION: u8 = 45; #[derive(Debug, Copy, Clone)] -pub struct AesModeInfo { +pub(crate) struct AesModeInfo { pub aes_mode: AesMode, pub vendor_version: AesVendorVersion, pub compression_method: CompressionMethod, From 27ba4d9060b2fbfafa5fc4fb2055397bb3d1650e Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:25:26 -0400 Subject: [PATCH 07/18] refactor streaming --- src/read.rs | 21 ++++--- src/unstable/read.rs | 132 +++++++++++++++++++++++++------------------ 2 files changed, 88 insertions(+), 65 deletions(-) diff --git a/src/read.rs b/src/read.rs index 9c786aef5..91320f7e1 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1041,13 +1041,8 @@ pub(crate) fn central_header_to_zip_file_inner( aes_extra_data_start: 0, extra_fields: Vec::new(), }; - match parse_extra_field(&mut result) { - Ok(stripped_extra_field) => { - result.extra_field = stripped_extra_field; - } - Err(ZipError::Io(..)) => {} - Err(e) => return Err(e), - } + let stripped_extra_field = parse_extra_field(&mut result)?; + result.extra_field = stripped_extra_field; let aes_enabled = result.compression_method == CompressionMethod::AES; if aes_enabled && result.aes_mode.is_none() { @@ -1076,9 +1071,17 @@ pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult b, + /* If we get an error reading too far, then assume this is an extra field we don't know + * how to handle, and just return the remaining amount. */ + Err(ZipError::Io(e)) if e.kind() == io::ErrorKind::UnexpectedEof => { + return Ok(Some(processed_extra_field)) + } + Err(e) => return Err(e), + }; position = reader.position() as usize; if remove { let remaining = len - (position - old_position); diff --git a/src/unstable/read.rs b/src/unstable/read.rs index fe2576a25..67f4fc5ce 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -488,7 +488,7 @@ pub mod streaming { pub struct StreamingArchive { reader: R, remaining_before_next_entry: u64, - first_metadata_block: Option>, + first_metadata_block: Option<[u8; mem::size_of::()]>, } impl StreamingArchive { @@ -510,15 +510,11 @@ pub mod streaming { where R: Read, { - pub fn next_entry(&mut self) -> ZipResult>> { - // We can't use the typical ::parse() method, as we follow separate code paths depending - // on the "magic" value (since the magic value will be from the central directory header - // if we've finished iterating over all the actual files). - /* TODO: smallvec? */ + fn drain_remaining(&mut self) -> Result<(), ZipError> { let Self { ref mut reader, ref mut remaining_before_next_entry, - ref mut first_metadata_block, + .. } = self; if *remaining_before_next_entry > 0 { io::copy( @@ -527,36 +523,50 @@ pub mod streaming { )?; *remaining_before_next_entry = 0; } + Ok(()) + } + + pub fn next_entry(&mut self) -> ZipResult>> { + // We can't use the typical ::parse() method, as we follow separate code paths depending + // on the "magic" value (since the magic value will be from the central directory header + // if we've finished iterating over all the actual files). + self.drain_remaining()?; + let Self { + ref mut reader, + ref mut remaining_before_next_entry, + ref mut first_metadata_block, + } = self; + assert_eq!(0, *remaining_before_next_entry); let mut block = [0u8; mem::size_of::()]; reader.read_exact(&mut block)?; - let block: Box<[u8]> = block.into(); - - let signature = spec::Magic::from_first_le_bytes(&block); + let signature = spec::Magic::from_first_le_bytes(block.as_ref()); match signature { - spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (), - spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => { + ZipLocalEntryBlock::MAGIC => (), + /* If the signature corresponds to the first central directory entry, then we are + * out of file entries. We can't seek backwards in a stream, so we save the block + * we just read to our mutable state. */ + ZipCentralEntryBlock::MAGIC => { assert!( first_metadata_block.is_none(), "metadata block should never be set except exactly once" ); + assert!( + mem::size_of::() + < mem::size_of::() + ); *first_metadata_block = Some(block); return Ok(None); } _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), } - let block = ZipLocalEntryBlock::interpret(&block)?; let mut data = ZipFileData::from_local_block(block, reader)?; - match parse_extra_field(&mut data) { - /* FIXME: check for the right error type here instead of accepting any old i/o - * error. */ - Ok(..) | Err(ZipError::Io(..)) => {} - Err(e) => return Err(e), - } + let stripped_extra_field = parse_extra_field(&mut data)?; + data.extra_field = stripped_extra_field; let limit_reader = DrainWrapper::new(data.compressed_size, remaining_before_next_entry, reader); @@ -570,46 +580,55 @@ pub mod streaming { } pub fn next_metadata_entry(&mut self) -> ZipResult> { + /* We should only need to drain remaining exactly once (if at all), since that's only + * employed if the end user fails to read the entire contents of a streaming file + * entry. */ + self.drain_remaining()?; let Self { ref mut reader, - ref mut remaining_before_next_entry, + ref remaining_before_next_entry, ref mut first_metadata_block, } = self; - if *remaining_before_next_entry > 0 { - io::copy( - &mut reader.by_ref().take(*remaining_before_next_entry), - &mut io::sink(), - )?; - *remaining_before_next_entry = 0; - } - - // Parse central header - let block = match first_metadata_block.take() { - None => match ZipCentralEntryBlock::parse(reader) { - Ok(block) => block, - Err(ZipError::Io(e)) if e.kind() == io::ErrorKind::UnexpectedEof => { - return Ok(None); + assert_eq!(0, *remaining_before_next_entry); + + /* Get the bytes out of the stream necessary to create a parseable central block. */ + let block: [u8; mem::size_of::()] = + match first_metadata_block.take() { + /* If we have a block we tried to parse earlier from .next_entry(), get the + * data from there, then read the additional bytes necessary to construct + * a central directory entry. This should always happen exactly once. */ + Some(block) => { + assert!( + mem::size_of::() + < mem::size_of::() + ); + assert_eq!(block.len(), mem::size_of::()); + + let mut remaining_block = [0u8; mem::size_of::() + - mem::size_of::()]; + reader.read_exact(remaining_block.as_mut())?; + + let mut joined_block = [0u8; mem::size_of::()]; + joined_block[..block.len()].copy_from_slice(&block); + joined_block[block.len()..].copy_from_slice(&remaining_block); + joined_block } - Err(e) => return Err(e), - }, - Some(block) => { - assert_eq!(block.len(), mem::size_of::()); - assert!( - mem::size_of::() - < mem::size_of::() - ); - - let mut remaining_block = [0u8; mem::size_of::() - - mem::size_of::()]; - reader.read_exact(remaining_block.as_mut())?; - - let mut joined_block = [0u8; mem::size_of::()]; - joined_block[..block.len()].copy_from_slice(&block); - joined_block[block.len()..].copy_from_slice(&remaining_block); - - ZipCentralEntryBlock::interpret(&joined_block)? - } - }; + /* After the first central block is parsed, we should always go into this + * branch, reading the necessary bytes from the stream. */ + None => { + let mut block = [0u8; mem::size_of::()]; + match reader.read_exact(&mut block) { + Ok(()) => (), + /* The reader is done! This is expected to happen exactly once when the + * stream is completely finished. */ + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), + Err(e) => return Err(e.into()), + }; + block + } + }; + // Parse central header + let block = ZipCentralEntryBlock::interpret(&block)?; // Give archive_offset and central_header_start dummy value 0, since // they are not used in the output. @@ -654,10 +673,11 @@ pub mod streaming { R: Read, { fn read(&mut self, buf: &mut [u8]) -> io::Result { + assert!(!buf.is_empty()); let to_read = self.remaining().min(buf.len()); - /* If the input is exhausted, or `buf` was empty, just forward any error. */ + /* If the input is exhausted, or `buf` was empty, we are done. */ if to_read == 0 { - return self.inner.read(&mut []); + return Ok(0); } let count = self.inner.read(&mut buf[..to_read])?; From 9fee2c88908ebebed9e6d4f88a1465fa7a9b7ec4 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:30:51 -0400 Subject: [PATCH 08/18] fmt and clippy --- src/unstable/read.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/unstable/read.rs b/src/unstable/read.rs index 67f4fc5ce..8e5362a32 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -6,7 +6,7 @@ use crate::extra_fields::ExtraField; use crate::read::find_data_start; use crate::result::{ZipError, ZipResult}; use crate::types::ffi::S_IFLNK; -use crate::types::{AesModeInfo, AesVendorVersion, DateTime, ZipFileData}; +use crate::types::{DateTime, ZipFileData}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; #[cfg(feature = "lzma")] @@ -17,6 +17,7 @@ use crate::read::xz::XzDecoder; use crate::{ aes::{AesReader, AesReaderValid}, read::AesInfo, + types::{AesModeInfo, AesVendorVersion}, }; #[cfg(feature = "bzip2")] @@ -372,6 +373,7 @@ impl CryptoVariant { data.encrypted, "should never enter this method except on encrypted entries" ); + #[allow(deprecated)] if let CompressionMethod::Unsupported(_) = data.compression_method { /* TODO: make this into its own EntryReadError error type! */ @@ -379,24 +381,29 @@ impl CryptoVariant { "Compression method not supported", )); } + + #[cfg(feature = "aes-crypto")] if let Some(info) = data.aes_mode { - #[cfg(not(feature = "aes-crypto"))] - /* TODO: make this into its own EntryReadError error type! */ - return Err(ZipError::UnsupportedArchive( - "AES encrypted files cannot be decrypted without the aes-crypto feature.", - )); - #[cfg(feature = "aes-crypto")] return Ok(Self::Aes { info, compressed_size: data.compressed_size, }); } + #[cfg(not(feature = "aes-crypto"))] + if data.aes_mode.is_some() { + /* TODO: make this into its own EntryReadError error type! */ + return Err(ZipError::UnsupportedArchive( + "AES encrypted files cannot be decrypted without the aes-crypto feature.", + )); + } + if let Some(last_modified_time) = data.last_modified_time { /* TODO: use let chains once stabilized! */ if data.using_data_descriptor { return Ok(Self::DateTime(last_modified_time)); } } + Ok(Self::Crc32(data.crc32)) } @@ -425,14 +432,11 @@ impl CryptoVariant { R: Read, { match self { + #[cfg(feature = "aes-crypto")] Self::Aes { info: AesModeInfo { aes_mode, .. }, compressed_size, } => { - assert!( - cfg!(feature = "aes-crypto"), - "should never get here unless aes support was enabled" - ); let aes_reader = AesReader::new(reader, aes_mode, compressed_size).validate(password)?; Ok(CryptoReader::Aes(aes_reader)) From c360b15a2b42727afb8e9078fb52e3ed9bb5881c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:35:39 -0400 Subject: [PATCH 09/18] make ArchiveEntry more publicly accessible --- src/read.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index 91320f7e1..0ba617e8f 100644 --- a/src/read.rs +++ b/src/read.rs @@ -82,6 +82,7 @@ pub(crate) mod zip_archive { /// /// ```no_run /// use std::io::prelude::*; + /// use zip::read::ArchiveEntry; /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> { /// let mut zip = zip::ZipArchive::new(reader)?; /// @@ -110,9 +111,9 @@ use crate::unstable::{path_to_string, LittleEndianReadExt}; use crate::crc32::Crc32Reader; use crate::unstable::read::{ - construct_decompressing_reader, find_entry_content_range, ArchiveEntry, CryptoEntryReader, - CryptoVariant, ZipEntry, + construct_decompressing_reader, find_entry_content_range, CryptoEntryReader, CryptoVariant, }; +pub use crate::unstable::read::{ArchiveEntry, ZipEntry}; pub use zip_archive::ZipArchive; From 8c7cae136909fabd4e43be8bd56d737f880af5f2 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:47:19 -0400 Subject: [PATCH 10/18] allow(private_interfaces) -> allow(warnings) --- src/unstable/read.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/unstable/read.rs b/src/unstable/read.rs index 8e5362a32..c6acdb878 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -119,7 +119,8 @@ mod sealed_data { use super::ZipFileData; #[doc(hidden)] - #[allow(private_interfaces)] + /* NB: This should be allow(private_interfaces), but that's currently unstable. */ + #[allow(warnings)] pub trait ArchiveData { fn data(&self) -> &ZipFileData; } @@ -264,7 +265,7 @@ pub trait ArchiveEntry: Read + sealed_data::ArchiveData { } impl<'a, R> sealed_data::ArchiveData for ZipEntry<'a, R> { - #[allow(private_interfaces)] + #[allow(warnings)] fn data(&self) -> &ZipFileData { self.data } @@ -725,7 +726,7 @@ pub mod streaming { } impl sealed_data::ArchiveData for StreamingZipEntry { - #[allow(private_interfaces)] + #[allow(warnings)] fn data(&self) -> &ZipFileData { &self.data } @@ -738,7 +739,7 @@ pub mod streaming { pub struct ZipStreamFileMetadata(ZipFileData); impl sealed_data::ArchiveData for ZipStreamFileMetadata { - #[allow(private_interfaces)] + #[allow(warnings)] fn data(&self) -> &ZipFileData { let Self(data) = self; data From fb37849cc0c65e81bea7cfa3ffd3e181224a1741 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:54:15 -0400 Subject: [PATCH 11/18] fix impl trait return --- src/unstable/read.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/unstable/read.rs b/src/unstable/read.rs index c6acdb878..4bb76acf4 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -31,6 +31,7 @@ use zstd::stream::read::Decoder as ZstdDecoder; use std::io::{self, Read, Seek}; use std::path::PathBuf; +use std::slice; pub(crate) enum EntryReader { Stored(R), @@ -259,7 +260,7 @@ pub trait ArchiveEntry: Read + sealed_data::ArchiveData { } /// iterate through all extra fields - fn extra_data_fields(&self) -> impl Iterator { + fn extra_data_fields(&self) -> slice::Iter<'_, ExtraField> { self.data().extra_fields.iter() } } From 872bd359cb1d5c15543aefa0cb0402adac1ce59b Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:57:02 -0400 Subject: [PATCH 12/18] fix private bounds on stable --- src/crc32.rs | 1 + src/types.rs | 4 ++-- src/unstable/read.rs | 5 ----- src/zipcrypto.rs | 1 + 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/crc32.rs b/src/crc32.rs index 770b148b0..9ad7e4b2f 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -32,6 +32,7 @@ impl Crc32Reader { } } + #[allow(dead_code)] pub fn into_inner(self) -> R { self.inner } diff --git a/src/types.rs b/src/types.rs index 800d337fe..1aff01b7c 100644 --- a/src/types.rs +++ b/src/types.rs @@ -421,7 +421,7 @@ pub const MIN_VERSION: u8 = 10; pub const DEFAULT_VERSION: u8 = 45; #[derive(Debug, Copy, Clone)] -pub(crate) struct AesModeInfo { +pub struct AesModeInfo { pub aes_mode: AesMode, pub vendor_version: AesVendorVersion, pub compression_method: CompressionMethod, @@ -429,7 +429,7 @@ pub(crate) struct AesModeInfo { /// Structure representing a ZIP file. #[derive(Debug, Clone, Default)] -pub(crate) struct ZipFileData { +pub struct ZipFileData { /// Compatibility of the file attribute information pub system: System, /// Specification version diff --git a/src/unstable/read.rs b/src/unstable/read.rs index 4bb76acf4..d4712468f 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -120,8 +120,6 @@ mod sealed_data { use super::ZipFileData; #[doc(hidden)] - /* NB: This should be allow(private_interfaces), but that's currently unstable. */ - #[allow(warnings)] pub trait ArchiveData { fn data(&self) -> &ZipFileData; } @@ -266,7 +264,6 @@ pub trait ArchiveEntry: Read + sealed_data::ArchiveData { } impl<'a, R> sealed_data::ArchiveData for ZipEntry<'a, R> { - #[allow(warnings)] fn data(&self) -> &ZipFileData { self.data } @@ -727,7 +724,6 @@ pub mod streaming { } impl sealed_data::ArchiveData for StreamingZipEntry { - #[allow(warnings)] fn data(&self) -> &ZipFileData { &self.data } @@ -740,7 +736,6 @@ pub mod streaming { pub struct ZipStreamFileMetadata(ZipFileData); impl sealed_data::ArchiveData for ZipStreamFileMetadata { - #[allow(warnings)] fn data(&self) -> &ZipFileData { let Self(data) = self; data diff --git a/src/zipcrypto.rs b/src/zipcrypto.rs index 5561ac3d5..42d0e58b6 100644 --- a/src/zipcrypto.rs +++ b/src/zipcrypto.rs @@ -204,6 +204,7 @@ impl std::io::Read for ZipCryptoReaderValid { impl ZipCryptoReaderValid { /// Consumes this decoder, returning the underlying reader. + #[allow(dead_code)] pub fn into_inner(self) -> R { self.reader.file } From fd11f2892e83928d0f00191dbc155c7d259eb83d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 19:07:17 -0400 Subject: [PATCH 13/18] remove now-dead code --- src/aes.rs | 7 ------- src/crc32.rs | 5 ----- src/read/lzma.rs | 7 +------ src/read/xz.rs | 6 ------ src/zipcrypto.rs | 8 -------- 5 files changed, 1 insertion(+), 32 deletions(-) diff --git a/src/aes.rs b/src/aes.rs index c86a8e3a2..1773f068f 100644 --- a/src/aes.rs +++ b/src/aes.rs @@ -216,13 +216,6 @@ impl Read for AesReaderValid { } } -impl AesReaderValid { - /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> R { - self.reader - } -} - pub struct AesWriter { writer: W, cipher: Cipher, diff --git a/src/crc32.rs b/src/crc32.rs index 9ad7e4b2f..a62349eb6 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -31,11 +31,6 @@ impl Crc32Reader { Err("Invalid checksum") } } - - #[allow(dead_code)] - pub fn into_inner(self) -> R { - self.inner - } } impl Read for Crc32Reader { diff --git a/src/read/lzma.rs b/src/read/lzma.rs index 04b6edd8e..d2b5449d6 100644 --- a/src/read/lzma.rs +++ b/src/read/lzma.rs @@ -1,6 +1,6 @@ use lzma_rs::decompress::{Options, Stream, UnpackedSize}; use std::collections::VecDeque; -use std::io::{copy, Error, Read, Result, Write}; +use std::io::{Read, Result, Write}; const COMPRESSED_BYTES_TO_BUFFER: usize = 4096; @@ -23,11 +23,6 @@ impl LzmaDecoder { stream: Stream::new_with_options(&OPTIONS, VecDeque::new()), } } - - pub fn finish(mut self) -> Result> { - copy(&mut self.compressed_reader, &mut self.stream)?; - self.stream.finish().map_err(Error::from) - } } impl Read for LzmaDecoder { diff --git a/src/read/xz.rs b/src/read/xz.rs index 478ae1024..339169243 100644 --- a/src/read/xz.rs +++ b/src/read/xz.rs @@ -262,9 +262,3 @@ impl Read for XzDecoder { Ok(written) } } - -impl XzDecoder { - pub fn into_inner(self) -> R { - self.compressed_reader.into_inner() - } -} diff --git a/src/zipcrypto.rs b/src/zipcrypto.rs index 42d0e58b6..64c4cc31d 100644 --- a/src/zipcrypto.rs +++ b/src/zipcrypto.rs @@ -202,14 +202,6 @@ impl std::io::Read for ZipCryptoReaderValid { } } -impl ZipCryptoReaderValid { - /// Consumes this decoder, returning the underlying reader. - #[allow(dead_code)] - pub fn into_inner(self) -> R { - self.reader.file - } -} - static CRCTABLE: [u32; 256] = [ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, From a28a43d4127ca0261136fa2d067a4f49652e57cd Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 19:45:59 -0400 Subject: [PATCH 14/18] fix for windows --- src/read.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index 0ba617e8f..bdcd861bc 100644 --- a/src/read.rs +++ b/src/read.rs @@ -715,7 +715,7 @@ impl ZipArchive { }; let target = target.into_boxed_str(); let target_is_dir_from_archive = - self.shared.files.contains_key(&target) && is_dir(&target); + self.shared.files.contains_key(&target) && spec::is_dir(&target); let target_path = directory.as_ref().join(OsString::from(target.to_string())); let target_is_dir = if target_is_dir_from_archive { true From 423a263641ba549150a0353fae08979da3ae55d5 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 20:33:52 -0400 Subject: [PATCH 15/18] Revert "remove now-dead code" This reverts commit fd11f2892e83928d0f00191dbc155c7d259eb83d. --- src/aes.rs | 7 +++++++ src/crc32.rs | 5 +++++ src/read/lzma.rs | 7 ++++++- src/read/xz.rs | 6 ++++++ src/zipcrypto.rs | 8 ++++++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/aes.rs b/src/aes.rs index 1773f068f..c86a8e3a2 100644 --- a/src/aes.rs +++ b/src/aes.rs @@ -216,6 +216,13 @@ impl Read for AesReaderValid { } } +impl AesReaderValid { + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> R { + self.reader + } +} + pub struct AesWriter { writer: W, cipher: Cipher, diff --git a/src/crc32.rs b/src/crc32.rs index a62349eb6..9ad7e4b2f 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -31,6 +31,11 @@ impl Crc32Reader { Err("Invalid checksum") } } + + #[allow(dead_code)] + pub fn into_inner(self) -> R { + self.inner + } } impl Read for Crc32Reader { diff --git a/src/read/lzma.rs b/src/read/lzma.rs index d2b5449d6..04b6edd8e 100644 --- a/src/read/lzma.rs +++ b/src/read/lzma.rs @@ -1,6 +1,6 @@ use lzma_rs::decompress::{Options, Stream, UnpackedSize}; use std::collections::VecDeque; -use std::io::{Read, Result, Write}; +use std::io::{copy, Error, Read, Result, Write}; const COMPRESSED_BYTES_TO_BUFFER: usize = 4096; @@ -23,6 +23,11 @@ impl LzmaDecoder { stream: Stream::new_with_options(&OPTIONS, VecDeque::new()), } } + + pub fn finish(mut self) -> Result> { + copy(&mut self.compressed_reader, &mut self.stream)?; + self.stream.finish().map_err(Error::from) + } } impl Read for LzmaDecoder { diff --git a/src/read/xz.rs b/src/read/xz.rs index 339169243..478ae1024 100644 --- a/src/read/xz.rs +++ b/src/read/xz.rs @@ -262,3 +262,9 @@ impl Read for XzDecoder { Ok(written) } } + +impl XzDecoder { + pub fn into_inner(self) -> R { + self.compressed_reader.into_inner() + } +} diff --git a/src/zipcrypto.rs b/src/zipcrypto.rs index 64c4cc31d..42d0e58b6 100644 --- a/src/zipcrypto.rs +++ b/src/zipcrypto.rs @@ -202,6 +202,14 @@ impl std::io::Read for ZipCryptoReaderValid { } } +impl ZipCryptoReaderValid { + /// Consumes this decoder, returning the underlying reader. + #[allow(dead_code)] + pub fn into_inner(self) -> R { + self.reader.file + } +} + static CRCTABLE: [u32; 256] = [ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, From c83e9b60aa2f7747192d4fcece9f566be5fba537 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 20:34:20 -0400 Subject: [PATCH 16/18] Revert "remove the old ZipFile API entirely" This reverts commit f0faf2446375fc8e28cbc5f046c40649c3ece739. --- benches/merge_archive.rs | 4 +- examples/extract.rs | 2 - examples/file_info.rs | 2 - src/crc32.rs | 187 ++++++-- src/read.rs | 734 ++++++++++++++++++++++++++++++-- src/unstable/read.rs | 2 +- src/write.rs | 36 +- tests/aes_encryption.rs | 5 +- tests/deflate64.rs | 2 +- tests/end_to_end.rs | 9 +- tests/lzma.rs | 2 +- tests/xz.rs | 2 +- tests/zip64_large.rs | 2 - tests/zip_crypto.rs | 1 - tests/zip_extended_timestamp.rs | 2 +- 15 files changed, 894 insertions(+), 98 deletions(-) mode change 100755 => 100644 tests/aes_encryption.rs mode change 100755 => 100644 tests/deflate64.rs mode change 100755 => 100644 tests/lzma.rs mode change 100755 => 100644 tests/xz.rs diff --git a/benches/merge_archive.rs b/benches/merge_archive.rs index 698e0a80c..ff07e1a6c 100644 --- a/benches/merge_archive.rs +++ b/benches/merge_archive.rs @@ -41,8 +41,8 @@ fn perform_raw_copy_file( mut target: ZipWriter, ) -> ZipResult> { for i in 0..src.len() { - let entry = src.by_index_raw(i)?; - target.copy_file(entry)?; + let entry = src.by_index(i)?; + target.raw_copy_file(entry)?; } Ok(target) } diff --git a/examples/extract.rs b/examples/extract.rs index aaa652663..57cfba0d7 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -1,8 +1,6 @@ use std::fs; use std::io; -use zip::unstable::read::ArchiveEntry; - fn main() { std::process::exit(real_main()); } diff --git a/examples/file_info.rs b/examples/file_info.rs index c29c356a4..84878c583 100644 --- a/examples/file_info.rs +++ b/examples/file_info.rs @@ -1,8 +1,6 @@ use std::fs; use std::io::BufReader; -use zip::unstable::read::ArchiveEntry; - fn main() { std::process::exit(real_main()); } diff --git a/src/crc32.rs b/src/crc32.rs index 9ad7e4b2f..878b2ee61 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -10,26 +10,25 @@ pub struct Crc32Reader { inner: R, hasher: Hasher, check: u32, + /// Signals if `inner` stores aes encrypted data. + /// AE-2 encrypted data doesn't use crc and sets the value to 0. + enabled: bool, } impl Crc32Reader { /// Get a new Crc32Reader which checks the inner reader against checksum. - pub(crate) fn new(inner: R, checksum: u32) -> Self { + /// The check is disabled if `ae2_encrypted == true`. + pub(crate) fn new(inner: R, checksum: u32, ae2_encrypted: bool) -> Crc32Reader { Crc32Reader { inner, hasher: Hasher::new(), check: checksum, + enabled: !ae2_encrypted, } } - fn check_matches(&self) -> Result<(), &'static str> { - let res = self.hasher.clone().finalize(); - if self.check == res { - Ok(()) - } else { - /* TODO: make this into our own Crc32Error error type! */ - Err("Invalid checksum") - } + fn check_matches(&self) -> bool { + self.check == self.hasher.clone().finalize() } #[allow(dead_code)] @@ -38,27 +37,159 @@ impl Crc32Reader { } } +#[cold] +fn invalid_checksum() -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, "Invalid checksum") +} + impl Read for Crc32Reader { fn read(&mut self, buf: &mut [u8]) -> io::Result { - /* We want to make sure we only check the hash when the input stream is exhausted. */ - if buf.is_empty() { - /* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we - * still want to "pull" from the source in case it surfaces an i/o error. This will - * always return a count of Ok(0) if successful. */ - return self.inner.read(buf); - } - let count = self.inner.read(buf)?; - if count == 0 { - return self - .check_matches() - .map(|()| 0) - /* TODO: use io::Error::other for MSRV >=1.74 */ - .map_err(|e| io::Error::new(io::ErrorKind::Other, e)); + + if self.enabled { + if count == 0 && !buf.is_empty() && !self.check_matches() { + return Err(invalid_checksum()); + } + self.hasher.update(&buf[..count]); } - self.hasher.update(&buf[..count]); Ok(count) } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + let start = buf.len(); + let n = self.inner.read_to_end(buf)?; + + if self.enabled { + self.hasher.update(&buf[start..]); + if !self.check_matches() { + return Err(invalid_checksum()); + } + } + + Ok(n) + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + let start = buf.len(); + let n = self.inner.read_to_string(buf)?; + + if self.enabled { + self.hasher.update(&buf.as_bytes()[start..]); + if !self.check_matches() { + return Err(invalid_checksum()); + } + } + + Ok(n) + } +} + +pub(crate) mod non_crypto { + use std::io; + use std::io::prelude::*; + + use crc32fast::Hasher; + + /// Reader that validates the CRC32 when it reaches the EOF. + pub struct Crc32Reader { + inner: R, + hasher: Hasher, + check: u32, + } + + impl Crc32Reader { + /// Get a new Crc32Reader which checks the inner reader against checksum. + pub(crate) fn new(inner: R, checksum: u32) -> Self { + Crc32Reader { + inner, + hasher: Hasher::new(), + check: checksum, + } + } + + fn check_matches(&self) -> Result<(), &'static str> { + let res = self.hasher.clone().finalize(); + if self.check == res { + Ok(()) + } else { + /* TODO: make this into our own Crc32Error error type! */ + Err("Invalid checksum") + } + } + + pub fn into_inner(self) -> R { + self.inner + } + } + + impl Read for Crc32Reader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + /* We want to make sure we only check the hash when the input stream is exhausted. */ + if buf.is_empty() { + /* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we + * still want to "pull" from the source in case it surfaces an i/o error. This will + * always return a count of Ok(0) if successful. */ + return self.inner.read(buf); + } + + let count = self.inner.read(buf)?; + if count == 0 { + return self + .check_matches() + .map(|()| 0) + /* TODO: use io::Error::other for MSRV >=1.74 */ + .map_err(|e| io::Error::new(io::ErrorKind::Other, e)); + } + self.hasher.update(&buf[..count]); + Ok(count) + } + } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn test_empty_reader() { + let data: &[u8] = b""; + let mut buf = [0; 1]; + + let mut reader = Crc32Reader::new(data, 0); + assert_eq!(reader.read(&mut buf).unwrap(), 0); + + let mut reader = Crc32Reader::new(data, 1); + assert!(reader + .read(&mut buf) + .unwrap_err() + .to_string() + .contains("Invalid checksum")); + } + + #[test] + fn test_byte_by_byte() { + let data: &[u8] = b"1234"; + let mut buf = [0; 1]; + + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 0); + // Can keep reading 0 bytes after the end + assert_eq!(reader.read(&mut buf).unwrap(), 0); + } + + #[test] + fn test_zero_read() { + let data: &[u8] = b"1234"; + let mut buf = [0; 5]; + + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); + assert_eq!(reader.read(&mut buf).unwrap(), 4); + } + } } #[cfg(test)] @@ -70,10 +201,10 @@ mod test { let data: &[u8] = b""; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0); + let mut reader = Crc32Reader::new(data, 0, false); assert_eq!(reader.read(&mut buf).unwrap(), 0); - let mut reader = Crc32Reader::new(data, 1); + let mut reader = Crc32Reader::new(data, 1, false); assert!(reader .read(&mut buf) .unwrap_err() @@ -86,7 +217,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); @@ -101,7 +232,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 5]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); assert_eq!(reader.read(&mut buf).unwrap(), 4); } diff --git a/src/read.rs b/src/read.rs index bdcd861bc..7e13066c3 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,7 +1,10 @@ //! Types for reading ZIP archives +#[cfg(feature = "aes-crypto")] +use crate::aes::{AesReader, AesReaderValid}; use crate::compression::CompressionMethod; use crate::cp437::FromCp437; +use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; use crate::result::{ZipError, ZipResult}; @@ -10,16 +13,30 @@ use crate::types::{ AesMode, AesModeInfo, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, ZipLocalEntryBlock, }; +use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; +use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; -use std::io::{self, prelude::*, SeekFrom}; +use std::io::{self, copy, prelude::*, sink, SeekFrom}; use std::mem; use std::mem::size_of; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::rc::Rc; use std::sync::{Arc, OnceLock}; +#[cfg(feature = "deflate-flate2")] +use flate2::read::DeflateDecoder; + +#[cfg(feature = "deflate64")] +use deflate64::Deflate64Decoder; + +#[cfg(feature = "bzip2")] +use bzip2::read::BzDecoder; + +#[cfg(feature = "zstd")] +use zstd::stream::read::Decoder as ZstdDecoder; + mod config; pub use config::*; @@ -106,10 +123,15 @@ pub(crate) mod zip_archive { #[cfg(feature = "aes-crypto")] use crate::aes::PWD_VERIFY_LENGTH; use crate::extra_fields::UnicodeExtraField; -use crate::result::ZipError::InvalidArchive; +#[cfg(feature = "lzma")] +use crate::read::lzma::LzmaDecoder; +#[cfg(feature = "xz")] +use crate::read::xz::XzDecoder; +use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive}; +use crate::types::ffi::S_IFLNK; use crate::unstable::{path_to_string, LittleEndianReadExt}; -use crate::crc32::Crc32Reader; +use crate::crc32::non_crypto::Crc32Reader as NewCrc32Reader; use crate::unstable::read::{ construct_decompressing_reader, find_entry_content_range, CryptoEntryReader, CryptoVariant, }; @@ -117,6 +139,224 @@ pub use crate::unstable::read::{ArchiveEntry, ZipEntry}; pub use zip_archive::ZipArchive; +#[allow(clippy::large_enum_variant)] +pub(crate) enum CryptoReader<'a> { + Plaintext(io::Take<&'a mut dyn Read>), + ZipCrypto(ZipCryptoReaderValid>), + #[cfg(feature = "aes-crypto")] + Aes { + reader: AesReaderValid>, + vendor_version: AesVendorVersion, + }, +} + +impl<'a> Read for CryptoReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + CryptoReader::Plaintext(r) => r.read(buf), + CryptoReader::ZipCrypto(r) => r.read(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.read(buf), + } + } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + match self { + CryptoReader::Plaintext(r) => r.read_to_end(buf), + CryptoReader::ZipCrypto(r) => r.read_to_end(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf), + } + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + match self { + CryptoReader::Plaintext(r) => r.read_to_string(buf), + CryptoReader::ZipCrypto(r) => r.read_to_string(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf), + } + } +} + +impl<'a> CryptoReader<'a> { + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> io::Take<&'a mut dyn Read> { + match self { + CryptoReader::Plaintext(r) => r, + CryptoReader::ZipCrypto(r) => r.into_inner(), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.into_inner(), + } + } + + /// Returns `true` if the data is encrypted using AE2. + pub const fn is_ae2_encrypted(&self) -> bool { + #[cfg(feature = "aes-crypto")] + return matches!( + self, + CryptoReader::Aes { + vendor_version: AesVendorVersion::Ae2, + .. + } + ); + #[cfg(not(feature = "aes-crypto"))] + false + } +} + +pub(crate) enum ZipFileReader<'a> { + NoReader, + Raw(io::Take<&'a mut dyn Read>), + Stored(Crc32Reader>), + #[cfg(feature = "_deflate-any")] + Deflated(Crc32Reader>>), + #[cfg(feature = "deflate64")] + Deflate64(Crc32Reader>>>), + #[cfg(feature = "bzip2")] + Bzip2(Crc32Reader>>), + #[cfg(feature = "zstd")] + Zstd(Crc32Reader>>>), + #[cfg(feature = "lzma")] + Lzma(Crc32Reader>>>), + #[cfg(feature = "xz")] + Xz(Crc32Reader>>), +} + +impl<'a> Read for ZipFileReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read(buf), + ZipFileReader::Stored(r) => r.read(buf), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.read(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read(buf), + } + } + + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read_exact(buf), + ZipFileReader::Stored(r) => r.read_exact(buf), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.read_exact(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read_exact(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read_exact(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read_exact(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read_exact(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read_exact(buf), + } + } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read_to_end(buf), + ZipFileReader::Stored(r) => r.read_to_end(buf), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.read_to_end(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read_to_end(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read_to_end(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read_to_end(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read_to_end(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read_to_end(buf), + } + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read_to_string(buf), + ZipFileReader::Stored(r) => r.read_to_string(buf), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.read_to_string(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read_to_string(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read_to_string(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read_to_string(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read_to_string(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read_to_string(buf), + } + } +} + +impl<'a> ZipFileReader<'a> { + /// Consumes this decoder, returning the underlying reader. + pub fn drain(self) { + let mut inner = match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r, + ZipFileReader::Stored(r) => r.into_inner().into_inner(), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.into_inner().into_inner().into_inner().into_inner(), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => { + // Lzma reader owns its buffer rather than mutably borrowing it, so we have to drop + // it separately + if let Ok(mut remaining) = r.into_inner().finish() { + let _ = copy(&mut remaining, &mut sink()); + } + return; + } + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(), + }; + let _ = copy(&mut inner, &mut sink()); + } +} + +/// A struct for reading a zip file +pub struct ZipFile<'a> { + pub(crate) data: Cow<'a, ZipFileData>, + pub(crate) crypto_reader: Option>, + pub(crate) reader: ZipFileReader<'a>, +} + +pub(crate) fn find_content<'a>( + data: &ZipFileData, + reader: &'a mut (impl Read + Seek), +) -> ZipResult> { + // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! + let data_start = match data.data_start.get() { + Some(data_start) => *data_start, + None => find_data_start(data, reader)?, + }; + + reader.seek(io::SeekFrom::Start(data_start))?; + Ok((reader as &mut dyn Read).take(data.compressed_size)) +} + pub(crate) fn find_data_start( data: &ZipFileData, reader: &mut (impl Read + Seek), @@ -146,6 +386,131 @@ pub(crate) fn find_data_start( Ok(data_start) } +#[allow(clippy::too_many_arguments)] +pub(crate) fn make_crypto_reader<'a>( + compression_method: CompressionMethod, + crc32: u32, + mut last_modified_time: Option, + using_data_descriptor: bool, + reader: io::Take<&'a mut dyn Read>, + password: Option<&[u8]>, + aes_info: Option, + #[cfg(feature = "aes-crypto")] compressed_size: u64, +) -> ZipResult> { + #[allow(deprecated)] + { + if let CompressionMethod::Unsupported(_) = compression_method { + return unsupported_zip_error("Compression method not supported"); + } + } + + let reader = match (password, aes_info) { + #[cfg(not(feature = "aes-crypto"))] + (Some(_), Some(_)) => { + return Err(ZipError::UnsupportedArchive( + "AES encrypted files cannot be decrypted without the aes-crypto feature.", + )) + } + #[cfg(feature = "aes-crypto")] + ( + Some(password), + Some(AesModeInfo { + aes_mode, + vendor_version, + .. + }), + ) => CryptoReader::Aes { + reader: AesReader::new(reader, aes_mode, compressed_size).validate(password)?, + vendor_version, + }, + (Some(password), None) => { + if !using_data_descriptor { + last_modified_time = None; + } + let validator = if let Some(last_modified_time) = last_modified_time { + ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart()) + } else { + ZipCryptoValidator::PkzipCrc32(crc32) + }; + CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?) + } + (None, Some(_)) => return Err(InvalidPassword), + (None, None) => CryptoReader::Plaintext(reader), + }; + Ok(reader) +} + +pub(crate) fn make_reader( + compression_method: CompressionMethod, + crc32: u32, + reader: CryptoReader, +) -> ZipResult { + let ae2_encrypted = reader.is_ae2_encrypted(); + + match compression_method { + CompressionMethod::Stored => Ok(ZipFileReader::Stored(Crc32Reader::new( + reader, + crc32, + ae2_encrypted, + ))), + #[cfg(feature = "_deflate-any")] + CompressionMethod::Deflated => { + let deflate_reader = DeflateDecoder::new(reader); + Ok(ZipFileReader::Deflated(Crc32Reader::new( + deflate_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => { + let deflate64_reader = Deflate64Decoder::new(reader); + Ok(ZipFileReader::Deflate64(Crc32Reader::new( + deflate64_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => { + let bzip2_reader = BzDecoder::new(reader); + Ok(ZipFileReader::Bzip2(Crc32Reader::new( + bzip2_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => { + let zstd_reader = ZstdDecoder::new(reader).unwrap(); + Ok(ZipFileReader::Zstd(Crc32Reader::new( + zstd_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => { + let reader = LzmaDecoder::new(reader); + Ok(ZipFileReader::Lzma(Crc32Reader::new( + Box::new(reader), + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "xz")] + CompressionMethod::Xz => { + let reader = XzDecoder::new(reader); + Ok(ZipFileReader::Xz(Crc32Reader::new( + reader, + crc32, + ae2_encrypted, + ))) + } + _ => Err(UnsupportedArchive("Compression method not supported")), + } +} + #[derive(Debug)] pub(crate) struct CentralDirectoryInfo { pub(crate) archive_offset: u64, @@ -632,7 +997,7 @@ impl ZipArchive { &mut self, file_number: usize, ) -> ZipResult> { - let entry = self.by_index_raw(file_number)?; + let entry = self.by_index_raw_new(file_number)?; entry.get_aes_verification_key_and_salt() } @@ -676,7 +1041,7 @@ impl ZipArchive { #[cfg(unix)] let mut files_by_unix_mode = Vec::new(); for i in 0..self.len() { - let mut file = self.by_index(i)?; + let mut file = self.by_index_new(i)?; let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; @@ -732,7 +1097,7 @@ impl ZipArchive { } continue; } - let mut file = self.by_index(i)?; + let mut file = self.by_index_new(i)?; let mut outfile = fs::File::create(&outpath)?; io::copy(&mut file, &mut outfile)?; #[cfg(unix)] @@ -775,6 +1140,114 @@ impl ZipArchive { Ok(()) } + /// Search for a file entry by name, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. + pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult { + self.by_name_with_optional_password(name, Some(password)) + } + + /// Search for a file entry by name + pub fn by_name(&mut self, name: &str) -> ZipResult { + self.by_name_with_optional_password(name, None) + } + + fn by_name_with_optional_password<'a>( + &'a mut self, + name: &str, + password: Option<&[u8]>, + ) -> ZipResult> { + let index = self.index_for_name_err(name)?; + self.by_index_with_optional_password(index, password) + } + + /// Get a contained file by index, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. + pub fn by_index_decrypt( + &mut self, + file_number: usize, + password: &[u8], + ) -> ZipResult> { + self.by_index_with_optional_password(file_number, Some(password)) + } + + /// Get a contained file by index + pub fn by_index(&mut self, file_number: usize) -> ZipResult> { + self.by_index_with_optional_password(file_number, None) + } + + /// Get a contained file by index without decompressing it + pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult> { + let reader = &mut self.reader; + let (_, data) = self + .shared + .files + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + Ok(ZipFile { + crypto_reader: None, + reader: ZipFileReader::Raw(find_content(data, reader)?), + data: Cow::Borrowed(data), + }) + } + + fn by_index_with_optional_password( + &mut self, + file_number: usize, + mut password: Option<&[u8]>, + ) -> ZipResult> { + let (_, data) = self + .shared + .files + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + + match (password, data.encrypted) { + (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)), + (Some(_), false) => password = None, //Password supplied, but none needed! Discard. + _ => {} + } + let limit_reader = find_content(data, &mut self.reader)?; + + let crypto_reader = make_crypto_reader( + data.compression_method, + data.crc32, + data.last_modified_time, + data.using_data_descriptor, + limit_reader, + password, + data.aes_mode, + #[cfg(feature = "aes-crypto")] + data.compressed_size, + )?; + Ok(ZipFile { + crypto_reader: Some(crypto_reader), + reader: ZipFileReader::NoReader, + data: Cow::Borrowed(data), + }) + } + /// Unwrap and return the inner reader object /// /// The position of the reader is undefined. @@ -788,13 +1261,16 @@ where R: Read + Seek, { /// Search for a file entry by name - pub fn by_name(&mut self, name: impl AsRef) -> ZipResult> { + pub fn by_name_new( + &mut self, + name: impl AsRef, + ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index(index) + self.by_index_new(index) } /// Get a contained file by index - pub fn by_index(&mut self, file_number: usize) -> ZipResult> { + pub fn by_index_new(&mut self, file_number: usize) -> ZipResult> { let Self { ref mut reader, ref shared, @@ -804,15 +1280,9 @@ where .files .get_index(file_number) .ok_or(ZipError::FileNotFound)?; - - /* Don't allow users to read out an encrypted entry without providing a password. */ - if data.encrypted { - return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)); - } - let content = find_entry_content_range(data, reader)?; let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; - let crc32_reader = Crc32Reader::new(entry_reader, data.crc32); + let crc32_reader = NewCrc32Reader::new(entry_reader, data.crc32); Ok(ZipEntry { data, reader: crc32_reader, @@ -820,16 +1290,19 @@ where } /// Get a contained file by name without decompressing it - pub fn by_name_raw( + pub fn by_name_raw_new( &mut self, name: impl AsRef, ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index_raw(index) + self.by_index_raw_new(index) } /// Get a contained file by index without decompressing it - pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult> { + pub fn by_index_raw_new( + &mut self, + file_number: usize, + ) -> ZipResult> { let Self { ref mut reader, ref shared, @@ -859,13 +1332,13 @@ where /// There are many passwords out there that will also pass the validity checks /// we are able to perform. This is a weakness of the ZipCrypto algorithm, /// due to its fairly primitive approach to cryptography. - pub fn by_name_decrypt( + pub fn by_name_decrypt_new( &mut self, name: impl AsRef, password: &[u8], ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index_decrypt(index, password) + self.by_index_decrypt_new(index, password) } /// Get a contained file by index, decrypt with given password @@ -881,7 +1354,7 @@ where /// There are many passwords out there that will also pass the validity checks /// we are able to perform. This is a weakness of the ZipCrypto algorithm, /// due to its fairly primitive approach to cryptography. - pub fn by_index_decrypt( + pub fn by_index_decrypt_new( &mut self, file_number: usize, password: &[u8], @@ -908,12 +1381,12 @@ where /* Ae2 voids crc checking: https://www.winzip.com/en/support/aes-encryption/ */ CryptoEntryReader::Ae2Encrypted(entry_reader) } else { - CryptoEntryReader::NonAe2Encrypted(Crc32Reader::new(entry_reader, data.crc32)) + CryptoEntryReader::NonAe2Encrypted(NewCrc32Reader::new(entry_reader, data.crc32)) } } else { - /* Not encrypted, so do the same as in .by_index(): */ + /* Not encrypted, so do the same as in .by_index_new(): */ let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; - CryptoEntryReader::Unencrypted(Crc32Reader::new(entry_reader, data.crc32)) + CryptoEntryReader::Unencrypted(NewCrc32Reader::new(entry_reader, data.crc32)) }; Ok(ZipEntry { @@ -1219,10 +1692,217 @@ pub(crate) fn parse_single_extra_field( Ok(false) } +/// Methods for retrieving information on zip files +impl<'a> ZipFile<'a> { + fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a>> { + if let ZipFileReader::NoReader = self.reader { + let data = &self.data; + let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); + self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)?; + } + Ok(&mut self.reader) + } + + pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read { + if let ZipFileReader::NoReader = self.reader { + let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); + self.reader = ZipFileReader::Raw(crypto_reader.into_inner()) + } + &mut self.reader + } + + /// Get the version of the file + pub fn version_made_by(&self) -> (u8, u8) { + ( + self.data.version_made_by / 10, + self.data.version_made_by % 10, + ) + } + + /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. + /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. + /// + /// You can use the [`ZipFile::enclosed_name`] method to validate the name + /// as a safe path. + pub fn name(&self) -> &str { + &self.data.file_name + } + + /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. + pub fn name_raw(&self) -> &[u8] { + &self.data.file_name_raw + } + + /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte, + /// removes a leading '/' and removes '..' parts. + #[deprecated( + since = "0.5.7", + note = "by stripping `..`s from the path, the meaning of paths can change. + `mangled_name` can be used if this behaviour is desirable" + )] + pub fn sanitized_name(&self) -> PathBuf { + self.mangled_name() + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [`ParentDir`]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. + /// + /// [`ParentDir`]: `Component::ParentDir` + pub fn mangled_name(&self) -> PathBuf { + self.data.file_name_sanitized() + } + + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. + pub fn enclosed_name(&self) -> Option { + self.data.enclosed_name() + } + + /// Get the comment of the file + pub fn comment(&self) -> &str { + &self.data.file_comment + } + + /// Get the compression method used to store the file + pub fn compression(&self) -> CompressionMethod { + self.data.compression_method + } + + /// Get the size of the file, in bytes, in the archive + pub fn compressed_size(&self) -> u64 { + self.data.compressed_size + } + + /// Get the size of the file, in bytes, when uncompressed + pub fn size(&self) -> u64 { + self.data.uncompressed_size + } + + /// Get the time the file was last modified + pub fn last_modified(&self) -> Option { + self.data.last_modified_time + } + + /// Returns whether the file is actually a directory + pub fn is_dir(&self) -> bool { + self.data.is_dir() + } + + /// Returns whether the file is actually a symbolic link + pub fn is_symlink(&self) -> bool { + self.unix_mode() + .is_some_and(|mode| mode & S_IFLNK == S_IFLNK) + } + + /// Returns whether the file is a normal file (i.e. not a directory or symlink) + pub fn is_file(&self) -> bool { + !self.is_dir() && !self.is_symlink() + } + + /// Get unix mode for the file + pub fn unix_mode(&self) -> Option { + self.data.unix_mode() + } + + /// Get the CRC32 hash of the original file + pub fn crc32(&self) -> u32 { + self.data.crc32 + } + + /// Get the extra data of the zip header for this file + pub fn extra_data(&self) -> Option<&[u8]> { + self.data.extra_field.as_deref().map(|v| v.as_ref()) + } + + /// Get the starting offset of the data of the compressed file + pub fn data_start(&self) -> u64 { + *self.data.data_start.get().unwrap() + } + + /// Get the starting offset of the zip header for this file + pub fn header_start(&self) -> u64 { + self.data.header_start + } + /// Get the starting offset of the zip header in the central directory for this file + pub fn central_header_start(&self) -> u64 { + self.data.central_header_start + } + + /// iterate through all extra fields + pub fn extra_data_fields(&self) -> impl Iterator { + self.data.extra_fields.iter() + } +} + +impl<'a> Read for ZipFile<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.get_reader()?.read(buf) + } + + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + self.get_reader()?.read_exact(buf) + } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + self.get_reader()?.read_to_end(buf) + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + self.get_reader()?.read_to_string(buf) + } +} + +impl<'a> Drop for ZipFile<'a> { + fn drop(&mut self) { + // self.data is Owned, this reader is constructed by a streaming reader. + // In this case, we want to exhaust the reader so that the next file is accessible. + if let Cow::Owned(_) = self.data { + // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. + match &mut self.reader { + ZipFileReader::NoReader => { + let innerreader = self.crypto_reader.take(); + let _ = copy( + &mut innerreader.expect("Invalid reader state").into_inner(), + &mut sink(), + ); + } + reader => { + let innerreader = std::mem::replace(reader, ZipFileReader::NoReader); + innerreader.drain(); + } + }; + } + } +} + #[cfg(test)] mod test { use crate::result::ZipResult; - use crate::unstable::read::ArchiveEntry; use crate::write::SimpleFileOptions; use crate::CompressionMethod::Stored; use crate::{ZipArchive, ZipWriter}; diff --git a/src/unstable/read.rs b/src/unstable/read.rs index d4712468f..86f608ea6 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -1,7 +1,7 @@ //! Alternate implementation of [`crate::read`]. use crate::compression::CompressionMethod; -use crate::crc32::Crc32Reader; +use crate::crc32::non_crypto::Crc32Reader; use crate::extra_fields::ExtraField; use crate::read::find_data_start; use crate::result::{ZipError, ZipResult}; diff --git a/src/write.rs b/src/write.rs index 9faa9ac94..d719b0b2b 100644 --- a/src/write.rs +++ b/src/write.rs @@ -3,6 +3,7 @@ #[cfg(feature = "aes-crypto")] use crate::aes::AesWriter; use crate::compression::CompressionMethod; +use crate::read::ZipFile; use crate::read::{parse_single_extra_field, Config, ZipArchive}; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CDEBlock}; @@ -12,7 +13,7 @@ use crate::types::{ ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, }; -use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; +use crate::unstable::read::find_entry_content_range; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; @@ -1168,7 +1169,7 @@ impl ZipWriter { /// This method extracts file metadata from the `source` archive, then simply performs a single /// big [`io::copy()`](io::copy) to transfer all the actual file contents without any /// decompression or decryption. This is more performant than the equivalent operation of - /// calling [`Self::copy_file()`] for each entry from the `source` archive in sequence. + /// calling [`Self::raw_copy_file()`] for each entry from the `source` archive in sequence. /// ///``` /// # fn main() -> Result<(), zip::result::ZipError> { @@ -1209,7 +1210,7 @@ impl ZipWriter { self.finish_file()?; /* Ensure we accept the file contents on faith (and avoid overwriting the data). - * See copy_file_rename(). */ + * See raw_copy_file_rename(). */ self.writing_to_file = true; self.writing_raw = true; @@ -1264,19 +1265,15 @@ impl ZipWriter { /// W: Write + Seek, /// { /// // Retrieve file entry by name - /// let file = src.by_name_raw("src_file.txt")?; + /// let file = src.by_name("src_file.txt")?; /// /// // Copy and rename the previously obtained file entry to the destination zip archive - /// dst.copy_file_rename(file, "new_name.txt")?; + /// dst.raw_copy_file_rename(file, "new_name.txt")?; /// /// Ok(()) /// } /// ``` - pub fn copy_file_rename( - &mut self, - mut file: ZipEntry<'_, impl Read>, - name: S, - ) -> ZipResult<()> + pub fn raw_copy_file_rename(&mut self, mut file: ZipFile, name: S) -> ZipResult<()> where S: Into> + ToOwned, SToOwned: Into>, @@ -1303,22 +1300,22 @@ impl ZipWriter { self.writing_to_file = true; self.writing_raw = true; - io::copy(&mut file, self)?; + io::copy(file.get_raw_reader(), self)?; Ok(()) } - /// Like `copy_file_rename`, but uses Path arguments. + /// Like `raw_copy_file_to_path`, but uses Path arguments. /// /// This function ensures that the '/' path separator is used and normalizes `.` and `..`. It /// ignores any `..` or Windows drive letter that would produce a path outside the ZIP file's /// root. - pub fn copy_file_to_path>( + pub fn raw_copy_file_to_path>( &mut self, - file: ZipEntry<'_, impl Read>, + file: ZipFile, path: P, ) -> ZipResult<()> { - self.copy_file_rename(file, path_to_string(path)) + self.raw_copy_file_rename(file, path_to_string(path)) } /// Add a new file using the already compressed data from a ZIP file being read, this allows faster @@ -1336,17 +1333,17 @@ impl ZipWriter { /// W: Write + Seek, /// { /// // Retrieve file entry by name - /// let file = src.by_name_raw("src_file.txt")?; + /// let file = src.by_name("src_file.txt")?; /// /// // Copy the previously obtained file entry to the destination zip archive - /// dst.copy_file(file)?; + /// dst.raw_copy_file(file)?; /// /// Ok(()) /// } /// ``` - pub fn copy_file(&mut self, file: ZipEntry<'_, impl Read>) -> ZipResult<()> { + pub fn raw_copy_file(&mut self, file: ZipFile) -> ZipResult<()> { let name = file.name().to_owned(); - self.copy_file_rename(file, name) + self.raw_copy_file_rename(file, name) } /// Add a directory entry. @@ -1982,7 +1979,6 @@ mod test { use crate::compression::CompressionMethod; use crate::result::ZipResult; use crate::types::DateTime; - use crate::unstable::read::ArchiveEntry; use crate::write::EncryptWith::ZipCrypto; use crate::write::SimpleFileOptions; use crate::zipcrypto::ZipCryptoKeys; diff --git a/tests/aes_encryption.rs b/tests/aes_encryption.rs old mode 100755 new mode 100644 index e605b4eda..c135914d0 --- a/tests/aes_encryption.rs +++ b/tests/aes_encryption.rs @@ -1,10 +1,7 @@ #![cfg(feature = "aes-crypto")] use std::io::{self, Read, Write}; -use zip::{ - result::ZipError, unstable::read::ArchiveEntry, write::SimpleFileOptions, AesMode, - CompressionMethod, ZipArchive, -}; +use zip::{result::ZipError, write::SimpleFileOptions, AesMode, CompressionMethod, ZipArchive}; const SECRET_CONTENT: &str = "Lorem ipsum dolor sit amet"; diff --git a/tests/deflate64.rs b/tests/deflate64.rs old mode 100755 new mode 100644 index 4989f3a91..b0cd95a95 --- a/tests/deflate64.rs +++ b/tests/deflate64.rs @@ -1,7 +1,7 @@ #![cfg(feature = "deflate64")] use std::io::{self, Read}; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn decompress_deflate64() { diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index 97b969421..7943a2ac3 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -2,7 +2,6 @@ use std::collections::HashSet; use std::io::prelude::*; use std::io::Cursor; use zip::result::ZipResult; -use zip::unstable::read::ArchiveEntry; use zip::unstable::LittleEndianWriteExt; use zip::write::ExtendedFileOptions; use zip::write::FileOptions; @@ -47,18 +46,18 @@ fn copy() { { let file = src_archive - .by_name_raw(ENTRY_NAME) + .by_name(ENTRY_NAME) .expect("Missing expected file"); - zip.copy_file(file).expect("Couldn't copy file"); + zip.raw_copy_file(file).expect("Couldn't copy file"); } { let file = src_archive - .by_name_raw(ENTRY_NAME) + .by_name(ENTRY_NAME) .expect("Missing expected file"); - zip.copy_file_rename(file, COPY_ENTRY_NAME) + zip.raw_copy_file_rename(file, COPY_ENTRY_NAME) .expect("Couldn't copy and rename file"); } } diff --git a/tests/lzma.rs b/tests/lzma.rs old mode 100755 new mode 100644 index 8ab2166e6..01a14a2e0 --- a/tests/lzma.rs +++ b/tests/lzma.rs @@ -1,7 +1,7 @@ #![cfg(feature = "lzma")] use std::io::{self, Read}; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn decompress_lzma() { diff --git a/tests/xz.rs b/tests/xz.rs old mode 100755 new mode 100644 index 94881fac9..110b40859 --- a/tests/xz.rs +++ b/tests/xz.rs @@ -1,7 +1,7 @@ #![cfg(feature = "xz")] use std::io::{self, Read}; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn decompress_xz() -> io::Result<()> { diff --git a/tests/zip64_large.rs b/tests/zip64_large.rs index 9864ee7ff..468ef198f 100644 --- a/tests/zip64_large.rs +++ b/tests/zip64_large.rs @@ -189,8 +189,6 @@ impl Read for Zip64File { #[test] fn zip64_large() { - use zip::unstable::read::ArchiveEntry; - let zipfile = Zip64File::new(); let mut archive = zip::ZipArchive::new(zipfile).unwrap(); let mut buf = [0u8; 32]; diff --git a/tests/zip_crypto.rs b/tests/zip_crypto.rs index 12f091724..4c4cc8b29 100644 --- a/tests/zip_crypto.rs +++ b/tests/zip_crypto.rs @@ -34,7 +34,6 @@ const ZIP_CRYPTO_FILE: &[u8] = &[ use std::io::Cursor; use zip::result::ZipError; -use zip::unstable::read::ArchiveEntry; #[test] fn encrypting_file() { diff --git a/tests/zip_extended_timestamp.rs b/tests/zip_extended_timestamp.rs index 58fb2953f..9657028f9 100644 --- a/tests/zip_extended_timestamp.rs +++ b/tests/zip_extended_timestamp.rs @@ -1,5 +1,5 @@ use std::io; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn test_extended_timestamp() { From 3259c5e39b7fe872a3d2844d67a3dbc2d2ff1c0d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 20:35:57 -0400 Subject: [PATCH 17/18] Revert "remove read_zipfile_from_stream()" This reverts commit 3abac52d1999350bb2959ac1e6ee60bc4650b7d9. --- benches/read_metadata.rs | 31 ++------ examples/stdin_info.rs | 7 +- fuzz/fuzz_targets/fuzz_read.rs | 24 ++----- src/read.rs | 76 ++++++++++++++++++-- src/read/stream.rs | 127 +++++++++++++++++++++++++++++---- src/unstable/read.rs | 1 - src/write.rs | 2 +- 7 files changed, 197 insertions(+), 71 deletions(-) mode change 100755 => 100644 fuzz/fuzz_targets/fuzz_read.rs diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 1c11d7302..73f2b26ed 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -7,14 +7,7 @@ use bencher::Bencher; use getrandom::getrandom; use tempdir::TempDir; use zip::write::SimpleFileOptions; -use zip::{ - result::ZipResult, - unstable::{ - read::streaming::{StreamingZipEntry, ZipStreamFileMetadata}, - stream::{ZipStreamReader, ZipStreamVisitor}, - }, - CompressionMethod, ZipArchive, ZipWriter, -}; +use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter}; const FILE_COUNT: usize = 15_000; const FILE_SIZE: usize = 1024; @@ -113,24 +106,12 @@ fn parse_stream_archive(bench: &mut Bencher) { let out = dir.path().join("bench-out.zip"); fs::write(&out, &bytes).unwrap(); - struct V; - impl ZipStreamVisitor for V { - fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { - Ok(()) - } - - fn visit_additional_metadata( - &mut self, - _metadata: &ZipStreamFileMetadata, - ) -> ZipResult<()> { - Ok(()) - } - } - bench.iter(|| { - let f = fs::File::open(&out).unwrap(); - let archive = ZipStreamReader::new(f); - archive.visit(&mut V).unwrap(); + let mut f = fs::File::open(&out).unwrap(); + while zip::read::read_zipfile_from_stream(&mut f) + .unwrap() + .is_some() + {} }); bench.bytes = bytes.len() as u64; } diff --git a/examples/stdin_info.rs b/examples/stdin_info.rs index 25f8f895b..a609916a0 100644 --- a/examples/stdin_info.rs +++ b/examples/stdin_info.rs @@ -1,19 +1,16 @@ use std::io::{self, Read}; -use zip::unstable::read::{streaming::StreamingArchive, ArchiveEntry}; - fn main() { std::process::exit(real_main()); } fn real_main() -> i32 { let stdin = io::stdin(); - let stdin_handle = stdin.lock(); + let mut stdin_handle = stdin.lock(); let mut buf = [0u8; 16]; - let mut archive = StreamingArchive::new(stdin_handle); loop { - match archive.next_entry() { + match zip::read::read_zipfile_from_stream(&mut stdin_handle) { Ok(Some(mut file)) => { println!( "{}: {} bytes ({} bytes packed)", diff --git a/fuzz/fuzz_targets/fuzz_read.rs b/fuzz/fuzz_targets/fuzz_read.rs old mode 100755 new mode 100644 index 4a4327528..78fe670ec --- a/fuzz/fuzz_targets/fuzz_read.rs +++ b/fuzz/fuzz_targets/fuzz_read.rs @@ -3,10 +3,7 @@ use libfuzzer_sys::fuzz_target; use std::io::{Read, Seek, SeekFrom}; use tikv_jemallocator::Jemalloc; -use zip::unstable::{ - read::streaming::{StreamingArchive, StreamingZipEntry, ZipStreamFileMetadata}, - stream::ZipStreamVisitor, -}; +use zip::read::read_zipfile_from_stream; const MAX_BYTES_TO_READ: u64 = 1 << 24; @@ -21,24 +18,11 @@ fn decompress_all(data: &[u8]) -> Result<(), Box> { let mut file = zip.by_index(i)?.take(MAX_BYTES_TO_READ); std::io::copy(&mut file, &mut std::io::sink())?; } - let mut reader = zip.into_inner(); - reader.rewind()?; - - struct V; - impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { - std::io::copy(&mut file, &mut std::io::sink())? - } - - fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()> { - Ok(()) - } + reader.seek(SeekFrom::Start(0))?; + while let Ok(Some(mut file)) = read_zipfile_from_stream(&mut reader) { + std::io::copy(&mut file, &mut std::io::sink())?; } - - let archive = StreamingArchive::new(reader)?; - archive.visit(&mut V)?; - Ok(()) } diff --git a/src/read.rs b/src/read.rs index 7e13066c3..9fb3555a0 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1900,6 +1900,71 @@ impl<'a> Drop for ZipFile<'a> { } } +/// Read ZipFile structures from a non-seekable reader. +/// +/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions +/// as some information will be missing when reading this manner. +/// +/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is +/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory +/// is encountered. No more files should be read after this. +/// +/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after +/// the structure is done. +/// +/// Missing fields are: +/// * `comment`: set to an empty string +/// * `data_start`: set to 0 +/// * `external_attributes`: `unix_mode()`: will return None +pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { + // We can't use the typical ::parse() method, as we follow separate code paths depending on the + // "magic" value (since the magic value will be from the central directory header if we've + // finished iterating over all the actual files). + /* TODO: smallvec? */ + let mut block = [0u8; mem::size_of::()]; + reader.read_exact(&mut block)?; + let block: Box<[u8]> = block.into(); + + let signature = spec::Magic::from_first_le_bytes(&block); + + match signature { + spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (), + spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), + _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), + } + + let block = ZipLocalEntryBlock::interpret(&block)?; + + let mut result = ZipFileData::from_local_block(block, reader)?; + + match parse_extra_field(&mut result) { + Ok(..) | Err(ZipError::Io(..)) => {} + Err(e) => return Err(e), + } + + let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); + + let result_crc32 = result.crc32; + let result_compression_method = result.compression_method; + let crypto_reader = make_crypto_reader( + result_compression_method, + result_crc32, + result.last_modified_time, + result.using_data_descriptor, + limit_reader, + None, + None, + #[cfg(feature = "aes-crypto")] + result.compressed_size, + )?; + + Ok(Some(ZipFile { + data: Cow::Owned(result), + crypto_reader: None, + reader: make_reader(result_compression_method, result_crc32, crypto_reader)?, + })) +} + #[cfg(test)] mod test { use crate::result::ZipResult; @@ -1952,13 +2017,16 @@ mod test { #[test] fn zip_read_streaming() { - use crate::unstable::read::streaming::StreamingArchive; + use super::read_zipfile_from_stream; let mut v = Vec::new(); v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); - let reader = Cursor::new(v); - let mut archive = StreamingArchive::new(reader); - while archive.next_entry().unwrap().is_some() {} + let mut reader = Cursor::new(v); + loop { + if read_zipfile_from_stream(&mut reader).unwrap().is_none() { + break; + } + } } #[test] diff --git a/src/read/stream.rs b/src/read/stream.rs index c6c27aca4..7fb76e70c 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,33 +1,50 @@ use std::fs; use std::io::{self, Read}; -use std::path::Path; +use std::path::{Path, PathBuf}; -use super::{ZipError, ZipResult}; -use crate::unstable::read::{ - streaming::{StreamingArchive, StreamingZipEntry, ZipStreamFileMetadata}, - ArchiveEntry, +use super::{ + central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError, + ZipFile, ZipFileData, ZipResult, }; +use crate::spec::FixedSizeBlock; /// Stream decoder for zip. #[derive(Debug)] -pub struct ZipStreamReader(StreamingArchive); +pub struct ZipStreamReader(R); impl ZipStreamReader { /// Create a new ZipStreamReader pub const fn new(reader: R) -> Self { - Self(StreamingArchive::new(reader)) + Self(reader) } } impl ZipStreamReader { + fn parse_central_directory(&mut self) -> ZipResult { + // Give archive_offset and central_header_start dummy value 0, since + // they are not used in the output. + let archive_offset = 0; + let central_header_start = 0; + + // Parse central header + let block = ZipCentralEntryBlock::parse(&mut self.0)?; + let file = central_header_to_zip_file_inner( + &mut self.0, + archive_offset, + central_header_start, + block, + )?; + Ok(ZipStreamFileMetadata(file)) + } + /// Iterate over the stream and extract all file and their /// metadata. pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { - while let Some(mut file) = self.0.next_entry()? { + while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { visitor.visit_file(&mut file)?; } - while let Some(metadata) = self.0.next_metadata_entry()? { + while let Ok(metadata) = self.parse_central_directory() { visitor.visit_additional_metadata(&metadata)?; } @@ -42,7 +59,7 @@ impl ZipStreamReader { pub fn extract>(self, directory: P) -> ZipResult<()> { struct Extractor<'a>(&'a Path); impl ZipStreamVisitor for Extractor<'_> { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; @@ -96,7 +113,7 @@ pub trait ZipStreamVisitor { /// - `comment`: set to an empty string /// - `data_start`: set to 0 /// - `external_attributes`: `unix_mode()`: will return None - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()>; + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; /// This function is guranteed to be called after all `visit_file`s. /// @@ -104,6 +121,86 @@ pub trait ZipStreamVisitor { fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; } +/// Additional metadata for the file. +#[derive(Debug)] +pub struct ZipStreamFileMetadata(ZipFileData); + +impl ZipStreamFileMetadata { + /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. + /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. + /// + /// You can use the [`ZipFile::enclosed_name`] method to validate the name + /// as a safe path. + pub fn name(&self) -> &str { + &self.0.file_name + } + + /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. + pub fn name_raw(&self) -> &[u8] { + &self.0.file_name_raw + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [std::path::Component::ParentDir]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. + pub fn mangled_name(&self) -> PathBuf { + self.0.file_name_sanitized() + } + + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. + pub fn enclosed_name(&self) -> Option { + self.0.enclosed_name() + } + + /// Returns whether the file is actually a directory + pub fn is_dir(&self) -> bool { + self.name() + .chars() + .next_back() + .map_or(false, |c| c == '/' || c == '\\') + } + + /// Returns whether the file is a regular file + pub fn is_file(&self) -> bool { + !self.is_dir() + } + + /// Get the comment of the file + pub fn comment(&self) -> &str { + &self.0.file_comment + } + + /// Get unix mode for the file + pub const fn unix_mode(&self) -> Option { + self.0.unix_mode() + } +} + #[cfg(test)] mod test { use super::*; @@ -111,7 +208,7 @@ mod test { struct DummyVisitor; impl ZipStreamVisitor for DummyVisitor { - fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { Ok(()) } @@ -127,7 +224,7 @@ mod test { #[derive(Default, Debug, Eq, PartialEq)] struct CounterVisitor(u64, u64); impl ZipStreamVisitor for CounterVisitor { - fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { self.0 += 1; Ok(()) } @@ -170,7 +267,7 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { if file.is_file() { self.filenames.insert(file.name().into()); } @@ -207,7 +304,7 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { let full_name = file.enclosed_name().unwrap(); let file_name = full_name.file_name().unwrap().to_str().unwrap(); assert!( diff --git a/src/unstable/read.rs b/src/unstable/read.rs index 86f608ea6..a4d5e4092 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -487,7 +487,6 @@ pub mod streaming { use std::mem; use std::ops; - #[derive(Debug)] pub struct StreamingArchive { reader: R, remaining_before_next_entry: u64, diff --git a/src/write.rs b/src/write.rs index d719b0b2b..aab92327f 100644 --- a/src/write.rs +++ b/src/write.rs @@ -13,7 +13,7 @@ use crate::types::{ ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, }; -use crate::unstable::read::find_entry_content_range; +use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; From bbee45bcbd218bf5a6819804113ffc87f66cf8f7 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 20:36:59 -0400 Subject: [PATCH 18/18] remove unused --- src/crc32.rs | 4 ---- src/write.rs | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/crc32.rs b/src/crc32.rs index 878b2ee61..73ce52292 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -116,10 +116,6 @@ pub(crate) mod non_crypto { Err("Invalid checksum") } } - - pub fn into_inner(self) -> R { - self.inner - } } impl Read for Crc32Reader { diff --git a/src/write.rs b/src/write.rs index aab92327f..d719b0b2b 100644 --- a/src/write.rs +++ b/src/write.rs @@ -13,7 +13,7 @@ use crate::types::{ ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, }; -use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; +use crate::unstable::read::find_entry_content_range; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64;