diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b283bcc9..ca8f3e9c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## [2.1.5](https://github.com/zip-rs/zip2/compare/v2.1.4...v2.1.5) - 2024-07-20 + +### 🚜 Refactor +- change invalid_state() return type to io::Result + ## [2.1.4](https://github.com/zip-rs/zip2/compare/v2.1.3...v2.1.4) - 2024-07-18 ### 🐛 Bug Fixes diff --git a/Cargo.toml b/Cargo.toml index dabe22c8c..c3160ac2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "zip" -version = "2.1.4" +version = "2.1.5" authors = [ "Mathijs van de Nes ", "Marli Frost ", diff --git a/fuzz/fuzz_targets/fuzz_write.rs b/fuzz/fuzz_targets/fuzz_write.rs index fa281a4ff..20f31e8a9 100755 --- a/fuzz/fuzz_targets/fuzz_write.rs +++ b/fuzz/fuzz_targets/fuzz_write.rs @@ -1,14 +1,18 @@ #![no_main] use arbitrary::Arbitrary; -use core::fmt::{Debug, Formatter}; +use core::fmt::{Debug}; use libfuzzer_sys::fuzz_target; use replace_with::replace_with_or_abort; use std::borrow::Cow; -use std::io::{Cursor, Read, Seek, Write}; +use std::fmt::{Arguments, Formatter, Write}; +use std::io::Cursor; +use std::io::Write as IoWrite; use std::path::PathBuf; use tikv_jemallocator::Jemalloc; +use zip::result::{ZipError, ZipResult}; use zip::unstable::path_to_string; +use zip::write::FullFileOptions; #[global_allocator] static GLOBAL: Jemalloc = Jemalloc; @@ -17,12 +21,12 @@ static GLOBAL: Jemalloc = Jemalloc; pub enum BasicFileOperation<'k> { WriteNormalFile { contents: Box<[Box<[u8]>]>, - options: zip::write::FullFileOptions<'k>, + options: FullFileOptions<'k>, }, - WriteDirectory(zip::write::FullFileOptions<'k>), + WriteDirectory(FullFileOptions<'k>), WriteSymlinkWithTarget { target: PathBuf, - options: zip::write::FullFileOptions<'k>, + options: FullFileOptions<'k>, }, ShallowCopy(Box>), DeepCopy(Box>), @@ -61,123 +65,12 @@ impl<'k> FileOperation<'k> { } } -impl<'k> Debug for FileOperation<'k> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self.basic { - BasicFileOperation::WriteNormalFile { contents, options } => { - f.write_fmt(format_args!( - "let options = {:?};\n\ - writer.start_file_from_path({:?}, options)?;\n", - options, self.path - ))?; - for content_slice in contents { - f.write_fmt(format_args!("writer.write_all(&({:?}))?;\n", content_slice))?; - } - } - BasicFileOperation::WriteDirectory(options) => { - f.write_fmt(format_args!( - "let options = {:?};\n\ - writer.add_directory_from_path({:?}, options)?;\n", - options, self.path - ))?; - } - BasicFileOperation::WriteSymlinkWithTarget { target, options } => { - f.write_fmt(format_args!( - "let options = {:?};\n\ - writer.add_symlink_from_path({:?}, {:?}, options)?;\n", - options, - self.path, - target.to_owned() - ))?; - } - BasicFileOperation::ShallowCopy(base) => { - let Some(base_path) = base.get_path() else { - return Ok(()); - }; - f.write_fmt(format_args!( - "{:?}writer.shallow_copy_file_from_path({:?}, {:?})?;\n", - base, base_path, self.path - ))?; - } - BasicFileOperation::DeepCopy(base) => { - let Some(base_path) = base.get_path() else { - return Ok(()); - }; - f.write_fmt(format_args!( - "{:?}writer.deep_copy_file_from_path({:?}, {:?})?;\n", - base, base_path, self.path - ))?; - } - BasicFileOperation::MergeWithOtherFile { operations } => { - f.write_str( - "let sub_writer = {\n\ - let mut writer = ZipWriter::new(Cursor::new(Vec::new()));\n\ - writer.set_flush_on_finish_file(false);\n", - )?; - operations - .iter() - .map(|op| { - f.write_fmt(format_args!("{:?}", op.0))?; - if op.1 { - f.write_str("writer.abort_file()?;\n") - } else { - Ok(()) - } - }) - .collect::>()?; - f.write_str( - "writer\n\ - };\n\ - writer.merge_archive(sub_writer.finish_into_readable()?)?;\n", - )?; - } - BasicFileOperation::SetArchiveComment(comment) => { - f.write_fmt(format_args!( - "writer.set_raw_comment({:?}.into());\n", - comment - ))?; - } - } - match &self.reopen { - ReopenOption::DoNotReopen => Ok(()), - ReopenOption::ViaFinish => { - f.write_str("writer = ZipWriter::new_append(writer.finish()?)?;\n") - } - ReopenOption::ViaFinishIntoReadable => f.write_str( - "writer = ZipWriter::new_append(writer.finish_into_readable()?.into_inner())?;\n", - ), - } - } -} - #[derive(Arbitrary, Clone)] pub struct FuzzTestCase<'k> { operations: Box<[(FileOperation<'k>, bool)]>, flush_on_finish_file: bool, } -impl<'k> Debug for FuzzTestCase<'k> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( - "let mut writer = ZipWriter::new(Cursor::new(Vec::new()));\n\ - writer.set_flush_on_finish_file({:?});\n", - self.flush_on_finish_file - ))?; - self.operations - .iter() - .map(|op| { - f.write_fmt(format_args!("{:?}", op.0))?; - if op.1 { - f.write_str("writer.abort_file()?;\n") - } else { - Ok(()) - } - }) - .collect::>()?; - f.write_str("writer\n") - } -} - fn deduplicate_paths(copy: &mut Cow, original: &PathBuf) { if path_to_string(&**copy) == path_to_string(original) { let new_path = match original.file_name() { @@ -192,16 +85,15 @@ fn deduplicate_paths(copy: &mut Cow, original: &PathBuf) { } } -fn do_operation<'k, T>( - writer: &mut zip::ZipWriter, +fn do_operation<'k>( + writer: &mut zip::ZipWriter>>, operation: &FileOperation<'k>, abort: bool, flush_on_finish_file: bool, files_added: &mut usize, -) -> Result<(), Box> -where - T: Read + Write + Seek, -{ + stringifier: &mut impl Write, + panic_on_error: bool +) -> Result<(), Box> { writer.set_flush_on_finish_file(flush_on_finish_file); let mut path = Cow::Borrowed(&operation.path); match &operation.basic { @@ -213,17 +105,25 @@ where if uncompressed_size >= u32::MAX as usize { options = options.large_file(true); } + if options == FullFileOptions::default() { + writeln!(stringifier, "writer.start_file_from_path({:?}, Default::default())?;", path)?; + } else { + writeln!(stringifier, "writer.start_file_from_path({:?}, {:?})?;", path, options)?; + } writer.start_file_from_path(&*path, options)?; for chunk in contents.iter() { + writeln!(stringifier, "writer.write_all(&{:?})?;", chunk)?; writer.write_all(&chunk)?; } *files_added += 1; } BasicFileOperation::WriteDirectory(options) => { + writeln!(stringifier, "writer.add_directory_from_path(&{:?}, {:?})?;", path, options)?; writer.add_directory_from_path(&*path, options.to_owned())?; *files_added += 1; } BasicFileOperation::WriteSymlinkWithTarget { target, options } => { + writeln!(stringifier, "writer.add_symlink_from_path(&{:?}, {:?}, {:?});", path, target, options)?; writer.add_symlink_from_path(&*path, target, options.to_owned())?; *files_added += 1; } @@ -232,7 +132,8 @@ where return Ok(()); }; deduplicate_paths(&mut path, &base_path); - do_operation(writer, &base, false, flush_on_finish_file, files_added)?; + do_operation(writer, &base, false, flush_on_finish_file, files_added, stringifier, panic_on_error)?; + writeln!(stringifier, "writer.shallow_copy_file_from_path({:?}, {:?});", base_path, path)?; writer.shallow_copy_file_from_path(&*base_path, &*path)?; *files_added += 1; } @@ -241,13 +142,16 @@ where return Ok(()); }; deduplicate_paths(&mut path, &base_path); - do_operation(writer, &base, false, flush_on_finish_file, files_added)?; + do_operation(writer, &base, false, flush_on_finish_file, files_added, stringifier, panic_on_error)?; + writeln!(stringifier, "writer.deep_copy_file_from_path({:?}, {:?});", base_path, path)?; writer.deep_copy_file_from_path(&*base_path, &*path)?; *files_added += 1; } BasicFileOperation::MergeWithOtherFile { operations } => { let mut other_writer = zip::ZipWriter::new(Cursor::new(Vec::new())); let mut inner_files_added = 0; + writeln!(stringifier, + "let sub_writer = {{\nlet mut writer = ZipWriter::new(Cursor::new(Vec::new()));")?; operations.iter().for_each(|(operation, abort)| { let _ = do_operation( &mut other_writer, @@ -255,16 +159,21 @@ where *abort, false, &mut inner_files_added, + stringifier, + panic_on_error ); }); + writeln!(stringifier, "writer\n}};\nwriter.merge_archive(sub_writer.finish_into_readable()?)?;")?; writer.merge_archive(other_writer.finish_into_readable()?)?; *files_added += inner_files_added; } BasicFileOperation::SetArchiveComment(comment) => { + writeln!(stringifier, "writer.set_raw_comment({:?})?;", comment)?; writer.set_raw_comment(comment.clone()); } } if abort && *files_added != 0 { + writeln!(stringifier, "writer.abort_file()?;")?; writer.abort_file()?; *files_added -= 1; } @@ -272,19 +181,39 @@ where // comment, then there will be junk after the new comment that we can't get rid of. Thus, we // can only check that the expected is a prefix of the actual match operation.reopen { - ReopenOption::DoNotReopen => return Ok(()), + ReopenOption::DoNotReopen => { + writeln!(stringifier, "writer")?; + return Ok(()) + }, ReopenOption::ViaFinish => { let old_comment = writer.get_raw_comment().to_owned(); - replace_with_or_abort(writer, |old_writer: zip::ZipWriter| { - zip::ZipWriter::new_append(old_writer.finish().unwrap()).unwrap() + writeln!(stringifier, "let mut writer = ZipWriter::new_append(writer.finish()?)?;")?; + replace_with_or_abort(writer, |old_writer: zip::ZipWriter>>| { + (|| -> ZipResult>>> { + zip::ZipWriter::new_append(old_writer.finish()?) + })().unwrap_or_else(|_| { + if panic_on_error { + panic!("Failed to create new ZipWriter") + } + zip::ZipWriter::new(Cursor::new(Vec::new())) + }) }); - assert!(writer.get_raw_comment().starts_with(&old_comment)); + if panic_on_error { + assert!(writer.get_raw_comment().starts_with(&old_comment)); + } } ReopenOption::ViaFinishIntoReadable => { let old_comment = writer.get_raw_comment().to_owned(); - replace_with_or_abort(writer, |old_writer: zip::ZipWriter| { - zip::ZipWriter::new_append(old_writer.finish_into_readable().unwrap().into_inner()) - .unwrap() + writeln!(stringifier, "let mut writer = ZipWriter::new_append(writer.finish()?)?;")?; + replace_with_or_abort(writer, |old_writer| { + (|| -> ZipResult>>> { + zip::ZipWriter::new_append(old_writer.finish()?) + })().unwrap_or_else(|_| { + if panic_on_error { + panic!("Failed to create new ZipWriter") + } + zip::ZipWriter::new(Cursor::new(Vec::new())) + }) }); assert!(writer.get_raw_comment().starts_with(&old_comment)); } @@ -292,27 +221,63 @@ where Ok(()) } -fuzz_target!(|test_case: FuzzTestCase| { - let mut files_added = 0; - let mut writer = zip::ZipWriter::new(Cursor::new(Vec::new())); - let mut final_reopen = false; - if let Some((last_op, _)) = test_case.operations.last() { - if last_op.reopen != ReopenOption::ViaFinishIntoReadable { - final_reopen = true; +impl <'k> FuzzTestCase<'k> { + fn execute(&self, stringifier: &mut impl Write, panic_on_error: bool) -> ZipResult<()> { + let mut files_added = 0; + let mut writer = zip::ZipWriter::new(Cursor::new(Vec::new())); + let mut final_reopen = false; + if let Some((last_op, _)) = self.operations.last() { + if last_op.reopen != ReopenOption::ViaFinishIntoReadable { + final_reopen = true; + } + } + #[allow(unknown_lints)] + #[allow(boxed_slice_into_iter)] + for (operation, abort) in self.operations.iter() { + let _ = do_operation( + &mut writer, + &operation, + *abort, + self.flush_on_finish_file, + &mut files_added, + stringifier, + panic_on_error + ); } + if final_reopen { + writeln!(stringifier, "let _ = writer.finish_into_readable()?;") + .map_err(|_| ZipError::InvalidArchive(""))?; + let _ = writer.finish_into_readable()?; + } + Ok(()) } - #[allow(unknown_lints)] - #[allow(boxed_slice_into_iter)] - for (operation, abort) in test_case.operations.into_iter() { - let _ = do_operation( - &mut writer, - &operation, - *abort, - test_case.flush_on_finish_file, - &mut files_added, - ); +} + +impl <'k> Debug for FuzzTestCase<'k> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f, "let mut writer = ZipWriter::new(Cursor::new(Vec::new()));")?; + let _ = self.execute(f, false); + Ok(()) } - if final_reopen { - let _ = writer.finish_into_readable().unwrap(); +} + +#[derive(Default, Eq, PartialEq)] +struct NoopWrite {} + +impl Write for NoopWrite { + fn write_str(&mut self, _: &str) -> std::fmt::Result { + Ok(()) } + + fn write_char(&mut self, _: char) -> std::fmt::Result { + Ok(()) + } + + fn write_fmt(&mut self, _: Arguments<'_>) -> std::fmt::Result { + Ok(()) + } +} + +fuzz_target!(|test_case: FuzzTestCase| { + test_case.execute(&mut NoopWrite::default(), true).unwrap() }); diff --git a/pull_request_template.md b/pull_request_template.md index 329cb7f05..9b3e77c17 100644 --- a/pull_request_template.md +++ b/pull_request_template.md @@ -30,8 +30,9 @@ These are our requirements for PRs, in addition to the usual functionality and r - `cargo fmt --check --all` must pass. - If the above checks force you to add a new `#[allow]` attribute, please place a comment on the same line or just above it, explaining what the exception applies to and why it's needed. -- Commit messages and the PR title must conform to [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) and start +- The PR title must conform to [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) and start with one of the types specified by the [Angular convention](https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#type). + This is also recommended for commit messages; but it's not required, because they'll be replaced when the PR is squash-merged. Thanks in advance for submitting a bug fix or proposed feature that meets these requirements! --> diff --git a/src/compression.rs b/src/compression.rs index 8fe45d1bf..1e3178f34 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -1,6 +1,6 @@ //! Possible ZIP compression methods. -use std::fmt; +use std::{fmt, io}; #[allow(deprecated)] /// Identifies the storage format used to compress a file within a ZIP archive. @@ -193,6 +193,92 @@ pub const SUPPORTED_COMPRESSION_METHODS: &[CompressionMethod] = &[ CompressionMethod::Xz, ]; +pub(crate) enum Decompressor { + Stored(R), + #[cfg(feature = "_deflate-any")] + Deflated(flate2::bufread::DeflateDecoder), + #[cfg(feature = "deflate64")] + Deflate64(deflate64::Deflate64Decoder), + #[cfg(feature = "bzip2")] + Bzip2(bzip2::bufread::BzDecoder), + #[cfg(feature = "zstd")] + Zstd(zstd::Decoder<'static, R>), + #[cfg(feature = "lzma")] + Lzma(Box>), + #[cfg(feature = "xz")] + Xz(crate::read::xz::XzDecoder), +} + +impl io::Read for Decompressor { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Decompressor::Stored(r) => r.read(buf), + #[cfg(feature = "_deflate-any")] + Decompressor::Deflated(r) => r.read(buf), + #[cfg(feature = "deflate64")] + Decompressor::Deflate64(r) => r.read(buf), + #[cfg(feature = "bzip2")] + Decompressor::Bzip2(r) => r.read(buf), + #[cfg(feature = "zstd")] + Decompressor::Zstd(r) => r.read(buf), + #[cfg(feature = "lzma")] + Decompressor::Lzma(r) => r.read(buf), + #[cfg(feature = "xz")] + Decompressor::Xz(r) => r.read(buf), + } + } +} + +impl Decompressor { + pub fn new(reader: R, compression_method: CompressionMethod) -> crate::result::ZipResult { + Ok(match compression_method { + CompressionMethod::Stored => Decompressor::Stored(reader), + #[cfg(feature = "_deflate-any")] + CompressionMethod::Deflated => { + Decompressor::Deflated(flate2::bufread::DeflateDecoder::new(reader)) + } + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => { + Decompressor::Deflate64(deflate64::Deflate64Decoder::with_buffer(reader)) + } + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => Decompressor::Bzip2(bzip2::bufread::BzDecoder::new(reader)), + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => Decompressor::Zstd(zstd::Decoder::with_buffer(reader)?), + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => { + Decompressor::Lzma(Box::new(crate::read::lzma::LzmaDecoder::new(reader))) + } + #[cfg(feature = "xz")] + CompressionMethod::Xz => Decompressor::Xz(crate::read::xz::XzDecoder::new(reader)), + _ => { + return Err(crate::result::ZipError::UnsupportedArchive( + "Compression method not supported", + )) + } + }) + } + + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> R { + match self { + Decompressor::Stored(r) => r, + #[cfg(feature = "_deflate-any")] + Decompressor::Deflated(r) => r.into_inner(), + #[cfg(feature = "deflate64")] + Decompressor::Deflate64(r) => r.into_inner(), + #[cfg(feature = "bzip2")] + Decompressor::Bzip2(r) => r.into_inner(), + #[cfg(feature = "zstd")] + Decompressor::Zstd(r) => r.finish(), + #[cfg(feature = "lzma")] + Decompressor::Lzma(r) => r.into_inner(), + #[cfg(feature = "xz")] + Decompressor::Xz(r) => r.into_inner(), + } + } +} + #[cfg(test)] mod test { use super::{CompressionMethod, SUPPORTED_COMPRESSION_METHODS}; diff --git a/src/lib.rs b/src/lib.rs index a5f6b2bf5..130333084 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,7 @@ pub use crate::compression::{CompressionMethod, SUPPORTED_COMPRESSION_METHODS}; pub use crate::read::HasZipMetadata; pub use crate::read::ZipArchive; +pub use crate::spec::{ZIP64_BYTES_THR, ZIP64_ENTRY_THR}; pub use crate::types::{AesMode, DateTime}; pub use crate::write::ZipWriter; diff --git a/src/read.rs b/src/read.rs index 8fb33f09b..95d20935e 100644 --- a/src/read.rs +++ b/src/read.rs @@ -2,7 +2,7 @@ #[cfg(feature = "aes-crypto")] use crate::aes::{AesReader, AesReaderValid}; -use crate::compression::CompressionMethod; +use crate::compression::{CompressionMethod, Decompressor}; use crate::cp437::FromCp437; use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; @@ -26,18 +26,6 @@ use std::path::{Path, PathBuf}; use std::rc::Rc; use std::sync::{Arc, OnceLock}; -#[cfg(feature = "deflate-flate2")] -use flate2::read::DeflateDecoder; - -#[cfg(feature = "deflate64")] -use deflate64::Deflate64Decoder; - -#[cfg(feature = "bzip2")] -use bzip2::read::BzDecoder; - -#[cfg(feature = "zstd")] -use zstd::stream::read::Decoder as ZstdDecoder; - mod config; pub use config::*; @@ -124,11 +112,7 @@ pub(crate) mod zip_archive { #[cfg(feature = "aes-crypto")] use crate::aes::PWD_VERIFY_LENGTH; use crate::extra_fields::UnicodeExtraField; -#[cfg(feature = "lzma")] -use crate::read::lzma::LzmaDecoder; -#[cfg(feature = "xz")] -use crate::read::xz::XzDecoder; -use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive}; +use crate::result::ZipError::{InvalidArchive, InvalidPassword}; use crate::spec::is_dir; use crate::types::ffi::S_IFLNK; use crate::unstable::{path_to_string, LittleEndianReadExt}; @@ -200,141 +184,69 @@ impl<'a> CryptoReader<'a> { } } +#[cold] +fn invalid_state() -> io::Result { + Err(io::Error::new( + io::ErrorKind::Other, + "ZipFileReader was in an invalid state", + )) +} + pub(crate) enum ZipFileReader<'a> { NoReader, Raw(io::Take<&'a mut dyn Read>), - Stored(Crc32Reader>), - #[cfg(feature = "_deflate-any")] - Deflated(Crc32Reader>>), - #[cfg(feature = "deflate64")] - Deflate64(Crc32Reader>>>), - #[cfg(feature = "bzip2")] - Bzip2(Crc32Reader>>), - #[cfg(feature = "zstd")] - Zstd(Crc32Reader>>>), - #[cfg(feature = "lzma")] - Lzma(Crc32Reader>>>), - #[cfg(feature = "xz")] - Xz(Crc32Reader>>), + Compressed(Box>>>>), } impl<'a> Read for ZipFileReader<'a> { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::NoReader => invalid_state(), ZipFileReader::Raw(r) => r.read(buf), - ZipFileReader::Stored(r) => r.read(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read(buf), + ZipFileReader::Compressed(r) => r.read(buf), } } fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::NoReader => invalid_state(), ZipFileReader::Raw(r) => r.read_exact(buf), - ZipFileReader::Stored(r) => r.read_exact(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read_exact(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read_exact(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read_exact(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read_exact(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read_exact(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read_exact(buf), + ZipFileReader::Compressed(r) => r.read_exact(buf), } } fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::NoReader => invalid_state(), ZipFileReader::Raw(r) => r.read_to_end(buf), - ZipFileReader::Stored(r) => r.read_to_end(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read_to_end(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read_to_end(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read_to_end(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read_to_end(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read_to_end(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read_to_end(buf), + ZipFileReader::Compressed(r) => r.read_to_end(buf), } } fn read_to_string(&mut self, buf: &mut String) -> io::Result { match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::NoReader => invalid_state(), ZipFileReader::Raw(r) => r.read_to_string(buf), - ZipFileReader::Stored(r) => r.read_to_string(buf), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.read_to_string(buf), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.read_to_string(buf), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.read_to_string(buf), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.read_to_string(buf), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => r.read_to_string(buf), - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.read_to_string(buf), + ZipFileReader::Compressed(r) => r.read_to_string(buf), } } } impl<'a> ZipFileReader<'a> { - /// Consumes this decoder, returning the underlying reader. - pub fn drain(self) { - let mut inner = match self { - ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), - ZipFileReader::Raw(r) => r, - ZipFileReader::Stored(r) => r.into_inner().into_inner(), - #[cfg(feature = "_deflate-any")] - ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(), - #[cfg(feature = "deflate64")] - ZipFileReader::Deflate64(r) => r.into_inner().into_inner().into_inner().into_inner(), - #[cfg(feature = "bzip2")] - ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(), - #[cfg(feature = "zstd")] - ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(), - #[cfg(feature = "lzma")] - ZipFileReader::Lzma(r) => { - // Lzma reader owns its buffer rather than mutably borrowing it, so we have to drop - // it separately - if let Ok(mut remaining) = r.into_inner().finish() { - let _ = copy(&mut remaining, &mut sink()); - } - return; + fn into_inner(self) -> io::Result> { + match self { + ZipFileReader::NoReader => invalid_state(), + ZipFileReader::Raw(r) => Ok(r), + ZipFileReader::Compressed(r) => { + Ok(r.into_inner().into_inner().into_inner().into_inner()) } - #[cfg(feature = "xz")] - ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(), - }; - let _ = copy(&mut inner, &mut sink()); + } } } /// A struct for reading a zip file pub struct ZipFile<'a> { pub(crate) data: Cow<'a, ZipFileData>, - pub(crate) crypto_reader: Option>, pub(crate) reader: ZipFileReader<'a>, } @@ -459,18 +371,14 @@ fn find_data_start( #[allow(clippy::too_many_arguments)] pub(crate) fn make_crypto_reader<'a>( - compression_method: CompressionMethod, - crc32: u32, - mut last_modified_time: Option, - using_data_descriptor: bool, + data: &ZipFileData, reader: io::Take<&'a mut dyn Read>, password: Option<&[u8]>, aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>, - #[cfg(feature = "aes-crypto")] compressed_size: u64, ) -> ZipResult> { #[allow(deprecated)] { - if let CompressionMethod::Unsupported(_) = compression_method { + if let CompressionMethod::Unsupported(_) = data.compression_method { return unsupported_zip_error("Compression method not supported"); } } @@ -484,17 +392,18 @@ pub(crate) fn make_crypto_reader<'a>( } #[cfg(feature = "aes-crypto")] (Some(password), Some((aes_mode, vendor_version, _))) => CryptoReader::Aes { - reader: AesReader::new(reader, aes_mode, compressed_size).validate(password)?, + reader: AesReader::new(reader, aes_mode, data.compressed_size).validate(password)?, vendor_version, }, (Some(password), None) => { - if !using_data_descriptor { + let mut last_modified_time = data.last_modified_time; + if !data.using_data_descriptor { last_modified_time = None; } let validator = if let Some(last_modified_time) = last_modified_time { ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart()) } else { - ZipCryptoValidator::PkzipCrc32(crc32) + ZipCryptoValidator::PkzipCrc32(data.crc32) }; CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?) } @@ -511,68 +420,11 @@ pub(crate) fn make_reader( ) -> ZipResult { let ae2_encrypted = reader.is_ae2_encrypted(); - match compression_method { - CompressionMethod::Stored => Ok(ZipFileReader::Stored(Crc32Reader::new( - reader, - crc32, - ae2_encrypted, - ))), - #[cfg(feature = "_deflate-any")] - CompressionMethod::Deflated => { - let deflate_reader = DeflateDecoder::new(reader); - Ok(ZipFileReader::Deflated(Crc32Reader::new( - deflate_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "deflate64")] - CompressionMethod::Deflate64 => { - let deflate64_reader = Deflate64Decoder::new(reader); - Ok(ZipFileReader::Deflate64(Crc32Reader::new( - deflate64_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "bzip2")] - CompressionMethod::Bzip2 => { - let bzip2_reader = BzDecoder::new(reader); - Ok(ZipFileReader::Bzip2(Crc32Reader::new( - bzip2_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "zstd")] - CompressionMethod::Zstd => { - let zstd_reader = ZstdDecoder::new(reader).unwrap(); - Ok(ZipFileReader::Zstd(Crc32Reader::new( - zstd_reader, - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "lzma")] - CompressionMethod::Lzma => { - let reader = LzmaDecoder::new(reader); - Ok(ZipFileReader::Lzma(Crc32Reader::new( - Box::new(reader), - crc32, - ae2_encrypted, - ))) - } - #[cfg(feature = "xz")] - CompressionMethod::Xz => { - let reader = XzDecoder::new(reader); - Ok(ZipFileReader::Xz(Crc32Reader::new( - reader, - crc32, - ae2_encrypted, - ))) - } - _ => Err(UnsupportedArchive("Compression method not supported")), - } + Ok(ZipFileReader::Compressed(Box::new(Crc32Reader::new( + Decompressor::new(io::BufReader::new(reader), compression_method)?, + crc32, + ae2_encrypted, + )))) } #[derive(Debug)] @@ -1313,7 +1165,6 @@ impl ZipArchive { .get_index(file_number) .ok_or(ZipError::FileNotFound)?; Ok(ZipFile { - crypto_reader: None, reader: ZipFileReader::Raw(find_content(data, reader)?), data: Cow::Borrowed(data), }) @@ -1337,21 +1188,11 @@ impl ZipArchive { } let limit_reader = find_content(data, &mut self.reader)?; - let crypto_reader = make_crypto_reader( - data.compression_method, - data.crc32, - data.last_modified_time, - data.using_data_descriptor, - limit_reader, - password, - data.aes_mode, - #[cfg(feature = "aes-crypto")] - data.compressed_size, - )?; + let crypto_reader = make_crypto_reader(data, limit_reader, password, data.aes_mode)?; + Ok(ZipFile { - crypto_reader: Some(crypto_reader), - reader: ZipFileReader::NoReader, data: Cow::Borrowed(data), + reader: make_reader(data.compression_method, data.crc32, crypto_reader)?, }) } @@ -1646,21 +1487,8 @@ pub trait HasZipMetadata { /// Methods for retrieving information on zip files impl<'a> ZipFile<'a> { - fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a>> { - if let ZipFileReader::NoReader = self.reader { - let data = &self.data; - let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); - self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)?; - } - Ok(&mut self.reader) - } - - pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read { - if let ZipFileReader::NoReader = self.reader { - let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); - self.reader = ZipFileReader::Raw(crypto_reader.into_inner()) - } - &mut self.reader + pub(crate) fn take_raw_reader(&mut self) -> io::Result> { + std::mem::replace(&mut self.reader, ZipFileReader::NoReader).into_inner() } /// Get the version of the file @@ -1829,19 +1657,19 @@ impl<'a> HasZipMetadata for ZipFile<'a> { impl<'a> Read for ZipFile<'a> { fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.get_reader()?.read(buf) + self.reader.read(buf) } fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { - self.get_reader()?.read_exact(buf) + self.reader.read_exact(buf) } fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - self.get_reader()?.read_to_end(buf) + self.reader.read_to_end(buf) } fn read_to_string(&mut self, buf: &mut String) -> io::Result { - self.get_reader()?.read_to_string(buf) + self.reader.read_to_string(buf) } } @@ -1873,19 +1701,9 @@ impl<'a> Drop for ZipFile<'a> { // In this case, we want to exhaust the reader so that the next file is accessible. if let Cow::Owned(_) = self.data { // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. - match &mut self.reader { - ZipFileReader::NoReader => { - let innerreader = self.crypto_reader.take(); - let _ = copy( - &mut innerreader.expect("Invalid reader state").into_inner(), - &mut sink(), - ); - } - reader => { - let innerreader = std::mem::replace(reader, ZipFileReader::NoReader); - innerreader.drain(); - } - }; + if let Ok(mut inner) = self.take_raw_reader() { + let _ = copy(&mut inner, &mut sink()); + } } } } @@ -1934,21 +1752,10 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult LzmaDecoder { } } - pub fn finish(mut self) -> Result> { - copy(&mut self.compressed_reader, &mut self.stream)?; - self.stream.finish().map_err(Error::from) + pub fn into_inner(self) -> R { + self.compressed_reader } } -impl Read for LzmaDecoder { +impl Read for LzmaDecoder { fn read(&mut self, buf: &mut [u8]) -> Result { let mut bytes_read = self.stream.get_output_mut().unwrap().read(buf)?; while bytes_read < buf.len() { - let mut next_compressed = [0u8; COMPRESSED_BYTES_TO_BUFFER]; - let compressed_bytes_read = self.compressed_reader.read(&mut next_compressed)?; - if compressed_bytes_read == 0 { + let compressed_bytes = self.compressed_reader.fill_buf()?; + if compressed_bytes.is_empty() { break; } - self.stream - .write_all(&next_compressed[..compressed_bytes_read])?; + self.stream.write_all(compressed_bytes)?; bytes_read += self .stream .get_output_mut() diff --git a/src/read/xz.rs b/src/read/xz.rs index 478ae1024..991df62b0 100644 --- a/src/read/xz.rs +++ b/src/read/xz.rs @@ -2,12 +2,12 @@ use crc32fast::Hasher; use lzma_rs::decompress::raw::Lzma2Decoder; use std::{ collections::VecDeque, - io::{BufRead, BufReader, Error, Read, Result, Write}, + io::{BufRead, Error, Read, Result, Write}, }; #[derive(Debug)] -pub struct XzDecoder { - compressed_reader: BufReader, +pub struct XzDecoder { + compressed_reader: R, stream_size: usize, buf: VecDeque, check_size: usize, @@ -15,10 +15,10 @@ pub struct XzDecoder { flags: [u8; 2], } -impl XzDecoder { +impl XzDecoder { pub fn new(inner: R) -> Self { XzDecoder { - compressed_reader: BufReader::new(inner), + compressed_reader: inner, stream_size: 0, buf: VecDeque::new(), check_size: 0, @@ -83,7 +83,7 @@ fn error(s: &'static str) -> Result { Err(Error::new(std::io::ErrorKind::InvalidData, s)) } -fn get_multibyte(input: &mut R, hasher: &mut Hasher) -> Result { +fn get_multibyte(input: &mut R, hasher: &mut Hasher) -> Result { let mut result = 0; for i in 0..9 { let mut b = [0u8; 1]; @@ -98,7 +98,7 @@ fn get_multibyte(input: &mut R, hasher: &mut Hasher) -> Result { error("Invalid multi-byte encoding") } -impl Read for XzDecoder { +impl Read for XzDecoder { fn read(&mut self, buf: &mut [u8]) -> Result { if !self.buf.is_empty() { let len = std::cmp::min(buf.len(), self.buf.len()); @@ -263,8 +263,8 @@ impl Read for XzDecoder { } } -impl XzDecoder { +impl XzDecoder { pub fn into_inner(self) -> R { - self.compressed_reader.into_inner() + self.compressed_reader } } diff --git a/src/spec.rs b/src/spec.rs old mode 100644 new mode 100755 index 80cbdee0e..a75c4481e --- a/src/spec.rs +++ b/src/spec.rs @@ -87,8 +87,56 @@ impl ExtraFieldMagic { pub const ZIP64_EXTRA_FIELD_TAG: Self = Self::literal(0x0001); } -/// This should be equal to `0xFFFFFFFF`. +/// The file size at which a ZIP64 record becomes necessary. +/// +/// If a file larger than this threshold attempts to be written, compressed or uncompressed, and +/// [`FileOptions::large_file()`](crate::write::FileOptions) was not true, then [`ZipWriter`] will +/// raise an [`io::Error`] with [`io::ErrorKind::Other`]. +/// +/// If the zip file itself is larger than this value, then a zip64 central directory record will be +/// written to the end of the file. +/// +///``` +/// # fn main() -> Result<(), zip::result::ZipError> { +/// use std::io::{self, Cursor, prelude::*}; +/// use std::error::Error; +/// use zip::{ZipWriter, write::SimpleFileOptions}; +/// +/// let mut zip = ZipWriter::new(Cursor::new(Vec::new())); +/// // Writing an extremely large file for this test is faster without compression. +/// let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); +/// +/// let big_len: usize = (zip::ZIP64_BYTES_THR as usize) + 1; +/// let big_buf = vec![0u8; big_len]; +/// zip.start_file("zero.dat", options)?; +/// // This is too big! +/// let res = zip.write_all(&big_buf[..]).err().unwrap(); +/// assert_eq!(res.kind(), io::ErrorKind::Other); +/// let description = format!("{}", &res); +/// assert_eq!(description, "Large file option has not been set"); +/// // Attempting to write anything further to the same zip will still succeed, but the previous +/// // failing entry has been removed. +/// zip.start_file("one.dat", options)?; +/// let zip = zip.finish_into_readable()?; +/// let names: Vec<_> = zip.file_names().collect(); +/// assert_eq!(&names, &["one.dat"]); +/// +/// // Create a new zip output. +/// let mut zip = ZipWriter::new(Cursor::new(Vec::new())); +/// // This time, create a zip64 record for the file. +/// let options = options.large_file(true); +/// zip.start_file("zero.dat", options)?; +/// // This succeeds because we specified that it could be a large file. +/// assert!(zip.write_all(&big_buf[..]).is_ok()); +/// # Ok(()) +/// # } +///``` pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; +/// The number of entries within a single zip necessary to allocate a zip64 central +/// directory record. +/// +/// If more than this number of entries is written to a [`ZipWriter`], then [`ZipWriter::finish()`] +/// will write out extra zip64 data to the end of the zip file. pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; /// # Safety diff --git a/src/types.rs b/src/types.rs index c1b7adcec..31cb31f39 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1050,7 +1050,7 @@ pub enum AesVendorVersion { } /// AES variant used. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))] #[repr(u8)] pub enum AesMode { diff --git a/src/write.rs b/src/write.rs index ae179fb3b..5d977cd9d 100644 --- a/src/write.rs +++ b/src/write.rs @@ -225,7 +225,7 @@ mod sealed { } } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] pub(crate) enum EncryptWith<'k> { #[cfg(feature = "aes-crypto")] Aes { @@ -254,7 +254,7 @@ impl<'a> arbitrary::Arbitrary<'a> for EncryptWith<'a> { } /// Metadata for a file to be written -#[derive(Clone, Debug, Copy)] +#[derive(Clone, Debug, Copy, Eq, PartialEq)] pub struct FileOptions<'k, T: FileOptionExtension> { pub(crate) compression_method: CompressionMethod, pub(crate) compression_level: Option, @@ -272,7 +272,7 @@ pub type SimpleFileOptions = FileOptions<'static, ()>; /// Adds Extra Data and Central Extra Data. It does not implement copy. pub type FullFileOptions<'k> = FileOptions<'k, ExtendedFileOptions>; /// The Extension for Extra Data and Central Extra Data -#[derive(Clone, Default)] +#[derive(Clone, Default, Eq, PartialEq)] pub struct ExtendedFileOptions { extra_data: Arc>, central_extra_data: Arc>, @@ -1315,7 +1315,7 @@ impl ZipWriter { self.writing_to_file = true; self.writing_raw = true; - io::copy(file.get_raw_reader(), self)?; + io::copy(&mut file.take_raw_reader()?, self)?; Ok(()) }