diff --git a/Cargo.toml b/Cargo.toml index c54aee0..805094b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "whitespace-sifter" -version = "0.1.1" +version = "0.2.0" edition = "2021" authors = ["JumperBot_"] description = "Sift duplicate whitespaces away!" license = "MIT" -keywords = ["whitespace", "duplicate", "sifter", "strainer", "string"] -categories = ["compression", "development-tools", "text-processing", "value-formatting"] +keywords = ["duplicate", "sifter", "string", "text", "whitespace"] +categories = ["compression", "development-tools", "text-processing", "value-formatting", "visualization"] repository = "https://github.com/JumperBot/whitespace-sifter/" [lib] @@ -15,3 +15,7 @@ test = true doctest = true doc = true crate-type = ["lib"] + +[features] +default = ["preserve_newline"] +preserve_newline = [] diff --git a/README.md b/README.md index f23261c..1fade19 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,23 @@ Other than that, it naturally removes the whitespaces at the start and end of th ```rust use whitespace_sifter::*; -fn main() { - // This prints `1.. 2.. 3.. 4.. 5..`. - println!("{}", sift("1.. \n2.. \n\n3.. \n\n\n4.. \n\n\n\n5.. \n\n\n\n\n")); +// This prints `1.. 2.. 3.. 4.. 5..`. +println!("{}", sift( + "1.. \n2.. \n\n3.. \n\n\n4.. \n\n\n\n5.. \n\n\n\n\n" +)); - // This prints `A..\r\nB..\r\nC..\r\nD..\r\nE..`. - println!("{}", sift_with_carriage_return("A..\r\n B..\r\n\r\n C..\r\n\r\n\r\n D..\r\n\r\n\r\n\r\n E..\r\n\r\n\r\n\r\n\r\n")); -} +// This prints `A..\r\nB..\r\nC..\r\nD..\r\nE..`. +println!("{}", sift_with_carriage_return( + "A..\r\n B..\r\n\r\n C..\r\n\r\n\r\n D..\r\n\r\n\r\n\r\n E..\r\n\r\n\r\n\r\n\r\n" +)); + +// This prints `1..\n2..\n3..\n4..\n5..`. +println!("{}", preserve_newline::sift( + "1.. \n2.. \n\n3.. \n\n\n4.. \n\n\n\n5.. \n\n\n\n\n" +)); + +// This prints `A..\r\nB..\r\nC..\r\nD..\r\nE..`. +println!("{}", preserve_newline::sift_with_carriage_return( + "A.. \r\n B.. \r\n\r\n C.. 
/// Sift every line of a `&str` on its own and keep a single line break between the lines
/// that survive.
///
/// Lines that are empty or contain only whitespace are dropped. Inside each remaining line,
/// leading and trailing whitespace is removed and every run of whitespace is reduced to the
/// first whitespace character of that run.
///
/// This module is compiled when the `preserve_newline` feature is enabled (the feature is part
/// of the default feature set). It is also compiled under `cfg(test)`, because the crate's own
/// unit tests call into it unconditionally and must build even with
/// `--no-default-features`.
#[cfg(any(feature = "preserve_newline", test))]
pub mod preserve_newline {
    /// Removes duplicate [whitespaces](https://doc.rust-lang.org/reference/whitespace.html)
    /// within the `&str`, treating `\n` as the line separator.
    ///
    /// Returns the non-blank lines, each sifted, joined by a single `\n`. There is no leading
    /// or trailing line break in the result.
    ///
    /// If the `&str` uses `\r\n` line endings, use [`sift_with_carriage_return`] instead.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use whitespace_sifter::preserve_newline;
    ///
    /// assert_eq!(preserve_newline::sift("1..  \n\n2..  \n"), "1..\n2..");
    /// ```
    pub fn sift(input: &str) -> String {
        sift_lines(input, "\n")
    }

    /// Removes duplicate [whitespaces](https://doc.rust-lang.org/reference/whitespace.html)
    /// within the `&str`, treating `\r\n` as the line separator.
    ///
    /// Returns the non-blank lines, each sifted, joined by a single `\r\n`. A lone `\n` or
    /// `\r` inside a line is handled like any other whitespace character.
    ///
    /// If the `&str` uses plain `\n` line endings, use [`sift`] instead.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use whitespace_sifter::preserve_newline;
    ///
    /// assert_eq!(
    ///     preserve_newline::sift_with_carriage_return("A..  \r\n\r\n B..\r\n"),
    ///     "A..\r\nB.."
    /// );
    /// ```
    pub fn sift_with_carriage_return(input: &str) -> String {
        sift_lines(input, "\r\n")
    }

    /// Shared implementation: splits `input` on `line_ending`, sifts each non-blank line and
    /// joins the results with `line_ending`.
    fn sift_lines(input: &str, line_ending: &str) -> String {
        // The result never grows beyond the input, so one allocation is enough.
        let mut output = String::with_capacity(input.len());
        let lines = input
            .split(line_ending)
            .map(str::trim)
            .filter(|line| !line.is_empty());
        for line in lines {
            // Put the separator *between* lines so nothing has to be trimmed afterwards.
            if !output.is_empty() {
                output.push_str(line_ending);
            }
            // Walk by `char`, not by byte: slicing a `&str` one byte at a time panics as soon
            // as the text contains a multi-byte UTF-8 character.
            let mut in_whitespace_run = false;
            for c in line.chars() {
                let is_whitespace = c.is_whitespace();
                // Keep every non-whitespace char, and only the first char of a whitespace run.
                if !(is_whitespace && in_whitespace_run) {
                    output.push(c);
                }
                in_whitespace_run = is_whitespace;
            }
        }
        output
    }
}
); } + + #[test] + fn test_sift_preserved() { + let input: &str = &format!( + "{}\n\n{}\n\n{}\n\n\n", + "This. \n\nis. \n\na. \n\nsentence... \n\n", + "With. \n\nsome. \n\nduplicate... \n\n", + "Whitespaces. \n\n" + ); + assert_eq!( + &super::preserve_newline::sift(input), + "This.\nis.\na.\nsentence...\nWith.\nsome.\nduplicate...\nWhitespaces." + ); + } + + #[test] + fn test_sift_with_carriage_return_preserved() { + let input: &str = &format!( + "{}\r\n\r\n{}\r\n\r\n{}\r\n\r\n\r\n", + "This. \r\n\r\nis. \r\n\r\na. \r\n\r\nsentence... \r\n\r\n", + "With. \r\n\r\nsome. \r\n\r\nduplicate... \r\n\r\n", + "Whitespaces. \r\n\r\n" + ); + assert_eq!( + &super::preserve_newline::sift_with_carriage_return(input), + "This.\r\nis.\r\na.\r\nsentence...\r\nWith.\r\nsome.\r\nduplicate...\r\nWhitespaces." + ); + } }