Skip to content

Commit

Permalink
Add --greedy-delimiter
Browse files Browse the repository at this point in the history
  • Loading branch information
riquito committed May 23, 2022
1 parent 19f0654 commit 9e3d690
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 20 deletions.
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ USAGE:
tuc [FLAGS] [OPTIONS]
FLAGS:
-g, --greedy-delimiter Split fields using a greedy delimiter
-p, --compress-delimiter Collapse any sequence of delimiters
-s, --only-delimited Do not print lines not containing delimiters
-V, --version Prints version information
Expand Down Expand Up @@ -73,10 +74,22 @@ Notes:

## Examples

```sh
# Cut and rearrange fields
echo "foo bar baz" | tuc -d ' ' -f 3,2,1
bazbarfoo
```

```sh
# Keep ranges
echo "foo bar baz" | tuc -d ' ' -f 2:
bar baz
```

```sh
# Cut using a greedy delimiter
echo "foo bar baz" | tuc -d ' ' -f 2:
bar baz
echo "foo    bar" | tuc -g -d ' ' -f 1,2
foobar
```

```sh
Expand Down
2 changes: 2 additions & 0 deletions src/bin/tuc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ USAGE:
tuc [FLAGS] [OPTIONS]
FLAGS:
-g, --greedy-delimiter Split fields using a greedy delimiter
-p, --compress-delimiter Collapse any sequence of delimiters
-s, --only-delimited Do not print lines not containing delimiters
-V, --version Prints version information
Expand Down Expand Up @@ -101,6 +102,7 @@ fn parse_args() -> Result<Opt, pico_args::Error> {
let args = Opt {
complement: pargs.contains(["-m", "--complement"]),
only_delimited: pargs.contains(["-s", "--only-delimited"]),
greedy_delimiter: pargs.contains(["-g", "--greedy-delimiter"]),
compress_delimiter: pargs.contains(["-p", "--compress-delimiter"]),
version: pargs.contains(["-V", "--version"]),
join: pargs.contains(["-j", "--join"]),
Expand Down
102 changes: 84 additions & 18 deletions src/cut_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,31 @@ fn complement_std_range(parts_length: usize, r: &Range<usize>) -> Vec<Range<usiz
}
}

/*
* Split a string into parts and build a vector of ranges that match those parts.
*
* `buffer` - empty vector that will be filled with ranges
* `line` - the string to split
* `delimiter` - what to search to split the string
*/
fn build_ranges_vec(buffer: &mut Vec<Range<usize>>, line: &str, delimiter: &str) {
// Split a string into parts and build a vector of ranges that match those parts.
//
// `buffer` - vector that will be filled with ranges
// `line` - the string to split
// `delimiter` - what to search to split the string
// `greedy` - whether to consider consecutive delimiters as one or not
fn build_ranges_vec(buffer: &mut Vec<Range<usize>>, line: &str, delimiter: &str, greedy: bool) {
buffer.clear();

if line.is_empty() {
return;
}

let delimiter_length = delimiter.len();
let mut next_part_start = 0;

for mat in line.match_indices(&delimiter) {
buffer.push(Range {
start: next_part_start,
end: mat.0,
});
next_part_start = mat.0 + delimiter_length;
for (idx, _) in line.match_indices(&delimiter) {
if !(greedy && idx == next_part_start) {
buffer.push(Range {
start: next_part_start,
end: idx,
});
}

next_part_start = idx + delimiter_length;
}

buffer.push(Range {
Expand Down Expand Up @@ -91,17 +99,15 @@ pub fn cut_str(
return Ok(());
}

bounds_as_ranges.clear();
build_ranges_vec(bounds_as_ranges, line, &opt.delimiter);
build_ranges_vec(bounds_as_ranges, line, &opt.delimiter, opt.greedy_delimiter);

if opt.compress_delimiter
&& (opt.bounds_type == BoundsType::Fields || opt.bounds_type == BoundsType::Lines)
{
compressed_line_buf.clear();
compress_delimiter(bounds_as_ranges, line, &opt.delimiter, compressed_line_buf);
line = compressed_line_buf;
bounds_as_ranges.clear();
build_ranges_vec(bounds_as_ranges, line, &opt.delimiter);
build_ranges_vec(bounds_as_ranges, line, &opt.delimiter, opt.greedy_delimiter);
}

if opt.bounds_type == BoundsType::Characters && bounds_as_ranges.len() > 2 {
Expand Down Expand Up @@ -231,4 +237,64 @@ mod tests {
assert_eq!(complement_std_range(2, &(0..1)), vec![1..2]);
assert_eq!(complement_std_range(2, &(1..2)), vec![0..1]);
}

#[test]
fn test_build_ranges_vec() {
    // Splits `line` with `build_ranges_vec` into a fresh buffer and returns the ranges found.
    fn ranges_of(line: &str, delimiter: &str, greedy: bool) -> Vec<Range<usize>> {
        let mut found: Vec<Range<usize>> = Vec::new();
        build_ranges_vec(&mut found, line, delimiter, greedy);
        found
    }

    // non greedy: each delimiter closes a part, so empty parts show up as empty ranges

    let no_ranges: Vec<Range<usize>> = Vec::new();
    assert_eq!(ranges_of("", "-", false), no_ranges);

    assert_eq!(ranges_of("a", "-", false), vec![0..1]);

    assert_eq!(ranges_of("a-b", "-", false), vec![0..1, 2..3]);

    assert_eq!(ranges_of("-a-", "-", false), vec![0..0, 1..2, 3..3]);

    assert_eq!(ranges_of("a--", "-", false), vec![0..1, 2..2, 3..3]);

    // greedy: a run of consecutive delimiters yields no empty range in between,
    // while the part following the run is still reported (even when empty)

    assert_eq!(ranges_of("a--b", "-", true), vec![0..1, 3..4]);

    assert_eq!(ranges_of("a--", "-", true), vec![0..1, 3..3]);
}
}
1 change: 1 addition & 0 deletions src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub struct Opt {
pub bounds: UserBoundsList,
pub bounds_type: BoundsType,
pub only_delimited: bool,
pub greedy_delimiter: bool,
pub compress_delimiter: bool,
pub replace_delimiter: Option<String>,
pub trim: Option<Trim>,
Expand Down
12 changes: 12 additions & 0 deletions tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,15 @@ fn it_format_fields() {
.success()
.stdout("Say hello to our world.\nJust {saying}\n");
}

// End-to-end check of `-g`: fields 1 and 2 of "a---b", split on "-",
// are expected to be printed as "ab" followed by a newline.
#[test]
fn it_cuts_using_a_greedy_delimiter() {
    let cli_args = ["-g", "-d", "-", "-f", "1,2"];
    let input = "a---b";

    let mut tuc = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();

    tuc.args(&cli_args)
        .write_stdin(input)
        .assert()
        .success()
        .stdout("ab\n");
}

0 comments on commit 9e3d690

Please sign in to comment.