From e035fef1563fd6849439838d156a1c40c66c8de5 Mon Sep 17 00:00:00 2001 From: Ilya Grigoriev Date: Thu, 11 Jul 2024 16:53:19 -0700 Subject: [PATCH] conflicts: encode unmaterializeable lines Fixes #3968 Fixes #3975 --- CHANGELOG.md | 10 ++- lib/src/conflicts.rs | 120 +++++++++++++++++++++++++++++++++++- lib/tests/test_conflicts.rs | 116 ++++++++++++++++++++++++++++++++-- 3 files changed, 239 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bd458b711..1a533e5e2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,14 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * Error on `trunk()` revset resolution is now handled gracefully. [#4616](https://github.com/martinvonz/jj/issues/4616) +* Conflicts involving non-empty files that do not end in a newline no longer + look broken when materialized. + [#3968](https://github.com/martinvonz/jj/issues/3968) + +* Conflicts involving sides that themselves contain conflict markers can now be + materialized in a way that `jj` can parse correctly. + [#3975](https://github.com/martinvonz/jj/issues/3968) + ## [0.22.0] - 2024-10-02 ### Breaking changes @@ -370,7 +378,7 @@ Thanks to the people who made this release happen! * Vladimír Čunát (@vcunat) * Vladimir (@0xdeafbeef) * Yuya Nishihara (@yuja) - + ## [0.19.0] - 2024-07-03 ### Breaking changes diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 71306ac381..a2154653d4 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -19,6 +19,7 @@ use std::io::Write; use std::iter::zip; use bstr::BString; +use bstr::ByteVec; use futures::stream::BoxStream; use futures::try_join; use futures::Stream; @@ -71,6 +72,12 @@ static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy = once_cell::sync::La .build() .unwrap() }); +static DIRECTIVE_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(r"^\\JJ") + .multi_line(true) + .build() + .unwrap() +}); fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> std::io::Result<()> { for hunk in hunks { @@ -235,6 +242,11 @@ pub fn materialize_merge_result( single_hunk: Merge, output: &mut dyn Write, ) -> std::io::Result<()> { + let single_hunk = single_hunk.map_owned(|side| encode_unmaterializeable_lines(side.into())); + + // From now on, we assume some invariants guaranteed by the encoding function + // above. See its docstring for details. + let merge_result = files::merge(&single_hunk); match merge_result { MergeResult::Resolved(content) => { @@ -393,7 +405,7 @@ pub fn parse_merge_result(input: &[u8], num_sides: usize) -> Option> = + once_cell::sync::Lazy::new(|| format!("{JJ_VERBATIM_LINE}$0").into_bytes()); +static VERBATIM_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(&format!("^{}", regex::escape(JJ_VERBATIM_LINE))) + .multi_line(true) + .build() + .unwrap() +}); + +/// Encode a side of a conflict to satisfy some invariants +/// +/// - The result will not contain any conflict markers +/// - The result will contain 0 or more newline-terminated lines. In other +/// words, it is either empty or ends in a newline. +/// +/// This transformation is reversible and it is hoped that the result is +/// human-readable. See the tests below for examples. +fn encode_unmaterializeable_lines(mut side: Vec) -> Vec { + replace_all_avoid_cloning( + &DIRECTIVE_LINE_REGEX, + &mut side, + JJ_VERBATIM_LINE_REPLACEMENT.as_slice(), + ); + replace_all_avoid_cloning( + &CONFLICT_MARKER_REGEX, + &mut side, + JJ_VERBATIM_LINE_REPLACEMENT.as_slice(), + ); + if !side.is_empty() && !side.ends_with(b"\n") { + side.push_str(JJ_NO_NEWLINE_AT_EOF); + } + side +} + +/// Undo the transformation done by `encode_unmaterializeable_lines` +fn decode_materialized_side(mut side: BString) -> BString { + if side.ends_with(JJ_NO_NEWLINE_AT_EOF) { + let len = side.len(); + side.truncate(len - JJ_NO_NEWLINE_AT_EOF.len()); + } + replace_all_avoid_cloning(&VERBATIM_LINE_REGEX, &mut side, b""); + side +} + +fn replace_all_avoid_cloning(regex: &Regex, side: &mut Vec, replacement: &[u8]) { + // TODO(ilyagr): The optimization to avoid the clone in the common case + // might be premature. See also + // https://github.com/rust-lang/regex/issues/676 for other options. I like + // my proposal there in theory, but it would take work that I haven't done + // to make sure it works, and ideally it would be a crate. + if regex.is_match(side) { + *side = regex.replace_all(side, replacement).to_vec(); + } +} + +#[cfg(test)] +mod test { + use bstr::BString; + use indoc::indoc; + + use super::{decode_materialized_side, encode_unmaterializeable_lines}; + + #[test] + fn test_conflict_side_encoding_and_decoding() { + let initial_text: BString = indoc! {br" + <<<<<<< + blahblah"} + .into(); + + let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into(); + insta::assert_snapshot!(encoded_text, @r###" + \JJ Verbatim Line:<<<<<<< + blahblah + \JJ: No newline at the end of file + "###); + assert_eq!(decode_materialized_side(encoded_text.clone()), initial_text); + + let doubly_encoded_text: BString = + encode_unmaterializeable_lines(encoded_text.clone().into()).into(); + insta::assert_snapshot!(doubly_encoded_text, @r###" + \JJ Verbatim Line:\JJ Verbatim Line:<<<<<<< + blahblah + \JJ Verbatim Line:\JJ: No newline at the end of file + "###); + // The above should (and does) decode to a file with a newline at the end + assert_eq!(decode_materialized_side(doubly_encoded_text), encoded_text); + } + + #[test] + fn test_conflict_side_encoding_and_decoding2() { + let initial_text = br"\JJ: No newline at the end of file"; + + let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into(); + insta::assert_snapshot!(encoded_text, @r###" + \JJ Verbatim Line:\JJ: No newline at the end of file + \JJ: No newline at the end of file + "###); + assert_eq!( + decode_materialized_side(encoded_text.clone()), + BString::from(initial_text) + ); + } +} diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 71d7018702..9c2b9c74c1 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -327,6 +327,99 @@ fn test_materialize_parse_roundtrip() { "###); } +// Important TODO (maybe not for this file): add a test of what happens when the +// user removes conflict markers and runs `jj diff` + +#[test] +fn test_materialize_parse_roundtrip_tricky() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + let path = RepoPath::from_internal_string("file"); + let base_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 + line 2 <<<<<<< + <<<<<<< line 3 + line 4 + line 5"}, + ); + let left_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 left + line 2 left + <<<<<<<< line 3 + line 4 + line 5 left"}, + ); + let right_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 right + line 2 + line 3 + line 4 right + line 5 right + "}, + ); + + let conflict = Merge::from_removes_adds( + vec![Some(base_id.clone())], + vec![Some(left_id.clone()), Some(right_id.clone())], + ); + let materialized = materialize_conflict_string(store, path, &conflict); + insta::assert_snapshot!( + materialized, + @r###" + \JJ Verbatim Line:\JJ Verbatim Line: fake verbatim line + <<<<<<< Conflict 1 of 1 + %%%%%%% Changes from base to side #1 + -line 1 + -line 2 <<<<<<< + -\JJ Verbatim Line:<<<<<<< line 3 + +line 1 left + +line 2 left + +<<<<<<<< line 3 + line 4 + -line 5 + +line 5 left + \JJ: No newline at the end of file + +++++++ Contents of side #2 + line 1 right + line 2 + line 3 + line 4 right + line 5 right + >>>>>>> Conflict 1 of 1 ends + "### + ); + + // The first add should always be from the left side + insta::assert_debug_snapshot!( + parse_merge_result(materialized.as_bytes(), conflict.num_sides()), + @r###" + Some( + Conflicted( + [ + "\\JJ Verbatim Line: fake verbatim line\nline 1 left\nline 2 left\n<<<<<<<< line 3\nline 4\nline 5 left", + "\\JJ Verbatim Line: fake verbatim line\nline 1\nline 2 <<<<<<<\n<<<<<<< line 3\nline 4\nline 5", + "\\JJ Verbatim Line: fake verbatim line\nline 1 right\nline 2\nline 3\nline 4 right\nline 5 right\n", + ], + ), + ) + "###); + + // TODO: update_from_conflict? +} + #[test] fn test_materialize_conflict_no_newlines_at_eof() { let test_repo = TestRepo::init(); @@ -345,16 +438,29 @@ fn test_materialize_conflict_no_newlines_at_eof() { insta::assert_snapshot!(materialized, @r###" <<<<<<< Conflict 1 of 1 - %%%%%%% Changes from base to side #1 - -base+++++++ Contents of side #2 - right>>>>>>> Conflict 1 of 1 ends + +++++++ Contents of side #1 + %%%%%%% Changes from base to side #2 + -base + +right + \JJ: No newline at the end of file + >>>>>>> Conflict 1 of 1 ends "### ); - // BUG(#3968): These conflict markers cannot be parsed + // These conflict markers can be parsed (issue #3968) insta::assert_debug_snapshot!(parse_merge_result( materialized.as_bytes(), conflict.num_sides() - ),@"None"); + ),@r###" + Some( + Conflicted( + [ + "", + "base", + "right", + ], + ), + ) + "###); } #[test]