Skip to content

Commit

Permalink
conflicts: encode unmaterializeable lines
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyagr committed Oct 13, 2024
1 parent 1897f47 commit e035fef
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 7 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
* Error on `trunk()` revset resolution is now handled gracefully.
[#4616](https://github.com/martinvonz/jj/issues/4616)

* Conflicts involving non-empty files that do not end in a newline no longer
look broken when materialized.
[#3968](https://github.com/martinvonz/jj/issues/3968)

* Conflicts involving sides that themselves contain conflict markers can now be
materialized in a way that `jj` can parse correctly.
[#3975](https://github.com/martinvonz/jj/issues/3968)

## [0.22.0] - 2024-10-02

### Breaking changes
Expand Down Expand Up @@ -370,7 +378,7 @@ Thanks to the people who made this release happen!
* Vladimír Čunát (@vcunat)
* Vladimir (@0xdeafbeef)
* Yuya Nishihara (@yuja)

## [0.19.0] - 2024-07-03

### Breaking changes
Expand Down
120 changes: 119 additions & 1 deletion lib/src/conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::io::Write;
use std::iter::zip;

use bstr::BString;
use bstr::ByteVec;
use futures::stream::BoxStream;
use futures::try_join;
use futures::Stream;
Expand Down Expand Up @@ -71,6 +72,12 @@ static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::La
.build()
.unwrap()
});
static DIRECTIVE_LINE_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
RegexBuilder::new(r"^\\JJ")
.multi_line(true)
.build()
.unwrap()
});

fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> std::io::Result<()> {
for hunk in hunks {
Expand Down Expand Up @@ -235,6 +242,11 @@ pub fn materialize_merge_result(
single_hunk: Merge<BString>,
output: &mut dyn Write,
) -> std::io::Result<()> {
let single_hunk = single_hunk.map_owned(|side| encode_unmaterializeable_lines(side.into()));

// From now on, we assume some invariants guaranteed by the encoding function
// above. See its docstring for details.

let merge_result = files::merge(&single_hunk);
match merge_result {
MergeResult::Resolved(content) => {
Expand Down Expand Up @@ -393,7 +405,7 @@ pub fn parse_merge_result(input: &[u8], num_sides: usize) -> Option<Merge<BStrin
}
}

Some(result)
Some(result.map_owned(decode_materialized_side))
}

fn parse_conflict_into_list_of_hunks(
Expand Down Expand Up @@ -588,3 +600,109 @@ pub async fn update_from_content(
};
Ok(new_file_ids)
}

const JJ_NO_NEWLINE_AT_EOF: &[u8] = b"\n\\JJ: No newline at the end of file\n";
const JJ_VERBATIM_LINE: &str = "\\JJ Verbatim Line:";
static JJ_VERBATIM_LINE_REPLACEMENT: once_cell::sync::Lazy<Vec<u8>> =
once_cell::sync::Lazy::new(|| format!("{JJ_VERBATIM_LINE}$0").into_bytes());
static VERBATIM_LINE_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
RegexBuilder::new(&format!("^{}", regex::escape(JJ_VERBATIM_LINE)))
.multi_line(true)
.build()
.unwrap()
});

/// Encode a side of a conflict to satisfy some invariants
///
/// - The result will not contain any conflict markers
/// - The result will contain 0 or more newline-terminated lines. In other
/// words, it is either empty or ends in a newline.
///
/// This transformation is reversible and it is hoped that the result is
/// human-readable. See the tests below for examples.
fn encode_unmaterializeable_lines(mut side: Vec<u8>) -> Vec<u8> {
replace_all_avoid_cloning(
&DIRECTIVE_LINE_REGEX,
&mut side,
JJ_VERBATIM_LINE_REPLACEMENT.as_slice(),
);
replace_all_avoid_cloning(
&CONFLICT_MARKER_REGEX,
&mut side,
JJ_VERBATIM_LINE_REPLACEMENT.as_slice(),
);
if !side.is_empty() && !side.ends_with(b"\n") {
side.push_str(JJ_NO_NEWLINE_AT_EOF);
}
side
}

/// Undo the transformation done by `encode_unmaterializeable_lines`
fn decode_materialized_side(mut side: BString) -> BString {
if side.ends_with(JJ_NO_NEWLINE_AT_EOF) {
let len = side.len();
side.truncate(len - JJ_NO_NEWLINE_AT_EOF.len());
}
replace_all_avoid_cloning(&VERBATIM_LINE_REGEX, &mut side, b"");
side
}

fn replace_all_avoid_cloning(regex: &Regex, side: &mut Vec<u8>, replacement: &[u8]) {
// TODO(ilyagr): The optimization to avoid the clone in the common case
// might be premature. See also
// https://github.com/rust-lang/regex/issues/676 for other options. I like
// my proposal there in theory, but it would take work that I haven't done
// to make sure it works, and ideally it would be a crate.
if regex.is_match(side) {
*side = regex.replace_all(side, replacement).to_vec();
}
}

#[cfg(test)]
mod test {
use bstr::BString;
use indoc::indoc;

use super::{decode_materialized_side, encode_unmaterializeable_lines};

#[test]
fn test_conflict_side_encoding_and_decoding() {
let initial_text: BString = indoc! {br"
<<<<<<<
blahblah"}
.into();

let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into();
insta::assert_snapshot!(encoded_text, @r###"
\JJ Verbatim Line:<<<<<<<
blahblah
\JJ: No newline at the end of file
"###);
assert_eq!(decode_materialized_side(encoded_text.clone()), initial_text);

let doubly_encoded_text: BString =
encode_unmaterializeable_lines(encoded_text.clone().into()).into();
insta::assert_snapshot!(doubly_encoded_text, @r###"
\JJ Verbatim Line:\JJ Verbatim Line:<<<<<<<
blahblah
\JJ Verbatim Line:\JJ: No newline at the end of file
"###);
// The above should (and does) decode to a file with a newline at the end
assert_eq!(decode_materialized_side(doubly_encoded_text), encoded_text);
}

#[test]
fn test_conflict_side_encoding_and_decoding2() {
let initial_text = br"\JJ: No newline at the end of file";

let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into();
insta::assert_snapshot!(encoded_text, @r###"
\JJ Verbatim Line:\JJ: No newline at the end of file
\JJ: No newline at the end of file
"###);
assert_eq!(
decode_materialized_side(encoded_text.clone()),
BString::from(initial_text)
);
}
}
116 changes: 111 additions & 5 deletions lib/tests/test_conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,99 @@ fn test_materialize_parse_roundtrip() {
"###);
}

// Important TODO (maybe not for this file): add a test of what happens when the
// user removes conflict markers and runs `jj diff`

#[test]
fn test_materialize_parse_roundtrip_tricky() {
let test_repo = TestRepo::init();
let store = test_repo.repo.store();

let path = RepoPath::from_internal_string("file");
let base_id = testutils::write_file(
store,
path,
indoc! {"
\\JJ Verbatim Line: fake verbatim line
line 1
line 2 <<<<<<<
<<<<<<< line 3
line 4
line 5"},
);
let left_id = testutils::write_file(
store,
path,
indoc! {"
\\JJ Verbatim Line: fake verbatim line
line 1 left
line 2 left
<<<<<<<< line 3
line 4
line 5 left"},
);
let right_id = testutils::write_file(
store,
path,
indoc! {"
\\JJ Verbatim Line: fake verbatim line
line 1 right
line 2
line 3
line 4 right
line 5 right
"},
);

let conflict = Merge::from_removes_adds(
vec![Some(base_id.clone())],
vec![Some(left_id.clone()), Some(right_id.clone())],
);
let materialized = materialize_conflict_string(store, path, &conflict);
insta::assert_snapshot!(
materialized,
@r###"
\JJ Verbatim Line:\JJ Verbatim Line: fake verbatim line
<<<<<<< Conflict 1 of 1
%%%%%%% Changes from base to side #1
-line 1
-line 2 <<<<<<<
-\JJ Verbatim Line:<<<<<<< line 3
+line 1 left
+line 2 left
+<<<<<<<< line 3
line 4
-line 5
+line 5 left
\JJ: No newline at the end of file
+++++++ Contents of side #2
line 1 right
line 2
line 3
line 4 right
line 5 right
>>>>>>> Conflict 1 of 1 ends
"###
);

// The first add should always be from the left side
insta::assert_debug_snapshot!(
parse_merge_result(materialized.as_bytes(), conflict.num_sides()),
@r###"
Some(
Conflicted(
[
"\\JJ Verbatim Line: fake verbatim line\nline 1 left\nline 2 left\n<<<<<<<< line 3\nline 4\nline 5 left",
"\\JJ Verbatim Line: fake verbatim line\nline 1\nline 2 <<<<<<<\n<<<<<<< line 3\nline 4\nline 5",
"\\JJ Verbatim Line: fake verbatim line\nline 1 right\nline 2\nline 3\nline 4 right\nline 5 right\n",
],
),
)
"###);

// TODO: update_from_conflict?
}

#[test]
fn test_materialize_conflict_no_newlines_at_eof() {
let test_repo = TestRepo::init();
Expand All @@ -345,16 +438,29 @@ fn test_materialize_conflict_no_newlines_at_eof() {
insta::assert_snapshot!(materialized,
@r###"
<<<<<<< Conflict 1 of 1
%%%%%%% Changes from base to side #1
-base+++++++ Contents of side #2
right>>>>>>> Conflict 1 of 1 ends
+++++++ Contents of side #1
%%%%%%% Changes from base to side #2
-base
+right
\JJ: No newline at the end of file
>>>>>>> Conflict 1 of 1 ends
"###
);
// BUG(#3968): These conflict markers cannot be parsed
// These conflict markers can be parsed (issue #3968)
insta::assert_debug_snapshot!(parse_merge_result(
materialized.as_bytes(),
conflict.num_sides()
),@"None");
),@r###"
Some(
Conflicted(
[
"",
"base",
"right",
],
),
)
"###);
}

#[test]
Expand Down

0 comments on commit e035fef

Please sign in to comment.