Skip to content

Commit

Permalink
conflicts: allow missing newlines at end of file
Browse files Browse the repository at this point in the history
Currently, conflict markers are materialized in a format that cannot be
parsed if a conflict appears at the end of the file, and there is a
non-empty line which doesn't end with a newline character. To fix this
issue, we can add an extra newline to every term of the conflict when
this occurs and then ignore the added newlines when parsing the
conflict. To ensure unambiguous parsing, we can add a "[noeol]" tag to
the start conflict marker (`<<<<<<<`) when this occurs.

I think this is a good solution to the problem because it should remain
understandable to users even if they aren't familiar with file
encodings. Many editors represent newlines at the end of files by
showing an empty line at the end of the editor, so it should be
intuitive that a term ending with an empty line represents a newline,
while a term that doesn't end with an empty line represents the lack of
a newline. Also, adding a newline to every term of the hunk is easier
than keeping track of which terms have added newlines and which don't,
so it makes materialization/parsing easier.

For instance, a conflict with terms `["left\n", "base", "base\nright"]`
would look like:

```
<<<<<<< Conflict 1 of 1 [noeol]
+++++++ Contents of side jj-vcs#1
left

%%%%%%% Changes from base to side jj-vcs#2
 base
+right
>>>>>>>
```
  • Loading branch information
scott2000 committed Jan 16, 2025
1 parent 8a6221b commit 5e04c1e
Show file tree
Hide file tree
Showing 3 changed files with 302 additions and 29 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
* Conditional configuration now applies when initializing new repository.
[#5144](https://github.com/jj-vcs/jj/issues/5144)

* Conflicts at the end of files which don't end with a newline character are
now materialized in a way that can be parsed correctly.
[#3968](https://github.com/jj-vcs/jj/issues/3968)

## [0.25.0] - 2025-01-01

### Release highlights
Expand Down
105 changes: 85 additions & 20 deletions lib/src/conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::iter::zip;

use bstr::BString;
use bstr::ByteSlice;
use bstr::ByteVec;
use futures::stream::BoxStream;
use futures::try_join;
use futures::Stream;
Expand Down Expand Up @@ -58,6 +59,11 @@ pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
/// existing markers.
const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;

/// This marker will be added to the conflict start marker if the conflict
/// contained a non-empty term that didn't end with a newline. If any such terms
/// appear, we add a newline to every term to ensure no information is lost.
const NO_EOL_MARKER: &str = "[noeol]";

fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
for hunk in hunks {
match hunk.kind {
Expand Down Expand Up @@ -271,22 +277,41 @@ impl ConflictMarkerLineChar {
struct ConflictMarkerLine {
char: ConflictMarkerLineChar,
len: usize,
/// If true, the conflict marker line ends with the NO_EOL_MARKER,
/// indicating that a term in the hunk was missing a newline at the end of
/// the file.
no_eol: bool,
}

impl ConflictMarkerLine {
fn new(char: ConflictMarkerLineChar, len: usize) -> Self {
Self { char, len }
Self {
char,
len,
no_eol: false,
}
}

fn set_no_eol(mut self, no_eol: bool) -> Self {
self.no_eol = no_eol;
self
}

/// Write a conflict marker to an output file.
fn write(&self, output: &mut dyn Write, suffix_text: &str) -> io::Result<()> {
let conflict_marker = BString::new(vec![self.char.to_byte(); self.len]);
let mut conflict_marker = BString::new(vec![self.char.to_byte(); self.len]);

if suffix_text.is_empty() {
writeln!(output, "{conflict_marker}")
} else {
writeln!(output, "{conflict_marker} {suffix_text}")
if !suffix_text.is_empty() {
conflict_marker.push_byte(b' ');
conflict_marker.push_str(suffix_text);
}

if self.no_eol {
conflict_marker.push_byte(b' ');
conflict_marker.push_str(NO_EOL_MARKER);
}

writeln!(output, "{conflict_marker}")
}

/// Parse a conflict marker from a line of a file. The conflict marker may
Expand All @@ -303,7 +328,11 @@ impl ConflictMarkerLine {
}
}

Some(ConflictMarkerLine { char, len })
let no_eol = line[len..]
.trim_end_with(|ch| ch.is_ascii_whitespace())
.ends_with_str(NO_EOL_MARKER);

Some(ConflictMarkerLine { char, len, no_eol })
}

/// Parse a conflict marker, expecting it to be at least a certain length.
Expand Down Expand Up @@ -339,8 +368,8 @@ pub fn materialize_merge_result<T: AsRef<[u8]>>(
output: &mut dyn Write,
) -> io::Result<()> {
let merge_result = files::merge(single_hunk);
match &merge_result {
MergeResult::Resolved(content) => output.write_all(content),
match merge_result {
MergeResult::Resolved(content) => output.write_all(&content),
MergeResult::Conflict(hunks) => {
let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
Expand All @@ -355,8 +384,8 @@ pub fn materialize_merge_result_with_marker_len<T: AsRef<[u8]>>(
output: &mut dyn Write,
) -> io::Result<()> {
let merge_result = files::merge(single_hunk);
match &merge_result {
MergeResult::Resolved(content) => output.write_all(content),
match merge_result {
MergeResult::Resolved(content) => output.write_all(&content),
MergeResult::Conflict(hunks) => {
materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
}
Expand All @@ -374,7 +403,7 @@ pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
let mut output = Vec::new();
materialize_conflict_hunks(
&hunks,
hunks,
conflict_marker_style,
conflict_marker_len,
&mut output,
Expand All @@ -396,7 +425,7 @@ pub fn materialize_merge_result_to_bytes_with_marker_len<T: AsRef<[u8]>>(
MergeResult::Conflict(hunks) => {
let mut output = Vec::new();
materialize_conflict_hunks(
&hunks,
hunks,
conflict_marker_style,
conflict_marker_len,
&mut output,
Expand All @@ -408,7 +437,7 @@ pub fn materialize_merge_result_to_bytes_with_marker_len<T: AsRef<[u8]>>(
}

fn materialize_conflict_hunks(
hunks: &[Merge<BString>],
hunks: Vec<Merge<BString>>,
conflict_marker_style: ConflictMarkerStyle,
conflict_marker_len: usize,
output: &mut dyn Write,
Expand All @@ -418,13 +447,25 @@ fn materialize_conflict_hunks(
.filter(|hunk| hunk.as_resolved().is_none())
.count();
let mut conflict_index = 0;
for hunk in hunks {
for mut hunk in hunks {
if let Some(content) = hunk.as_resolved() {
output.write_all(content)?;
} else {
conflict_index += 1;
let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");

// If any non-empty term doesn't end with a newline, add a newline to every term
// to ensure the file remains parsable. These newlines will be ignored when
// parsing due to the NO_EOL_MARKER we will add to the conflict start marker.
let no_eol = hunk
.iter()
.any(|term| term.last_byte().is_some_and(|last| last != b'\n'));
if no_eol {
for term in hunk.iter_mut() {
term.push_byte(b'\n');
}
}

match (conflict_marker_style, hunk.as_slice()) {
// 2-sided conflicts can use Git-style conflict markers
(ConflictMarkerStyle::Git, [left, base, right]) => {
Expand All @@ -434,15 +475,17 @@ fn materialize_conflict_hunks(
right,
&conflict_info,
conflict_marker_len,
no_eol,
output,
)?;
}
_ => {
materialize_jj_style_conflict(
hunk,
&hunk,
&conflict_info,
conflict_marker_style,
conflict_marker_len,
no_eol,
output,
)?;
}
Expand All @@ -458,9 +501,11 @@ fn materialize_git_style_conflict(
right: &[u8],
conflict_info: &str,
conflict_marker_len: usize,
no_eol: bool,
output: &mut dyn Write,
) -> io::Result<()> {
ConflictMarkerLine::new(ConflictMarkerLineChar::ConflictStart, conflict_marker_len)
.set_no_eol(no_eol)
.write(output, &format!("Side #1 ({conflict_info})"))?;
output.write_all(left)?;
ConflictMarkerLine::new(ConflictMarkerLineChar::GitAncestor, conflict_marker_len)
Expand All @@ -481,6 +526,7 @@ fn materialize_jj_style_conflict(
conflict_info: &str,
conflict_marker_style: ConflictMarkerStyle,
conflict_marker_len: usize,
no_eol: bool,
output: &mut dyn Write,
) -> io::Result<()> {
// Write a positive snapshot (side) of a conflict
Expand Down Expand Up @@ -508,6 +554,7 @@ fn materialize_jj_style_conflict(
};

ConflictMarkerLine::new(ConflictMarkerLineChar::ConflictStart, conflict_marker_len)
.set_no_eol(no_eol)
.write(output, conflict_info)?;
let mut add_index = 0;
for (base_index, left) in hunk.removes().enumerate() {
Expand Down Expand Up @@ -624,17 +671,35 @@ pub fn parse_conflict(
let mut resolved_start = 0;
let mut conflict_start = None;
let mut conflict_start_len = 0;
let mut conflict_no_eol = false;
for line in input.lines_with_terminator() {
match parse_conflict_marker_char(line, expected_marker_len) {
Some(ConflictMarkerLineChar::ConflictStart) => {
match ConflictMarkerLine::parse(line, expected_marker_len) {
Some(ConflictMarkerLine {
char: ConflictMarkerLineChar::ConflictStart,
no_eol,
..
}) => {
conflict_start = Some(pos);
conflict_start_len = line.len();
conflict_no_eol = no_eol;
}
Some(ConflictMarkerLineChar::ConflictEnd) => {
Some(ConflictMarkerLine {
char: ConflictMarkerLineChar::ConflictEnd,
..
}) => {
if let Some(conflict_start_index) = conflict_start.take() {
let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
let mut hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
if hunk.num_sides() == num_sides {
// Ignore any additional newlines which were added to make the conflict
// markers parsable
if conflict_no_eol {
for term in hunk.iter_mut() {
if term.last() == Some(&b'\n') {
term.pop();
}
}
}
let resolved_slice = &input[resolved_start..conflict_start_index];
if !resolved_slice.is_empty() {
hunks.push(Merge::resolved(BString::from(resolved_slice)));
Expand Down
Loading

0 comments on commit 5e04c1e

Please sign in to comment.