Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix broken spans in diffs #14678

Merged
merged 1 commit into from
Feb 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 210 additions & 0 deletions services/gitdiff/gitdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ var (
removedCodePrefix = []byte(`<span class="removed-code">`)
codeTagSuffix = []byte(`</span>`)
)

var unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
var trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
var entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)

Expand All @@ -196,10 +198,218 @@ func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
return false
}

func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {

// Create a new array to store our fixed up blocks
fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))

// semantically label some numbers
const insert, delete, equal = 0, 1, 2

// record the positions of the last type of each block in the fixedup blocks
last := []int{-1, -1, -1}
operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}

// create a writer for insert and deletes
toWrite := []strings.Builder{
{},
{},
}

// make some flags for insert and delete
unfinishedTag := []bool{false, false}
unfinishedEnt := []bool{false, false}

// store stores the provided text in the writer for the typ
store := func(text string, typ int) {
(&(toWrite[typ])).WriteString(text)
}

// hasStored returns true if there is stored content
hasStored := func(typ int) bool {
return (&toWrite[typ]).Len() > 0
}

// stored will return that content
stored := func(typ int) string {
return (&toWrite[typ]).String()
}

// empty will empty the stored content
empty := func(typ int) {
(&toWrite[typ]).Reset()
}

// pop will remove the stored content appending to a diff block for that typ
pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
if hasStored(typ) {
if last[typ] > last[equal] {
fixedup[last[typ]].Text += stored(typ)
} else {
fixedup = append(fixedup, diffmatchpatch.Diff{
Type: operation[typ],
Text: stored(typ),
})
}
empty(typ)
}
return fixedup
}

// Now we walk the provided diffs and check the type of each block in turn
for _, diff := range diffs {

typ := delete // flag for handling insert or delete typs
switch diff.Type {
case diffmatchpatch.DiffEqual:
// First check if there is anything stored
if hasStored(insert) || hasStored(delete) {
// There are two reasons for storing content:
// 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
if unfinishedEnt[insert] || unfinishedEnt[delete] {
// we look for a ';' to finish an entity
idx := strings.IndexRune(diff.Text, ';')
if idx >= 0 {
// if we find a ';' store the preceding content to both insert and delete
store(diff.Text[:idx+1], insert)
store(diff.Text[:idx+1], delete)

// and remove it from this block
diff.Text = diff.Text[idx+1:]

// reset the ent flags
unfinishedEnt[insert] = false
unfinishedEnt[delete] = false
} else {
// otherwise store it all on insert and delete
store(diff.Text, insert)
store(diff.Text, delete)
// and empty this block
diff.Text = ""
}
}
// 2. Unfinished Tag
if unfinishedTag[insert] || unfinishedTag[delete] {
// we look for a '>' to finish a tag
idx := strings.IndexRune(diff.Text, '>')
if idx >= 0 {
store(diff.Text[:idx+1], insert)
store(diff.Text[:idx+1], delete)
diff.Text = diff.Text[idx+1:]
unfinishedTag[insert] = false
unfinishedTag[delete] = false
} else {
store(diff.Text, insert)
store(diff.Text, delete)
diff.Text = ""
}
}

// If we've completed the required tag/entities
if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
// pop off the stack
fixedup = pop(insert, fixedup)
fixedup = pop(delete, fixedup)
}

// If that has left this diff block empty then shortcut
if len(diff.Text) == 0 {
continue
}
}

// check if this block ends in an unfinished tag?
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedTag[insert] = true
unfinishedTag[delete] = true
} else {
// otherwise does it end in an unfinished entity?
idx = entityRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedEnt[insert] = true
unfinishedEnt[delete] = true
}
}

// If there is an unfinished component
if idx != nil {
// Store the fragment
store(diff.Text[idx[0]:], insert)
store(diff.Text[idx[0]:], delete)
// and remove it from this block
diff.Text = diff.Text[:idx[0]]
}

// If that hasn't left the block empty
if len(diff.Text) > 0 {
// store the position of the last equal block and store it in our diffs
last[equal] = len(fixedup)
fixedup = append(fixedup, diff)
}
continue
case diffmatchpatch.DiffInsert:
typ = insert
fallthrough
case diffmatchpatch.DiffDelete:
// First check if there is anything stored for this type
if hasStored(typ) {
// if there is prepend it to this block, empty the storage and reset our flags
diff.Text = stored(typ) + diff.Text
empty(typ)
unfinishedEnt[typ] = false
unfinishedTag[typ] = false
}

// check if this block ends in an unfinished tag
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedTag[typ] = true
} else {
// otherwise does it end in an unfinished entity
idx = entityRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedEnt[typ] = true
}
}

// If there is an unfinished component
if idx != nil {
// Store the fragment
store(diff.Text[idx[0]:], typ)
// and remove it from this block
diff.Text = diff.Text[:idx[0]]
}

// If that hasn't left the block empty
if len(diff.Text) > 0 {
// if the last block of this type was after the last equal block
if last[typ] > last[equal] {
// store this blocks content on that block
fixedup[last[typ]].Text += diff.Text
} else {
// otherwise store the position of the last block of this type and store the block
last[typ] = len(fixedup)
fixedup = append(fixedup, diff)
}
}
continue
}
}

// pop off any remaining stored content
fixedup = pop(insert, fixedup)
fixedup = pop(delete, fixedup)

return fixedup
}

func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML {
buf := bytes.NewBuffer(nil)
match := ""

diffs = fixupBrokenSpans(diffs)

for _, diff := range diffs {
if shouldWriteInline(diff, lineType) {
if len(match) > 0 {
Expand Down
24 changes: 18 additions & 6 deletions services/gitdiff/gitdiff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/setting"
dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
Expand All @@ -23,7 +24,7 @@ import (

func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
if s1 != string(s2) {
t.Errorf("%s should be equal %s", s2, s1)
t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", s1, s2)
}
}

Expand Down Expand Up @@ -61,22 +62,22 @@ func TestDiffToHTML(t *testing.T) {
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">)</span>"},
}, DiffLineDel))

assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span></span><span class=\"removed-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This removes an unnecessary close and reopen of a removed-code span

{Type: dmp.DiffEqual, Text: "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"nx\">"},
{Type: dmp.DiffDelete, Text: "language</span><span "},
{Type: dmp.DiffEqual, Text: "c"},
{Type: dmp.DiffDelete, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"},
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
}, DiffLineDel))

assertEqual(t, "<span class=\"added-code\">language</span></span><span class=\"added-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"added-code\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This removes an unnecessary close and reopen of a added-code span

{Type: dmp.DiffInsert, Text: "language</span><span "},
{Type: dmp.DiffEqual, Text: "c"},
{Type: dmp.DiffInsert, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"},
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
}, DiffLineAdd))

assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"></span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This removes an unnecessary close and reopen of a removed-code span

{Type: dmp.DiffEqual, Text: "<span class=\"k\">print</span>"},
{Type: dmp.DiffInsert, Text: "<span"},
{Type: dmp.DiffEqual, Text: " "},
Expand All @@ -85,14 +86,14 @@ func TestDiffToHTML(t *testing.T) {
{Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"},
}, DiffLineAdd))

assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins</span>&#39;", diffToHTML("", []dmp.Diff{
assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;</span>", diffToHTML("", []dmp.Diff{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look at the provided diff array and what was previously listed as expected you'll see that we actually expected something incorrect!

{Type: dmp.DiffEqual, Text: "sh &#3"},
{Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins&#34"},
{Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39"},
{Type: dmp.DiffEqual, Text: ";"},
}, DiffLineAdd))

assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=</span><span class=\"added-code\">&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This removes an unnecessary close and reopen of a addded-code span

{Type: dmp.DiffEqual, Text: "<span class=\"x\"> &lt;h"},
{Type: dmp.DiffInsert, Text: "4 class=&#"},
{Type: dmp.DiffEqual, Text: "3"},
Expand Down Expand Up @@ -462,3 +463,14 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
}
}
}

func TestDiffToHTML_14231(t *testing.T) {
setting.Cfg = ini.Empty()
diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", " run()\n"), highlight.Code("main.v", " run(db)\n"), true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)

expected := ` <span class="n">run</span><span class="added-code"><span class="o">(</span><span class="n">db</span></span><span class="o">)</span>`
output := diffToHTML("main.v", diffRecord, DiffLineAdd)

assertEqual(t, expected, output)
}
Comment on lines +467 to +476
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here we add a test case specific for the issue that this PR fixes.