diff --git a/Makefile b/Makefile index 8f6ed2c..ae03d73 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ SOURCE_DIR := $(abspath $(dir $(lastword ${MAKEFILE_LIST}))) BUILD_DIR := ${SOURCE_DIR}/_build BUILD_TIME := $(shell date +'%Y-%m-%dT%H:%M:%S%z') BUILD_COMMIT := $(shell git rev-parse --short HEAD 2>/dev/null || echo 'none') -BUILD_VERSION := $(shell cat VERSION || echo '0.14.0') +BUILD_VERSION := $(shell cat VERSION || echo '0.15.0') GO_PACKAGES := $(shell go list ./... | grep -v '^${PKG}/mock/' | grep -v '^${PKG}/proto/') GO_LDFLAGS := -ldflags '-X ${PKG}/pkg/version.version=${BUILD_VERSION} -X ${PKG}/pkg/version.buildTime=${BUILD_TIME} -X ${PKG}/pkg/version.buildCommit=${BUILD_COMMIT}' diff --git a/VERSION b/VERSION index 0548fb4..7092c7c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.14.0 \ No newline at end of file +0.15.0 \ No newline at end of file diff --git a/bali.toml b/bali.toml index 1704c9b..0ab48b7 100644 --- a/bali.toml +++ b/bali.toml @@ -3,7 +3,7 @@ name = "zeta" summary = "HugeSCM - A next generation cloud-based version control system" description = "HugeSCM - A next generation cloud-based version control system" package-name = "alipay-linkc-zeta" -version = "0.14.0" +version = "0.15.0" license = "MIT" prefix = "/usr/local" packager = "江二" diff --git a/cmd/zeta-mc/crate.toml b/cmd/zeta-mc/crate.toml index 8eed577..bb311f7 100644 --- a/cmd/zeta-mc/crate.toml +++ b/cmd/zeta-mc/crate.toml @@ -1,7 +1,7 @@ name = "zeta-mc" description = "zeta-mc - Migrate Git repository to zeta" destination = "bin" -version = "0.14.0" +version = "0.15.0" goflags = [ "-ldflags", "-X github.com/antgroup/hugescm/pkg/version.version=$BUILD_VERSION -X github.com/antgroup/hugescm/pkg/version.buildTime=$BUILD_TIME -X github.com/antgroup/hugescm/pkg/version.buildCommit=$BUILD_COMMIT", diff --git a/cmd/zeta/crate.toml b/cmd/zeta/crate.toml index 3085c85..72b4e7e 100644 --- a/cmd/zeta/crate.toml +++ b/cmd/zeta/crate.toml @@ -1,7 +1,7 @@ name = "zeta" description = "HugeSCM - A next generation cloud-based version control system" destination = "bin" -version = "0.14.0" +version = "0.15.0" goflags = [ "-ldflags", "-X github.com/antgroup/hugescm/pkg/version.version=$BUILD_VERSION -X github.com/antgroup/hugescm/pkg/version.buildTime=$BUILD_TIME -X github.com/antgroup/hugescm/pkg/version.buildCommit=$BUILD_COMMIT", diff --git a/modules/diferenco/diferenco.go b/modules/diferenco/diferenco.go index 1f4ab7a..1552d11 100644 --- a/modules/diferenco/diferenco.go +++ b/modules/diferenco/diferenco.go @@ -131,8 +131,8 @@ type StringDiff struct { Text string } -type Stats struct { - Del, Ins, Hunks int +type FileStat struct { + Addition, Deletion, Hunks int } type Options struct { @@ -167,7 +167,7 @@ func diffInternal(ctx context.Context, L1, L2 []int, a Algorithm) ([]Change, err } } -func DoStats(ctx context.Context, opts *Options) (*Stats, error) { +func Stat(ctx context.Context, opts *Options) (*FileStat, error) { sink := &Sink{ Index: make(map[string]int), } @@ -183,12 +183,12 @@ func DoStats(ctx context.Context, opts *Options) (*Stats, error) { if err != nil { return nil, err } - stats := &Stats{ + stats := &FileStat{ Hunks: len(changes), } for _, ch := range changes { - stats.Del += ch.Del - stats.Ins += ch.Ins + stats.Addition += ch.Ins + stats.Deletion += ch.Del } return stats, nil } diff --git a/modules/diferenco/diferenco_test.go b/modules/diferenco/diferenco_test.go index 613560c..447ed04 100644 --- a/modules/diferenco/diferenco_test.go +++ b/modules/diferenco/diferenco_test.go @@ -32,7 +32,7 @@ func TestDiff(t *testing.T) { now := time.Now() u, err := DoUnified(context.Background(), &Options{ From: &File{ - Path: "a.txt", + Name: "a.txt", }, To: nil, S1: textA, @@ -64,12 +64,12 @@ func TestPatchFD(t *testing.T) { textB := string(bytesB) u, err := DoUnified(context.Background(), &Options{ From: &File{ - Path: "a.txt", + Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ - Path: "b.txt", + Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, @@ -101,12 +101,12 @@ func TestPatch(t *testing.T) { textB := string(bytesB) u, err := DoUnified(context.Background(), &Options{ From: &File{ - Path: "a.txt", + Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ - Path: "b.txt", + Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, @@ -133,7 +133,7 @@ func TestPatchNew(t *testing.T) { u, err := DoUnified(context.Background(), &Options{ From: nil, To: &File{ - Path: "a.txt", + Name: "a.txt", Hash: "6547898", Mode: 0o10644, }, @@ -159,7 +159,7 @@ func TestPatchDelete(t *testing.T) { textA := string(bytesA) u, err := DoUnified(context.Background(), &Options{ From: &File{ - Path: "a.txt", + Name: "a.txt", Hash: "6547898", Mode: 0o10644, }, @@ -189,7 +189,7 @@ foo bar 31df1778815171897c907daf454c4419cfaa46f9` u, err := DoUnified(context.Background(), &Options{ From: &File{ - Path: "a.txt", + Name: "a.txt", Hash: "6547898", Mode: 0o10644, }, @@ -222,12 +222,12 @@ func TestPatchScss(t *testing.T) { textB := string(bytesB) u, err := DoUnified(context.Background(), &Options{ From: &File{ - Path: "a.txt", + Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ - Path: "b.txt", + Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, @@ -259,12 +259,12 @@ func TestPatchCss(t *testing.T) { textB := string(bytesB) u, err := DoUnified(context.Background(), &Options{ From: &File{ - Path: "a.txt", + Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ - Path: "b.txt", + Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, @@ -278,3 +278,27 @@ func TestPatchCss(t *testing.T) { e.SetColor(color.NewColorConfig()) _ = e.Encode([]*Unified{u}) } + +func TestShowPatch(t *testing.T) { + patch := []*Unified{ + { + From: &File{ + Name: "docs/a.png", + Hash: "1ab12893fc666524ed79caae503e12c20a748e2f92db7730c8be09d981970f96", + Mode: 33188, + }, + IsBinary: true, + }, + { + To: &File{ + Name: "images/windows7.iso", + Hash: "adba50d9794b9ef3f7ec8cbc680f7f1fa3fbf9df0ac8d1f9b9ccab6d941bc11b", + Mode: 33188, + }, + IsFragments: true, + }, + } + e := NewUnifiedEncoder(os.Stderr) + e.SetColor(color.NewColorConfig()) + _ = e.Encode(patch) +} diff --git a/modules/diferenco/histogram_test.go b/modules/diferenco/histogram_test.go index b71f8ce..19a9066 100644 --- a/modules/diferenco/histogram_test.go +++ b/modules/diferenco/histogram_test.go @@ -32,8 +32,8 @@ func TestHistogram(t *testing.T) { a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := HistogramDiff(context.Background(), a, b) - u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines) + u := sink.ToUnified(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr) e.SetColor(color.NewColorConfig()) - e.Encode([]*Unified{u}) + _ = e.Encode([]*Unified{u}) } diff --git a/modules/diferenco/minimal_test.go b/modules/diferenco/minimal_test.go index 81e06a5..9d0bf94 100644 --- a/modules/diferenco/minimal_test.go +++ b/modules/diferenco/minimal_test.go @@ -32,8 +32,8 @@ func TestMinimalDiff(t *testing.T) { a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := MinimalDiff(context.Background(), a, b) - u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines) + u := sink.ToUnified(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr) e.SetColor(color.NewColorConfig()) - e.Encode([]*Unified{u}) + _ = e.Encode([]*Unified{u}) } diff --git a/modules/diferenco/myers_test.go b/modules/diferenco/myers_test.go index a6ef4e0..345d76f 100644 --- a/modules/diferenco/myers_test.go +++ b/modules/diferenco/myers_test.go @@ -70,7 +70,7 @@ func TestMyersDiff2(t *testing.T) { a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := MyersDiff(context.Background(), a, b) - u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines) + u := sink.ToUnified(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String()) } @@ -91,6 +91,6 @@ func TestMyersDiff3(t *testing.T) { a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := MyersDiff(context.Background(), a, b) - u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines) + u := sink.ToUnified(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String()) } diff --git a/modules/diferenco/unified.go b/modules/diferenco/unified.go index bd9d819..e356e6b 100644 --- a/modules/diferenco/unified.go +++ b/modules/diferenco/unified.go @@ -14,25 +14,35 @@ import ( const DefaultContextLines = 3 type File struct { - Path string - Hash string - Mode uint32 + Name string `json:"name"` + Hash string `json:"hash"` + Mode uint32 `json:"mode"` } // unified represents a set of edits as a unified diff. type Unified struct { // From is the name of the original file. - From *File + From *File `json:"from,omitempty"` // To is the name of the modified file. - To *File + To *File `json:"to,omitempty"` // IsBinary returns true if this patch is representing a binary file. - IsBinary bool + IsBinary bool `json:"binary"` // Fragments returns true if this patch is representing a fragments file. - IsFragments bool + IsFragments bool `json:"fragments"` // Message prefix, eg: warning: something - Message string + Message string `json:"message"` // Hunks is the set of edit Hunks needed to transform the file content. - Hunks []*Hunk + Hunks []*Hunk `json:"hunks,omitempty"` +} + +func (u Unified) Stat() FileStat { + s := FileStat{Hunks: len(u.Hunks)} + for _, h := range u.Hunks { + ins, del := h.Stat() + s.Addition += ins + s.Deletion += del + } + return s } // String converts a unified diff to the standard textual form for that diff. @@ -43,12 +53,12 @@ func (u Unified) String() string { } b := new(strings.Builder) if u.From != nil { - fmt.Fprintf(b, "--- %s\n", u.From.Path) + fmt.Fprintf(b, "--- %s\n", u.From.Name) } else { fmt.Fprintf(b, "--- /dev/null\n") } if u.To != nil { - fmt.Fprintf(b, "+++ %s\n", u.To.Path) + fmt.Fprintf(b, "+++ %s\n", u.To.Name) } else { fmt.Fprintf(b, "+++ /dev/null\n") } @@ -104,16 +114,29 @@ func (u Unified) String() string { // Hunk represents a contiguous set of line edits to apply. type Hunk struct { // The line in the original source where the hunk starts. - FromLine int + FromLine int `json:"from_line"` // The line in the original source where the hunk finishes. - ToLine int + ToLine int `json:"to_line"` // The set of line based edits to apply. - Lines []Line + Lines []Line `json:"lines,omitempty"` +} + +func (h Hunk) Stat() (int, int) { + var ins, del int + for _, l := range h.Lines { + switch l.Kind { + case Delete: + del++ + case Insert: + ins++ + } + } + return ins, del } type Line struct { - Kind Operation - Content string + Kind Operation `json:"kind"` + Content string `json:"content"` } func DoUnified(ctx context.Context, opts *Options) (*Unified, error) { diff --git a/modules/diferenco/unified_encoder.go b/modules/diferenco/unified_encoder.go index a2430dc..c66d8e0 100644 --- a/modules/diferenco/unified_encoder.go +++ b/modules/diferenco/unified_encoder.go @@ -9,6 +9,10 @@ import ( "github.com/antgroup/hugescm/modules/diferenco/color" ) +const ( + ZERO_OID = "0000000000000000000000000000000000000000000000000000000000000000" // zeta zero OID +) + var ( operationChar = map[Operation]byte{ Insert: '+', @@ -89,10 +93,6 @@ func (e *UnifiedEncoder) appendPathLines(lines []string, fromPath, toPath string ) } -const ( - ZERO_OID = "0000000000000000000000000000000000000000000000000000000000000000" // zeta zero OID -) - func (e *UnifiedEncoder) writeFilePatchHeader(u *Unified, b *strings.Builder) { from, to := u.From, u.To if from == nil && to == nil { @@ -104,7 +104,7 @@ func (e *UnifiedEncoder) writeFilePatchHeader(u *Unified, b *strings.Builder) { hashEquals := from.Hash == to.Hash lines = append(lines, fmt.Sprintf("diff --zeta %s%s %s%s", - e.srcPrefix, from.Path, e.dstPrefix, to.Path), + e.srcPrefix, from.Name, e.dstPrefix, to.Name), ) if from.Mode != to.Mode { lines = append(lines, @@ -112,10 +112,10 @@ func (e *UnifiedEncoder) writeFilePatchHeader(u *Unified, b *strings.Builder) { fmt.Sprintf("new mode %o", to.Mode), ) } - if from.Path != to.Path { + if from.Name != to.Name { lines = append(lines, - fmt.Sprintf("rename from %s", from.Path), - fmt.Sprintf("rename to %s", to.Path), + fmt.Sprintf("rename from %s", from.Name), + fmt.Sprintf("rename to %s", to.Name), ) } if from.Mode != to.Mode && !hashEquals { @@ -128,22 +128,22 @@ func (e *UnifiedEncoder) writeFilePatchHeader(u *Unified, b *strings.Builder) { ) } if !hashEquals { - lines = e.appendPathLines(lines, e.srcPrefix+from.Path, e.dstPrefix+to.Path, u.IsBinary, u.IsFragments) + lines = e.appendPathLines(lines, e.srcPrefix+from.Name, e.dstPrefix+to.Name, u.IsBinary, u.IsFragments) } case from == nil: lines = append(lines, - fmt.Sprintf("diff --zeta %s %s", e.srcPrefix+to.Path, e.dstPrefix+to.Path), + fmt.Sprintf("diff --zeta %s %s", e.srcPrefix+to.Name, e.dstPrefix+to.Name), fmt.Sprintf("new file mode %o", to.Mode), fmt.Sprintf("index %s..%s", ZERO_OID, to.Hash), ) - lines = e.appendPathLines(lines, "/dev/null", e.dstPrefix+to.Path, u.IsBinary, u.IsFragments) + lines = e.appendPathLines(lines, "/dev/null", e.dstPrefix+to.Name, u.IsBinary, u.IsFragments) case to == nil: lines = append(lines, - fmt.Sprintf("diff --zeta %s %s", e.srcPrefix+from.Path, e.dstPrefix+from.Path), + fmt.Sprintf("diff --zeta %s %s", e.srcPrefix+from.Name, e.dstPrefix+from.Name), fmt.Sprintf("deleted file mode %o", from.Mode), fmt.Sprintf("index %s..%s", from.Hash, ZERO_OID), ) - lines = e.appendPathLines(lines, e.srcPrefix+from.Path, "/dev/null", u.IsBinary, u.IsFragments) + lines = e.appendPathLines(lines, e.srcPrefix+from.Name, "/dev/null", u.IsBinary, u.IsFragments) } b.WriteString(e.color[color.Meta]) b.WriteString(lines[0]) @@ -227,6 +227,9 @@ func (e *UnifiedEncoder) writeUnified(u *Unified) error { } e.writeFilePatchHeader(u, b) if len(u.Hunks) == 0 { + if _, err := io.WriteString(e.Writer, b.String()); err != nil { + return err + } return nil } for _, hunk := range u.Hunks { diff --git a/modules/diff/diff.go b/modules/diff/diff.go deleted file mode 100644 index 8b786c2..0000000 --- a/modules/diff/diff.go +++ /dev/null @@ -1,64 +0,0 @@ -// Package diff implements line oriented diffs, similar to the ancient -// Unix diff command. -// -// The current implementation is just a wrapper around Sergi's -// go-diff/diffmatchpatch library, which is a go port of Neil -// Fraser's google-diff-match-patch code -package diff - -import ( - "bytes" - "time" - - "github.com/antgroup/hugescm/modules/diffmatchpatch" -) - -// Do computes the (line oriented) modifications needed to turn the src -// string into the dst string. The underlying algorithm is Meyers, -// its complexity is O(N*d) where N is min(lines(src), lines(dst)) and d -// is the size of the diff. -func Do(src, dst string) ([]diffmatchpatch.Diff, error) { - // the default timeout is time.Second which may be too small under heavy load - return DoWithTimeout(src, dst, time.Hour) -} - -// DoWithTimeout computes the (line oriented) modifications needed to turn the src -// string into the dst string. The `timeout` argument specifies the maximum -// amount of time it is allowed to spend in this function. If the timeout -// is exceeded, the parts of the strings which were not considered are turned into -// a bulk delete+insert and the half-baked suboptimal result is returned at once. -// The underlying algorithm is Meyers, its complexity is O(N*d) where N is -// min(lines(src), lines(dst)) and d is the size of the diff. -func DoWithTimeout(src, dst string, timeout time.Duration) ([]diffmatchpatch.Diff, error) { - dmp := diffmatchpatch.New() - dmp.DiffTimeout = timeout - wSrc, wDst, warray, err := dmp.DiffLinesToRunes(src, dst) - if err != nil { - return nil, err - } - diffs := dmp.DiffMainRunes(wSrc, wDst, false) - diffs = dmp.DiffCharsToLines(diffs, warray) - return diffs, nil -} - -// Dst computes and returns the destination text. -func Dst(diffs []diffmatchpatch.Diff) string { - var text bytes.Buffer - for _, d := range diffs { - if d.Type != diffmatchpatch.DiffDelete { - text.WriteString(d.Text) - } - } - return text.String() -} - -// Src computes and returns the source text -func Src(diffs []diffmatchpatch.Diff) string { - var text bytes.Buffer - for _, d := range diffs { - if d.Type != diffmatchpatch.DiffInsert { - text.WriteString(d.Text) - } - } - return text.String() -} diff --git a/modules/diffmatchpatch/LICENSE b/modules/diffmatchpatch/LICENSE deleted file mode 100644 index ede6be2..0000000 --- a/modules/diffmatchpatch/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2012-2016 The go-diff Authors. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/modules/diffmatchpatch/VERSION b/modules/diffmatchpatch/VERSION deleted file mode 100644 index d02f37c..0000000 --- a/modules/diffmatchpatch/VERSION +++ /dev/null @@ -1,6 +0,0 @@ -https://github.com/sergi/go-diff -5b0b94c5c0d3d261e044521f7f46479ef869cf76 - -PR: apply https://github.com/sergi/go-diff/pull/141 - -Zeta Changes: merge feature \ No newline at end of file diff --git a/modules/diffmatchpatch/diff.go b/modules/diffmatchpatch/diff.go deleted file mode 100644 index 5790d94..0000000 --- a/modules/diffmatchpatch/diff.go +++ /dev/null @@ -1,1399 +0,0 @@ -// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. -// https://github.com/sergi/go-diff -// See the included LICENSE file for license details. -// -// go-diff is a Go implementation of Google's Diff, Match, and Patch library -// Original library is Copyright (c) 2006 Google Inc. -// http://code.google.com/p/google-diff-match-patch/ - -package diffmatchpatch - -import ( - "bytes" - "errors" - "fmt" - "html" - "math" - "net/url" - "regexp" - "slices" - "strconv" - "strings" - "time" - "unicode/utf8" -) - -// LineMap is a mapping from a line hash to its text. -type LineMap map[rune]string - -// Operation defines the operation of a diff item. -type Operation int8 - -//go:generate stringer -type=Operation -trimprefix=Diff - -const ( - // DiffDelete item represents a delete diff. - DiffDelete Operation = -1 - // DiffInsert item represents an insert diff. - DiffInsert Operation = 1 - // DiffEqual item represents an equal diff. - DiffEqual Operation = 0 -) - -// Diff represents one diff operation -type Diff struct { - Type Operation - Text string -} - -// splice removes amount elements from slice at index index, replacing them with elements. -func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff { - if len(elements) == amount { - // Easy case: overwrite the relevant items. - copy(slice[index:], elements) - return slice - } - if len(elements) < amount { - // Fewer new items than old. - // Copy in the new items. - copy(slice[index:], elements) - // Shift the remaining items left. - copy(slice[index+len(elements):], slice[index+amount:]) - // Calculate the new end of the slice. - end := len(slice) - amount + len(elements) - // Zero stranded elements at end so that they can be garbage collected. - tail := slice[end:] - for i := range tail { - tail[i] = Diff{} - } - return slice[:end] - } - // More new items than old. - // Make room in slice for new elements. - // There's probably an even more efficient way to do this, - // but this is simple and clear. - need := len(slice) - amount + len(elements) - for len(slice) < need { - slice = append(slice, Diff{}) - } - // Shift slice elements right to make room for new elements. - copy(slice[index+len(elements):], slice[index+amount:]) - // Copy in new elements. - copy(slice[index:], elements) - return slice -} - -// DiffMain finds the differences between two texts. -// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. -// -// Note: if checklines is true, the limitation noted in DiffLinesToChars applies -func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff { - return dmp.DiffMainRunes([]rune(text1), []rune(text2), checklines) -} - -// DiffMainRunes finds the differences between two rune sequences. -// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. -// -// Note: if checklines is true, the limitation noted in DiffLinesToRunes applies -func (dmp *DiffMatchPatch) DiffMainRunes(text1, text2 []rune, checklines bool) []Diff { - var deadline time.Time - if dmp.DiffTimeout > 0 { - deadline = time.Now().Add(dmp.DiffTimeout) - } - return dmp.diffMainRunes(text1, text2, checklines, deadline) -} - -func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { - if slices.Equal(text1, text2) { - var diffs []Diff - if len(text1) > 0 { - diffs = append(diffs, Diff{DiffEqual, string(text1)}) - } - return diffs - } - // Trim off common prefix (speedup). - commonlength := commonPrefixLength(text1, text2) - commonprefix := text1[:commonlength] - text1 = text1[commonlength:] - text2 = text2[commonlength:] - - // Trim off common suffix (speedup). - commonlength = commonSuffixLength(text1, text2) - commonsuffix := text1[len(text1)-commonlength:] - text1 = text1[:len(text1)-commonlength] - text2 = text2[:len(text2)-commonlength] - - // Compute the diff on the middle block. - diffs := dmp.diffCompute(text1, text2, checklines, deadline) - - // Restore the prefix and suffix. - if len(commonprefix) != 0 { - diffs = append([]Diff{{DiffEqual, string(commonprefix)}}, diffs...) - } - if len(commonsuffix) != 0 { - diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)}) - } - - return dmp.DiffCleanupMerge(diffs) -} - -// diffCompute finds the differences between two rune slices. Assumes that the texts do not have any common prefix or suffix. -func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { - diffs := []Diff{} - if len(text1) == 0 { - // Just add some text (speedup). - return append(diffs, Diff{DiffInsert, string(text2)}) - } - if len(text2) == 0 { - // Just delete some text (speedup). - return append(diffs, Diff{DiffDelete, string(text1)}) - } - - var longtext, shorttext []rune - if len(text1) > len(text2) { - longtext = text1 - shorttext = text2 - } else { - longtext = text2 - shorttext = text1 - } - - if i := runesIndex(longtext, shorttext); i != -1 { - op := DiffInsert - // Swap insertions for deletions if diff is reversed. - if len(text1) > len(text2) { - op = DiffDelete - } - // Shorter text is inside the longer text (speedup). - return []Diff{ - {op, string(longtext[:i])}, - {DiffEqual, string(shorttext)}, - {op, string(longtext[i+len(shorttext):])}, - } - } else if len(shorttext) == 1 { - // Single character string. - // After the previous speedup, the character can't be an equality. - return []Diff{ - {DiffDelete, string(text1)}, - {DiffInsert, string(text2)}, - } - // Check to see if the problem can be split in two. - } else if hm := dmp.diffHalfMatch(text1, text2); hm != nil { - // A half-match was found, sort out the return data. - text1A := hm[0] - text1B := hm[1] - text2A := hm[2] - text2B := hm[3] - midCommon := hm[4] - // Send both pairs off for separate processing. - diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline) - diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline) - // Merge the results. - diffs := diffsA - diffs = append(diffs, Diff{DiffEqual, string(midCommon)}) - diffs = append(diffs, diffsB...) - return diffs - } else if checklines && len(text1) > 100 && len(text2) > 100 { - return dmp.diffLineMode(text1, text2, deadline) - } - return dmp.diffBisect(text1, text2, deadline) -} - -// diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs. -func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff { - // Scan the text on a line-by-line basis first. - text1, text2, linearray, err := dmp.DiffLinesToRunes(string(text1), string(text2)) - if err != nil { - return []Diff{} - } - - diffs := dmp.diffMainRunes(text1, text2, false, deadline) - - // Convert the diff back to original text. - diffs = dmp.DiffCharsToLines(diffs, linearray) - // Eliminate freak matches (e.g. blank lines) - diffs = dmp.DiffCleanupSemantic(diffs) - - // Rediff any replacement blocks, this time character-by-character. - // Add a dummy entry at the end. - diffs = append(diffs, Diff{DiffEqual, ""}) - - pointer := 0 - countDelete := 0 - countInsert := 0 - - // NOTE: Rune slices are slower than using strings in this case. - textDelete := "" - textInsert := "" - - for pointer < len(diffs) { - switch diffs[pointer].Type { - case DiffInsert: - countInsert++ - textInsert += diffs[pointer].Text - case DiffDelete: - countDelete++ - textDelete += diffs[pointer].Text - case DiffEqual: - // Upon reaching an equality, check for prior redundancies. - if countDelete >= 1 && countInsert >= 1 { - // Delete the offending records and add the merged ones. - diffs = splice(diffs, pointer-countDelete-countInsert, - countDelete+countInsert) - - pointer = pointer - countDelete - countInsert - a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), false, deadline) - for j := len(a) - 1; j >= 0; j-- { - diffs = splice(diffs, pointer, 0, a[j]) - } - pointer = pointer + len(a) - } - - countInsert = 0 - countDelete = 0 - textDelete = "" - textInsert = "" - } - pointer++ - } - - return diffs[:len(diffs)-1] // Remove the dummy entry at the end. -} - -// DiffBisect finds the 'middle snake' of a diff, split the problem in two and return the recursively constructed diff. -// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. -// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. -func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) []Diff { - // Unused in this code, but retained for interface compatibility. - return dmp.diffBisect([]rune(text1), []rune(text2), deadline) -} - -// diffBisect finds the 'middle snake' of a diff, splits the problem in two and returns the recursively constructed diff. -// See Myers's 1986 paper: An O(ND) Difference Algorithm and Its Variations. -func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time) []Diff { - // Cache the text lengths to prevent multiple calls. - runes1Len, runes2Len := len(runes1), len(runes2) - - maxD := (runes1Len + runes2Len + 1) / 2 - vOffset := maxD - vLength := 2 * maxD - - v1 := make([]int, vLength) - v2 := make([]int, vLength) - for i := range v1 { - v1[i] = -1 - v2[i] = -1 - } - v1[vOffset+1] = 0 - v2[vOffset+1] = 0 - - delta := runes1Len - runes2Len - // If the total number of characters is odd, then the front path will collide with the reverse path. - front := (delta%2 != 0) - // Offsets for start and end of k loop. Prevents mapping of space beyond the grid. - k1start := 0 - k1end := 0 - k2start := 0 - k2end := 0 - for d := 0; d < maxD; d++ { - // Bail out if deadline is reached. - if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) { - break - } - - // Walk the front path one step. - for k1 := -d + k1start; k1 <= d-k1end; k1 += 2 { - k1Offset := vOffset + k1 - var x1 int - - if k1 == -d || (k1 != d && v1[k1Offset-1] < v1[k1Offset+1]) { - x1 = v1[k1Offset+1] - } else { - x1 = v1[k1Offset-1] + 1 - } - - y1 := x1 - k1 - for x1 < runes1Len && y1 < runes2Len { - if runes1[x1] != runes2[y1] { - break - } - x1++ - y1++ - } - v1[k1Offset] = x1 - if x1 > runes1Len { - // Ran off the right of the graph. - k1end += 2 - } else if y1 > runes2Len { - // Ran off the bottom of the graph. - k1start += 2 - } else if front { - k2Offset := vOffset + delta - k1 - if k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1 { - // Mirror x2 onto top-left coordinate system. - x2 := runes1Len - v2[k2Offset] - if x1 >= x2 { - // Overlap detected. - return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) - } - } - } - } - // Walk the reverse path one step. - for k2 := -d + k2start; k2 <= d-k2end; k2 += 2 { - k2Offset := vOffset + k2 - var x2 int - if k2 == -d || (k2 != d && v2[k2Offset-1] < v2[k2Offset+1]) { - x2 = v2[k2Offset+1] - } else { - x2 = v2[k2Offset-1] + 1 - } - var y2 = x2 - k2 - for x2 < runes1Len && y2 < runes2Len { - if runes1[runes1Len-x2-1] != runes2[runes2Len-y2-1] { - break - } - x2++ - y2++ - } - v2[k2Offset] = x2 - if x2 > runes1Len { - // Ran off the left of the graph. - k2end += 2 - } else if y2 > runes2Len { - // Ran off the top of the graph. - k2start += 2 - } else if !front { - k1Offset := vOffset + delta - k2 - if k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1 { - x1 := v1[k1Offset] - y1 := vOffset + x1 - k1Offset - // Mirror x2 onto top-left coordinate system. - x2 = runes1Len - x2 - if x1 >= x2 { - // Overlap detected. - return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) - } - } - } - } - } - // Diff took too long and hit the deadline or number of diffs equals number of characters, no commonality at all. - return []Diff{ - {DiffDelete, string(runes1)}, - {DiffInsert, string(runes2)}, - } -} - -func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int, - deadline time.Time) []Diff { - runes1a := runes1[:x] - runes2a := runes2[:y] - runes1b := runes1[x:] - runes2b := runes2[y:] - - // Compute both diffs serially. - diffs := dmp.diffMainRunes(runes1a, runes2a, false, deadline) - diffsb := dmp.diffMainRunes(runes1b, runes2b, false, deadline) - - return append(diffs, diffsb...) -} - -// DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line. -// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes. -// -// Note: since we hash lines to runes, there is an upper limit to the number of -// unique lines this algorithm can handle. That limit is 1,112,063 unique -// lines. -func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, LineMap, error) { - chars1, chars2, lineMap, err := dmp.diffLinesToStrings(text1, text2) - return chars1, chars2, lineMap, err -} - -// DiffLinesToRunes splits two texts into a list of runes. -// -// Note: since we hash lines to runes, there is an upper limit to the number of -// unique lines this algorithm can handle. That limit is 1,112,063 unique -// lines. -func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, LineMap, error) { - chars1, chars2, lineMap, err := dmp.diffLinesToStrings(text1, text2) - return []rune(chars1), []rune(chars2), lineMap, err -} - -// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text. -func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineMap LineMap) []Diff { - hydrated := make([]Diff, 0, len(diffs)) - for _, aDiff := range diffs { - runes := []rune(aDiff.Text) - text := make([]string, len(runes)) - - for i, r := range runes { - text[i] = lineMap[r] - } - - aDiff.Text = strings.Join(text, "") - hydrated = append(hydrated, aDiff) - } - return hydrated -} - -// DiffCommonPrefix determines the common prefix length of two strings. -func (dmp *DiffMatchPatch) DiffCommonPrefix(text1, text2 string) int { - // Unused in this code, but retained for interface compatibility. - return commonPrefixLength([]rune(text1), []rune(text2)) -} - -// DiffCommonSuffix determines the common suffix length of two strings. -func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int { - // Unused in this code, but retained for interface compatibility. - return commonSuffixLength([]rune(text1), []rune(text2)) -} - -// commonPrefixLength returns the length of the common prefix of two rune slices. -func commonPrefixLength(text1, text2 []rune) int { - // Linear search. See comment in commonSuffixLength. - n := 0 - for ; n < len(text1) && n < len(text2); n++ { - if text1[n] != text2[n] { - return n - } - } - return n -} - -// commonSuffixLength returns the length of the common suffix of two rune slices. -func commonSuffixLength(text1, text2 []rune) int { - // Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/. - // See discussion at https://github.com/sergi/go-diff/issues/54. - i1 := len(text1) - i2 := len(text2) - for n := 0; ; n++ { - i1-- - i2-- - if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] { - return n - } - } -} - -// DiffCommonOverlap determines if the suffix of one string is the prefix of another. -func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int { - // Cache the text lengths to prevent multiple calls. - text1Length := len(text1) - text2Length := len(text2) - // Eliminate the null case. - if text1Length == 0 || text2Length == 0 { - return 0 - } - // Truncate the longer string. - if text1Length > text2Length { - text1 = text1[text1Length-text2Length:] - } else if text1Length < text2Length { - text2 = text2[0:text1Length] - } - textLength := int(math.Min(float64(text1Length), float64(text2Length))) - // Quick check for the worst case. - if text1 == text2 { - return textLength - } - - // Start by looking for a single character match and increase length until no match is found. Performance analysis: http://neil.fraser.name/news/2010/11/04/ - best := 0 - length := 1 - for { - pattern := text1[textLength-length:] - found := strings.Index(text2, pattern) - if found == -1 { - break - } - length += found - if found == 0 || text1[textLength-length:] == text2[0:length] { - best = length - length++ - } - } - - return best -} - -// DiffHalfMatch checks whether the two texts share a substring which is at least half the length of the longer text. This speedup can produce non-minimal diffs. -func (dmp *DiffMatchPatch) DiffHalfMatch(text1, text2 string) []string { - // Unused in this code, but retained for interface compatibility. - runeSlices := dmp.diffHalfMatch([]rune(text1), []rune(text2)) - if runeSlices == nil { - return nil - } - - result := make([]string, len(runeSlices)) - for i, r := range runeSlices { - result[i] = string(r) - } - return result -} - -func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune { - if dmp.DiffTimeout <= 0 { - // Don't risk returning a non-optimal diff if we have unlimited time. - return nil - } - - var longtext, shorttext []rune - if len(text1) > len(text2) { - longtext = text1 - shorttext = text2 - } else { - longtext = text2 - shorttext = text1 - } - - if len(longtext) < 4 || len(shorttext)*2 < len(longtext) { - return nil // Pointless. - } - - // First check if the second quarter is the seed for a half-match. - hm1 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+3)/4)) - - // Check again based on the third quarter. - hm2 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+1)/2)) - - var hm [][]rune - switch { - case hm1 == nil && hm2 == nil: - return nil - case hm2 == nil: - hm = hm1 - case hm1 == nil: - hm = hm2 - default: - // Both matched. Select the longest. - if len(hm1[4]) > len(hm2[4]) { - hm = hm1 - } else { - hm = hm2 - } - } - - // A half-match was found, sort out the return data. - if len(text1) > len(text2) { - return hm - } - - return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]} -} - -// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext? -// Returns a slice containing the prefix of longtext, the suffix of longtext, the prefix of shorttext, the suffix of shorttext and the common middle, or null if there was no match. -func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune { - var bestCommonA []rune - var bestCommonB []rune - var bestCommonLen int - var bestLongtextA []rune - var bestLongtextB []rune - var bestShorttextA []rune - var bestShorttextB []rune - - // Start with a 1/4 length substring at position i as a seed. - seed := l[i : i+len(l)/4] - - for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) { - prefixLength := commonPrefixLength(l[i:], s[j:]) - suffixLength := commonSuffixLength(l[:i], s[:j]) - - if bestCommonLen < suffixLength+prefixLength { - bestCommonA = s[j-suffixLength : j] - bestCommonB = s[j : j+prefixLength] - bestCommonLen = len(bestCommonA) + len(bestCommonB) - bestLongtextA = l[:i-suffixLength] - bestLongtextB = l[i+prefixLength:] - bestShorttextA = s[:j-suffixLength] - bestShorttextB = s[j+prefixLength:] - } - } - - if bestCommonLen*2 < len(l) { - return nil - } - - return [][]rune{ - bestLongtextA, - bestLongtextB, - bestShorttextA, - bestShorttextB, - append(bestCommonA, bestCommonB...), - } -} - -// DiffCleanupSemantic reduces the number of edits by eliminating semantically trivial equalities. -func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { - changes := false - // Stack of indices where equalities are found. - equalities := make([]int, 0, len(diffs)) - - var lastequality string - // Always equal to diffs[equalities[equalitiesLength - 1]][1] - var pointer int // Index of current position. - // Number of characters that changed prior to the equality. - var lengthInsertions1, lengthDeletions1 int - // Number of characters that changed after the equality. - var lengthInsertions2, lengthDeletions2 int - - for pointer < len(diffs) { - if diffs[pointer].Type == DiffEqual { - // Equality found. - equalities = append(equalities, pointer) - lengthInsertions1 = lengthInsertions2 - lengthDeletions1 = lengthDeletions2 - lengthInsertions2 = 0 - lengthDeletions2 = 0 - lastequality = diffs[pointer].Text - } else { - // An insertion or deletion. - - if diffs[pointer].Type == DiffInsert { - lengthInsertions2 += utf8.RuneCountInString(diffs[pointer].Text) - } else { - lengthDeletions2 += utf8.RuneCountInString(diffs[pointer].Text) - } - // Eliminate an equality that is smaller or equal to the edits on both sides of it. - difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1))) - difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2))) - if utf8.RuneCountInString(lastequality) > 0 && - (utf8.RuneCountInString(lastequality) <= difference1) && - (utf8.RuneCountInString(lastequality) <= difference2) { - // Duplicate record. - insPoint := equalities[len(equalities)-1] - diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) - - // Change second copy to insert. - diffs[insPoint+1].Type = DiffInsert - // Throw away the equality we just deleted. - equalities = equalities[:len(equalities)-1] - - if len(equalities) > 0 { - equalities = equalities[:len(equalities)-1] - } - pointer = -1 - if len(equalities) > 0 { - pointer = equalities[len(equalities)-1] - } - - lengthInsertions1 = 0 // Reset the counters. - lengthDeletions1 = 0 - lengthInsertions2 = 0 - lengthDeletions2 = 0 - lastequality = "" - changes = true - } - } - pointer++ - } - - // Normalize the diff. - if changes { - diffs = dmp.DiffCleanupMerge(diffs) - } - diffs = dmp.DiffCleanupSemanticLossless(diffs) - // Find any overlaps between deletions and insertions. - // e.g: abcxxxxxxdef - // -> abcxxxdef - // e.g: xxxabcdefxxx - // -> defxxxabc - // Only extract an overlap if it is as big as the edit ahead or behind it. - pointer = 1 - for pointer < len(diffs) { - if diffs[pointer-1].Type == DiffDelete && - diffs[pointer].Type == DiffInsert { - deletion := diffs[pointer-1].Text - insertion := diffs[pointer].Text - overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) - overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) - if overlapLength1 >= overlapLength2 { - if float64(overlapLength1) >= float64(utf8.RuneCountInString(deletion))/2 || - float64(overlapLength1) >= float64(utf8.RuneCountInString(insertion))/2 { - - // Overlap found. Insert an equality and trim the surrounding edits. - diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]}) - diffs[pointer-1].Text = - deletion[0 : len(deletion)-overlapLength1] - diffs[pointer+1].Text = insertion[overlapLength1:] - pointer++ - } - } else { - if float64(overlapLength2) >= float64(utf8.RuneCountInString(deletion))/2 || - float64(overlapLength2) >= float64(utf8.RuneCountInString(insertion))/2 { - // Reverse overlap found. Insert an equality and swap and trim the surrounding edits. - overlap := Diff{DiffEqual, deletion[:overlapLength2]} - diffs = splice(diffs, pointer, 0, overlap) - diffs[pointer-1].Type = DiffInsert - diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2] - diffs[pointer+1].Type = DiffDelete - diffs[pointer+1].Text = deletion[overlapLength2:] - pointer++ - } - } - pointer++ - } - pointer++ - } - - return diffs -} - -// Define some regex patterns for matching boundaries. -var ( - nonAlphaNumericRegex = regexp.MustCompile(`[^a-zA-Z0-9]`) - whitespaceRegex = regexp.MustCompile(`\s`) - linebreakRegex = regexp.MustCompile(`[\r\n]`) - blanklineEndRegex = regexp.MustCompile(`\n\r?\n$`) - //blanklineStartRegex = regexp.MustCompile(`^\r?\n\r?\n`) -) - -// diffCleanupSemanticScore computes a score representing whether the internal boundary falls on logical boundaries. -// Scores range from 6 (best) to 0 (worst). Closure, but does not reference any external variables. -func diffCleanupSemanticScore(one, two string) int { - if len(one) == 0 || len(two) == 0 { - // Edges are the best. - return 6 - } - - // Each port of this function behaves slightly differently due to subtle differences in each language's definition of things like 'whitespace'. Since this function's purpose is largely cosmetic, the choice has been made to use each language's native features rather than force total conformity. - rune1, _ := utf8.DecodeLastRuneInString(one) - rune2, _ := utf8.DecodeRuneInString(two) - char1 := string(rune1) - char2 := string(rune2) - - nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1) - nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2) - whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1) - whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2) - lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1) - lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2) - blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one) - blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two) - - if blankLine1 || blankLine2 { - // Five points for blank lines. - return 5 - } else if lineBreak1 || lineBreak2 { - // Four points for line breaks. - return 4 - } else if nonAlphaNumeric1 && !whitespace1 && whitespace2 { - // Three points for end of sentences. - return 3 - } else if whitespace1 || whitespace2 { - // Two points for whitespace. - return 2 - } else if nonAlphaNumeric1 || nonAlphaNumeric2 { - // One point for non-alphanumeric. - return 1 - } - return 0 -} - -// DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities which can be shifted sideways to align the edit to a word boundary. -// E.g: The cat came. -> The cat came. -func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff { - pointer := 1 - - // Intentionally ignore the first and last element (don't need checking). - for pointer < len(diffs)-1 { - if diffs[pointer-1].Type == DiffEqual && - diffs[pointer+1].Type == DiffEqual { - - // This is a single edit surrounded by equalities. - equality1 := diffs[pointer-1].Text - edit := diffs[pointer].Text - equality2 := diffs[pointer+1].Text - - // First, shift the edit as far left as possible. - commonOffset := dmp.DiffCommonSuffix(equality1, edit) - if commonOffset > 0 { - commonString := edit[len(edit)-commonOffset:] - equality1 = equality1[0 : len(equality1)-commonOffset] - edit = commonString + edit[:len(edit)-commonOffset] - equality2 = commonString + equality2 - } - - // Second, step character by character right, looking for the best fit. - bestEquality1 := equality1 - bestEdit := edit - bestEquality2 := equality2 - bestScore := diffCleanupSemanticScore(equality1, edit) + - diffCleanupSemanticScore(edit, equality2) - - for len(edit) != 0 && len(equality2) != 0 { - _, sz := utf8.DecodeRuneInString(edit) - if len(equality2) < sz || edit[:sz] != equality2[:sz] { - break - } - equality1 += edit[:sz] - edit = edit[sz:] + equality2[:sz] - equality2 = equality2[sz:] - score := diffCleanupSemanticScore(equality1, edit) + - diffCleanupSemanticScore(edit, equality2) - // The >= encourages trailing rather than leading whitespace on edits. - if score >= bestScore { - bestScore = score - bestEquality1 = equality1 - bestEdit = edit - bestEquality2 = equality2 - } - } - - if diffs[pointer-1].Text != bestEquality1 { - // We have an improvement, save it back to the diff. - if len(bestEquality1) != 0 { - diffs[pointer-1].Text = bestEquality1 - } else { - diffs = splice(diffs, pointer-1, 1) - pointer-- - } - - diffs[pointer].Text = bestEdit - if len(bestEquality2) != 0 { - diffs[pointer+1].Text = bestEquality2 - } else { - diffs = append(diffs[:pointer+1], diffs[pointer+2:]...) - pointer-- - } - } - } - pointer++ - } - - return diffs -} - -// DiffCleanupEfficiency reduces the number of edits by eliminating operationally trivial equalities. -func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff { - changes := false - // Stack of indices where equalities are found. - type equality struct { - data int - next *equality - } - var equalities *equality - // Always equal to equalities[equalitiesLength-1][1] - lastequality := "" - pointer := 0 // Index of current position. - // Is there an insertion operation before the last equality. - preIns := false - // Is there a deletion operation before the last equality. - preDel := false - // Is there an insertion operation after the last equality. - postIns := false - // Is there a deletion operation after the last equality. - postDel := false - for pointer < len(diffs) { - if diffs[pointer].Type == DiffEqual { // Equality found. - if len(diffs[pointer].Text) < dmp.DiffEditCost && - (postIns || postDel) { - // Candidate found. - equalities = &equality{ - data: pointer, - next: equalities, - } - preIns = postIns - preDel = postDel - lastequality = diffs[pointer].Text - } else { - // Not a candidate, and can never become one. - equalities = nil - lastequality = "" - } - postIns = false - postDel = false - } else { // An insertion or deletion. - if diffs[pointer].Type == DiffDelete { - postDel = true - } else { - postIns = true - } - - // Five types to be split: - // ABXYCD - // AXCD - // ABXC - // AXCD - // ABXC - var sumPres int - if preIns { - sumPres++ - } - if preDel { - sumPres++ - } - if postIns { - sumPres++ - } - if postDel { - sumPres++ - } - if len(lastequality) > 0 && - ((preIns && preDel && postIns && postDel) || - ((len(lastequality) < dmp.DiffEditCost/2) && sumPres == 3)) { - - insPoint := equalities.data - - // Duplicate record. - diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) - - // Change second copy to insert. - diffs[insPoint+1].Type = DiffInsert - // Throw away the equality we just deleted. - equalities = equalities.next - lastequality = "" - - if preIns && preDel { - // No changes made which could affect previous entry, keep going. - postIns = true - postDel = true - equalities = nil - } else { - if equalities != nil { - equalities = equalities.next - } - if equalities != nil { - pointer = equalities.data - } else { - pointer = -1 - } - postIns = false - postDel = false - } - changes = true - } - } - pointer++ - } - - if changes { - diffs = dmp.DiffCleanupMerge(diffs) - } - - return diffs -} - -// DiffCleanupMerge reorders and merges like edit sections. Merge equalities. -// Any edit section can move as long as it doesn't cross an equality. -func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff { - // Add a dummy entry at the end. - diffs = append(diffs, Diff{DiffEqual, ""}) - pointer := 0 - countDelete := 0 - countInsert := 0 - commonlength := 0 - textDelete := []rune(nil) - textInsert := []rune(nil) - - for pointer < len(diffs) { - switch diffs[pointer].Type { - case DiffInsert: - countInsert++ - textInsert = append(textInsert, []rune(diffs[pointer].Text)...) - pointer++ - case DiffDelete: - countDelete++ - textDelete = append(textDelete, []rune(diffs[pointer].Text)...) - pointer++ - case DiffEqual: - // Upon reaching an equality, check for prior redundancies. - if countDelete+countInsert > 1 { - if countDelete != 0 && countInsert != 0 { - // Factor out any common prefixies. - commonlength = commonPrefixLength(textInsert, textDelete) - if commonlength != 0 { - x := pointer - countDelete - countInsert - if x > 0 && diffs[x-1].Type == DiffEqual { - diffs[x-1].Text += string(textInsert[:commonlength]) - } else { - diffs = append([]Diff{{DiffEqual, string(textInsert[:commonlength])}}, diffs...) - pointer++ - } - textInsert = textInsert[commonlength:] - textDelete = textDelete[commonlength:] - } - // Factor out any common suffixies. - commonlength = commonSuffixLength(textInsert, textDelete) - if commonlength != 0 { - insertIndex := len(textInsert) - commonlength - deleteIndex := len(textDelete) - commonlength - diffs[pointer].Text = string(textInsert[insertIndex:]) + diffs[pointer].Text - textInsert = textInsert[:insertIndex] - textDelete = textDelete[:deleteIndex] - } - } - // Delete the offending records and add the merged ones. - if countDelete == 0 { - diffs = splice(diffs, pointer-countInsert, - countDelete+countInsert, - Diff{DiffInsert, string(textInsert)}) - } else if countInsert == 0 { - diffs = splice(diffs, pointer-countDelete, - countDelete+countInsert, - Diff{DiffDelete, string(textDelete)}) - } else { - diffs = splice(diffs, pointer-countDelete-countInsert, - countDelete+countInsert, - Diff{DiffDelete, string(textDelete)}, - Diff{DiffInsert, string(textInsert)}) - } - - pointer = pointer - countDelete - countInsert + 1 - if countDelete != 0 { - pointer++ - } - if countInsert != 0 { - pointer++ - } - } else if pointer != 0 && diffs[pointer-1].Type == DiffEqual { - // Merge this equality with the previous one. - diffs[pointer-1].Text += diffs[pointer].Text - diffs = append(diffs[:pointer], diffs[pointer+1:]...) - } else { - pointer++ - } - countInsert = 0 - countDelete = 0 - textDelete = nil - textInsert = nil - } - } - - if len(diffs[len(diffs)-1].Text) == 0 { - diffs = diffs[0 : len(diffs)-1] // Remove the dummy entry at the end. - } - - // Second pass: look for single edits surrounded on both sides by equalities which can be shifted sideways to eliminate an equality. E.g: ABAC -> ABAC - changes := false - pointer = 1 - // Intentionally ignore the first and last element (don't need checking). - for pointer < (len(diffs) - 1) { - if diffs[pointer-1].Type == DiffEqual && - diffs[pointer+1].Type == DiffEqual { - // This is a single edit surrounded by equalities. - if strings.HasSuffix(diffs[pointer].Text, diffs[pointer-1].Text) { - // Shift the edit over the previous equality. - diffs[pointer].Text = diffs[pointer-1].Text + - diffs[pointer].Text[:len(diffs[pointer].Text)-len(diffs[pointer-1].Text)] - diffs[pointer+1].Text = diffs[pointer-1].Text + diffs[pointer+1].Text - diffs = splice(diffs, pointer-1, 1) - changes = true - } else if strings.HasPrefix(diffs[pointer].Text, diffs[pointer+1].Text) { - // Shift the edit over the next equality. - diffs[pointer-1].Text += diffs[pointer+1].Text - diffs[pointer].Text = - diffs[pointer].Text[len(diffs[pointer+1].Text):] + diffs[pointer+1].Text - diffs = splice(diffs, pointer+1, 1) - changes = true - } - } - pointer++ - } - - // If shifts were made, the diff needs reordering and another shift sweep. - if changes { - diffs = dmp.DiffCleanupMerge(diffs) - } - - return diffs -} - -// DiffXIndex returns the equivalent location in s2. -func (dmp *DiffMatchPatch) DiffXIndex(diffs []Diff, loc int) int { - chars1 := 0 - chars2 := 0 - lastChars1 := 0 - lastChars2 := 0 - lastDiff := Diff{} - for i := 0; i < len(diffs); i++ { - aDiff := diffs[i] - if aDiff.Type != DiffInsert { - // Equality or deletion. - chars1 += len(aDiff.Text) - } - if aDiff.Type != DiffDelete { - // Equality or insertion. - chars2 += len(aDiff.Text) - } - if chars1 > loc { - // Overshot the location. - lastDiff = aDiff - break - } - lastChars1 = chars1 - lastChars2 = chars2 - } - if lastDiff.Type == DiffDelete { - // The location was deleted. - return lastChars2 - } - // Add the remaining character length. - return lastChars2 + (loc - lastChars1) -} - -// DiffPrettyHtml converts a []Diff into a pretty HTML report. -// It is intended as an example from which to write one's own display functions. -func (dmp *DiffMatchPatch) DiffPrettyHtml(diffs []Diff) string { - var buff bytes.Buffer - for _, diff := range diffs { - text := strings.Replace(html.EscapeString(diff.Text), "\n", "¶
", -1) - switch diff.Type { - case DiffInsert: - _, _ = buff.WriteString("") - _, _ = buff.WriteString(text) - _, _ = buff.WriteString("") - case DiffDelete: - _, _ = buff.WriteString("") - _, _ = buff.WriteString(text) - _, _ = buff.WriteString("") - case DiffEqual: - _, _ = buff.WriteString("") - _, _ = buff.WriteString(text) - _, _ = buff.WriteString("") - } - } - return buff.String() -} - -// DiffPrettyText converts a []Diff into a colored text report. -func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string { - var buff bytes.Buffer - for _, diff := range diffs { - text := diff.Text - - switch diff.Type { - case DiffInsert: - lines := strings.Split(text, "\n") - for i, line := range lines { - _, _ = buff.WriteString("\x1b[32m") - _, _ = buff.WriteString(line) - if i < len(lines)-1 { - _, _ = buff.WriteString("\x1b[0m\n") - } else { - _, _ = buff.WriteString("\x1b[0m") - } - } - case DiffDelete: - lines := strings.Split(text, "\n") - for i, line := range lines { - _, _ = buff.WriteString("\x1b[31m") - _, _ = buff.WriteString(line) - if i < len(lines)-1 { - _, _ = buff.WriteString("\x1b[0m\n") - } else { - _, _ = buff.WriteString("\x1b[0m") - } - } - case DiffEqual: - _, _ = buff.WriteString(text) - } - } - - return buff.String() -} - -// DiffText1 computes and returns the source text (all equalities and deletions). -func (dmp *DiffMatchPatch) DiffText1(diffs []Diff) string { - //StringBuilder text = new StringBuilder() - var text bytes.Buffer - - for _, aDiff := range diffs { - if aDiff.Type != DiffInsert { - _, _ = text.WriteString(aDiff.Text) - } - } - return text.String() -} - -// DiffText2 computes and returns the destination text (all equalities and insertions). -func (dmp *DiffMatchPatch) DiffText2(diffs []Diff) string { - var text bytes.Buffer - - for _, aDiff := range diffs { - if aDiff.Type != DiffDelete { - _, _ = text.WriteString(aDiff.Text) - } - } - return text.String() -} - -// DiffLevenshtein computes the Levenshtein distance that is the number of inserted, deleted or substituted characters. -func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int { - levenshtein := 0 - insertions := 0 - deletions := 0 - - for _, aDiff := range diffs { - switch aDiff.Type { - case DiffInsert: - insertions += utf8.RuneCountInString(aDiff.Text) - case DiffDelete: - deletions += utf8.RuneCountInString(aDiff.Text) - case DiffEqual: - // A deletion and an insertion is one substitution. - levenshtein += max(insertions, deletions) - insertions = 0 - deletions = 0 - } - } - - levenshtein += max(insertions, deletions) - return levenshtein -} - -// DiffToDelta crushes the diff into an encoded string which describes the operations required to transform text1 into text2. -// E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. Operations are tab-separated. Inserted text is escaped using %xx notation. -func (dmp *DiffMatchPatch) DiffToDelta(diffs []Diff) string { - var text bytes.Buffer - for _, aDiff := range diffs { - switch aDiff.Type { - case DiffInsert: - _, _ = text.WriteString("+") - _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1)) - _, _ = text.WriteString("\t") - case DiffDelete: - _, _ = text.WriteString("-") - _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) - _, _ = text.WriteString("\t") - case DiffEqual: - _, _ = text.WriteString("=") - _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) - _, _ = text.WriteString("\t") - } - } - delta := text.String() - if len(delta) != 0 { - // Strip off trailing tab character. - delta = delta[0 : utf8.RuneCountInString(delta)-1] - delta = unescaper.Replace(delta) - } - return delta -} - -// DiffFromDelta given the original text1, and an encoded string which describes the operations required to transform text1 into text2, comAdde the full diff. -func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Diff, err error) { - i := 0 - runes := []rune(text1) - - for _, token := range strings.Split(delta, "\t") { - if len(token) == 0 { - // Blank tokens are ok (from a trailing \t). - continue - } - - // Each token begins with a one character parameter which specifies the operation of this token (delete, insert, equality). - param := token[1:] - - switch op := token[0]; op { - case '+': - // Decode would Diff all "+" to " " - param = strings.Replace(param, "+", "%2b", -1) - param, err = url.QueryUnescape(param) - if err != nil { - return nil, err - } - if !utf8.ValidString(param) { - return nil, fmt.Errorf("invalid UTF-8 token: %q", param) - } - - diffs = append(diffs, Diff{DiffInsert, param}) - case '=', '-': - n, err := strconv.ParseInt(param, 10, 0) - if err != nil { - return nil, err - } else if n < 0 { - return nil, errors.New("Negative number in DiffFromDelta: " + param) - } - - i += int(n) - // Break out if we are out of bounds, go1.6 can't handle this very well - if i > len(runes) { - break - } - // Remember that string slicing is by byte - we want by rune here. - text := string(runes[i-int(n) : i]) - - if op == '=' { - diffs = append(diffs, Diff{DiffEqual, text}) - } else { - diffs = append(diffs, Diff{DiffDelete, text}) - } - default: - // Anything else is an error. - return nil, errors.New("Invalid diff operation in DiffFromDelta: " + string(token[0])) - } - } - - if i != len(runes) { - return nil, fmt.Errorf("delta length (%v) is different from source text length (%v)", i, len(text1)) - } - - return diffs, nil -} - -// diffLinesToStrings splits two texts into a list of strings. Each string represents one line. -func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, LineMap, error) { - lineMap := LineMap{} // e.g. lineMap[4] == 'Hello\n' - - lineHash := make(map[string]rune) - //Each string has the index of lineArray which it points to - strIndexArray1, err := dmp.diffLinesToRunesMunge(text1, lineMap, lineHash) - if err != nil { - return "", "", nil, err - } - strIndexArray2, err := dmp.diffLinesToRunesMunge(text2, lineMap, lineHash) - if err != nil { - return "", "", nil, err - } - return string(strIndexArray1), string(strIndexArray2), lineMap, nil -} - -// Code points in the surrogate range are not valid for UTF-8. -const ( - surrogateMin = 0xD800 - surrogateMax = 0xDFFF -) - -var ( - ErrTooManyUniqueLines = errors.New("too many unique lines") -) - -// diffLinesToRunesMunge splits a text into an array of strings, and reduces the texts to a LineMap. -func (dmp *DiffMatchPatch) diffLinesToRunesMunge(text string, lineMap LineMap, lineHash map[string]rune) ([]rune, error) { - // Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect. - lineStart := 0 - lineEnd := -1 - var strs []rune - - for lineEnd < len(text)-1 { - lineEnd = indexOf(text, "\n", lineStart) - - if lineEnd == -1 { - lineEnd = len(text) - 1 - } - - line := text[lineStart : lineEnd+1] - lineStart = lineEnd + 1 - lineValue, ok := lineHash[line] - - if ok { - strs = append(strs, lineValue) - continue - } - nextRune := rune(len(lineMap) + 1) - if nextRune >= surrogateMin { - // Skip invalid utf8 runes, if needed. - nextRune += surrogateMax - surrogateMin + 1 - } - if nextRune > utf8.MaxRune { - //panic("too many unique lines to use rune hashing") - return nil, ErrTooManyUniqueLines - } - lineMap[nextRune] = line - lineHash[line] = nextRune - strs = append(strs, nextRune) - } - - return strs, nil -} diff --git a/modules/diffmatchpatch/diffmatchpatch.go b/modules/diffmatchpatch/diffmatchpatch.go deleted file mode 100644 index d3acc32..0000000 --- a/modules/diffmatchpatch/diffmatchpatch.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. -// https://github.com/sergi/go-diff -// See the included LICENSE file for license details. -// -// go-diff is a Go implementation of Google's Diff, Match, and Patch library -// Original library is Copyright (c) 2006 Google Inc. -// http://code.google.com/p/google-diff-match-patch/ - -// Package diffmatchpatch offers robust algorithms to perform the operations required for synchronizing plain text. -package diffmatchpatch - -import ( - "time" -) - -// DiffMatchPatch holds the configuration for diff-match-patch operations. -type DiffMatchPatch struct { - // Number of seconds to map a diff before giving up (0 for infinity). - DiffTimeout time.Duration - // Cost of an empty edit operation in terms of edit characters. - DiffEditCost int - // How far to search for a match (0 = exact location, 1000+ = broad match). A match this many characters away from the expected location will add 1.0 to the score (0.0 is a perfect match). - MatchDistance int - // When deleting a large block of text (over ~64 characters), how close do the contents have to be to match the expected contents. (0.0 = perfection, 1.0 = very loose). Note that MatchThreshold controls how closely the end points of a delete need to match. - PatchDeleteThreshold float64 - // Chunk size for context length. - PatchMargin int - // The number of bits in an int. - MatchMaxBits int - // At what point is no match declared (0.0 = perfection, 1.0 = very loose). - MatchThreshold float64 -} - -// New creates a new DiffMatchPatch object with default parameters. -func New() *DiffMatchPatch { - // Defaults. - return &DiffMatchPatch{ - DiffTimeout: time.Second, - DiffEditCost: 4, - MatchThreshold: 0.5, - MatchDistance: 1000, - PatchDeleteThreshold: 0.5, - PatchMargin: 4, - MatchMaxBits: 32, - } -} diff --git a/modules/diffmatchpatch/match.go b/modules/diffmatchpatch/match.go deleted file mode 100644 index 02e328b..0000000 --- a/modules/diffmatchpatch/match.go +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. -// https://github.com/sergi/go-diff -// See the included LICENSE file for license details. -// -// go-diff is a Go implementation of Google's Diff, Match, and Patch library -// Original library is Copyright (c) 2006 Google Inc. -// http://code.google.com/p/google-diff-match-patch/ - -package diffmatchpatch - -import ( - "math" -) - -// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'. -// Returns -1 if no match found. -func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int { - // Check for null inputs not needed since null can't be passed in C#. - - loc = int(math.Max(0, math.Min(float64(loc), float64(len(text))))) - if text == pattern { - // Shortcut (potentially not guaranteed by the algorithm) - return 0 - } - if len(text) == 0 { - // Nothing to match. - return -1 - } - if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern { - // Perfect match at the perfect spot! (Includes case of null pattern) - return loc - } - // Do a fuzzy compare. - return dmp.MatchBitap(text, pattern, loc) -} - -// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm. -// Returns -1 if no match was found. -func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int { - // Initialise the alphabet. - s := dmp.MatchAlphabet(pattern) - - // Highest score beyond which we give up. - scoreThreshold := dmp.MatchThreshold - // Is there a nearby exact match? (speedup) - bestLoc := indexOf(text, pattern, loc) - if bestLoc != -1 { - scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, - pattern), scoreThreshold) - // What about in the other direction? (speedup) - bestLoc = lastIndexOf(text, pattern, loc+len(pattern)) - if bestLoc != -1 { - scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, - pattern), scoreThreshold) - } - } - - // Initialise the bit arrays. - matchmask := 1 << uint((len(pattern) - 1)) - bestLoc = -1 - - var binMin, binMid int - binMax := len(pattern) + len(text) - lastRd := []int{} - for d := 0; d < len(pattern); d++ { - // Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level. - binMin = 0 - binMid = binMax - for binMin < binMid { - if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold { - binMin = binMid - } else { - binMax = binMid - } - binMid = (binMax-binMin)/2 + binMin - } - // Use the result from this iteration as the maximum for the next. - binMax = binMid - start := int(math.Max(1, float64(loc-binMid+1))) - finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern))) - - rd := make([]int, finish+2) - rd[finish+1] = (1 << uint(d)) - 1 - - for j := finish; j >= start; j-- { - var charMatch int - if len(text) <= j-1 { - // Out of range. - charMatch = 0 - } else if _, ok := s[text[j-1]]; !ok { - charMatch = 0 - } else { - charMatch = s[text[j-1]] - } - - if d == 0 { - // First pass: exact match. - rd[j] = ((rd[j+1] << 1) | 1) & charMatch - } else { - // Subsequent passes: fuzzy match. - rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1] - } - if (rd[j] & matchmask) != 0 { - score := dmp.matchBitapScore(d, j-1, loc, pattern) - // This match will almost certainly be better than any existing match. But check anyway. - if score <= scoreThreshold { - // Told you so. - scoreThreshold = score - bestLoc = j - 1 - if bestLoc > loc { - // When passing loc, don't exceed our current distance from loc. - start = int(math.Max(1, float64(2*loc-bestLoc))) - } else { - // Already passed loc, downhill from here on in. - break - } - } - } - } - if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold { - // No hope for a (better) match at greater error levels. - break - } - lastRd = rd - } - return bestLoc -} - -// matchBitapScore computes and returns the score for a match with e errors and x location. -func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 { - accuracy := float64(e) / float64(len(pattern)) - proximity := math.Abs(float64(loc - x)) - if dmp.MatchDistance == 0 { - // Dodge divide by zero error. - if proximity == 0 { - return accuracy - } - - return 1.0 - } - return accuracy + (proximity / float64(dmp.MatchDistance)) -} - -// MatchAlphabet initialises the alphabet for the Bitap algorithm. -func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int { - s := map[byte]int{} - charPattern := []byte(pattern) - for _, c := range charPattern { - _, ok := s[c] - if !ok { - s[c] = 0 - } - } - i := 0 - - for _, c := range charPattern { - value := s[c] | int(uint(1)<>>>>>> B -// 6 -func (dmp *DiffMatchPatch) Merge(textO, textA, textB string, labelA, labelB string) (string, bool, error) { - m, err := dmp.MergeLinesToRunes(textO, textA, textB) - if err != nil { - return "", false, err - } - diffsA := dmp.DiffMainRunes(m.StrIndexArrayO, m.StrIndexArrayA, false) - diffsB := dmp.DiffMainRunes(m.StrIndexArrayO, m.StrIndexArrayB, false) - p1 := diffsToPair(diffsA) - p2 := diffsToPair(diffsB) - chunks := diffsPairToChunks(p1, p2) - var b strings.Builder - b.Grow(max(len(textA), len(textB))) // grow: reduce - mergeLine := func(i rune) { - if line, ok := m.Lines[i]; ok { - _, _ = b.WriteString(line) - } - } - var conflict bool - for _, chunk := range chunks { - // stable chunk, add lines to new file - if !chunk.conflict { - for _, i := range chunk.o { - mergeLine(i) - } - continue - } - // unstable chunk, add version A and version B to file - conflict = true - fmt.Fprintf(&b, "%s %s\n", Sep1, labelA) - for _, i := range chunk.a { - mergeLine(i) - } - fmt.Fprintf(&b, "%s\n", Sep2) - for _, i := range chunk.b { - mergeLine(i) - } - fmt.Fprintf(&b, "%s %s\n", Sep3, labelB) - } - return b.String(), conflict, nil -} - -type diffsPair struct { - unmodified []rune - modified []rune -} - -// change diff type from diff trunk to aligned pair -// example: -// diffs: equal [1,2]; insert [3]; equal [4,5] delete [6], equal [7] -// pair: origin: [1,2,-1,4,5,6,7]; modified: [1,2,3,4,5,-1,7] -func diffsToPair(diffs []Diff) *diffsPair { - p := &diffsPair{ - unmodified: make([]rune, 0, 100), - modified: make([]rune, 0, 100), - } - for _, d := range diffs { - text := []rune(d.Text) - switch d.Type { - case DiffEqual: - p.unmodified = append(p.unmodified, text...) - p.modified = append(p.modified, text...) - case DiffDelete: - p.unmodified = append(p.unmodified, text...) - for i := 0; i < len(text); i++ { - p.modified = append(p.modified, -1) - } - case DiffInsert: - p.modified = append(p.modified, text...) - for i := 0; i < len(text); i++ { - p.unmodified = append(p.unmodified, -1) - } - } - } - return p -} - -type mergeChunk struct { - o, a, b []rune - stable, conflict bool -} - -func newMergeChunk() *mergeChunk { - return &mergeChunk{ - o: make([]rune, 0, 32), - a: make([]rune, 0, 32), - b: make([]rune, 0, 32), - } -} - -func diffsPairToChunks(a, b *diffsPair) []*mergeChunk { - // indexA and indexB represent the current positions already traversed in A and B, respectively - // nextA and nextB represent the next position that a.o and b.o not empty(-1) - indexA := -1 - indexB := -1 - nextA := -1 - nextB := -1 - lenA := len(a.unmodified) - lenB := len(b.unmodified) - chunks := make([]*mergeChunk, 0) - chunk := newMergeChunk() - for indexA < lenA && indexB < lenB { - // update na - for i := indexA + 1; i < lenA; i++ { - if a.unmodified[i] != -1 { - nextA = i - break - } - } - // update nb - for i := indexB + 1; i < lenB; i++ { - if b.unmodified[i] != -1 { - nextB = i - break - } - } - - // that means the last common part has been traversed - if nextA == indexA { - // nothing left in pairs - if lenA == indexA+1 && lenB == indexB+1 { - // add chunk left - if len(chunk.o) != 0 { - chunks = append(chunks, chunk) - } - break - } - // left stuff must be unstable - if chunk.stable && len(chunk.o) != 0 { - chunks = append(chunks, chunk) - chunk = newMergeChunk() - chunk.stable = false - } - // unstable chunk, only append - chunk.a = append(chunk.a, a.modified[indexA+1:lenA]...) - chunk.b = append(chunk.b, b.modified[indexB+1:lenB]...) - chunks = append(chunks, chunk) - break - } - - // chunk is empty, init chunk - if len(chunk.o) == 0 { - // next index is adjacent, only delete possible, no insert. - // so only judge if o, a, b is equal - if nextA-indexA == 1 && nextB-indexB == 1 { - chunk.o = append(chunk.o, a.unmodified[nextA]) - if a.modified[nextA] != -1 { - chunk.a = append(chunk.a, a.unmodified[nextA]) - } - if b.modified[nextB] != -1 { - chunk.b = append(chunk.b, b.unmodified[nextB]) - } - // determine whether this chunk is a stable chunk or a unstable chunk - chunk.stable = a.modified[nextA] != -1 && b.modified[nextB] != -1 - indexA = nextA - indexB = nextB - continue - } - // na or nb not adjacent, so it should be a unstable chunk - chunk.stable = false - // first add insert part(index between ia-na & ib-nb) to unstable chunk - chunk.a = append(chunk.a, a.modified[indexA+1:nextA]...) - chunk.b = append(chunk.b, b.modified[indexB+1:nextB]...) - // if next origin is unstable, add it to chunk and finish - if a.modified[nextA] == -1 || b.modified[nextB] == -1 { - chunk.o = append(chunk.o, a.unmodified[nextA]) - if a.modified[nextA] != -1 { - chunk.a = append(chunk.a, a.unmodified[nextA]) - } - if b.modified[nextB] != -1 { - chunk.b = append(chunk.b, b.unmodified[nextB]) - } - indexA = nextA - indexB = nextB - continue - } - // next origin is stable, that means o = a = b - // curernt unstable chunk should be closed, and a new chunk should be created - chunks = append(chunks, chunk) - chunk = newMergeChunk() - chunk.o = append(chunk.o, a.unmodified[nextA]) - chunk.a = append(chunk.a, a.unmodified[nextA]) - chunk.b = append(chunk.b, b.unmodified[nextB]) - chunk.stable = true - indexA = nextA - indexB = nextB - continue - } - - // chunk is not empty, determine increase chunk or close & create chunk - // next index is adjacent, only delete possible, no insert. - // so only judge if o, a, b is equal - if nextA-indexA == 1 && nextB-indexB == 1 { - // o = a = b, stable index - if a.modified[nextA] != -1 && b.modified[nextB] != -1 { - // unstable chunk, close and create a new stable chunk - if !chunk.stable { - chunks = append(chunks, chunk) - chunk = newMergeChunk() - chunk.stable = true - } - // stable chunk, only append - chunk.o = append(chunk.o, a.unmodified[nextA]) - chunk.a = append(chunk.a, a.unmodified[nextA]) - chunk.b = append(chunk.b, b.unmodified[nextB]) - indexA = nextA - indexB = nextB - continue - } - // unstable index - // stable chunk, close and create a new unstable chunk - if chunk.stable { - chunks = append(chunks, chunk) - chunk = newMergeChunk() - chunk.stable = false - } - // unstable chunk, only append - chunk.o = append(chunk.o, a.unmodified[nextA]) - if a.modified[nextA] != -1 { - chunk.a = append(chunk.a, a.unmodified[nextA]) - } - if b.modified[nextB] != -1 { - chunk.b = append(chunk.b, b.unmodified[nextB]) - } - indexA = nextA - indexB = nextB - continue - } - - // na or nb not adjacent, so it should be a unstable chunk - // stable chunk, close and create a new unstable chunk - if chunk.stable { - chunks = append(chunks, chunk) - chunk = newMergeChunk() - chunk.stable = false - } - // first add insert part(index between ia-na & ib-nb) to unstable chunk - chunk.a = append(chunk.a, a.modified[indexA+1:nextA]...) - chunk.b = append(chunk.b, b.modified[indexB+1:nextB]...) - // if next origin is unstable, add it to chunk and finish - if a.modified[nextA] == -1 || b.modified[nextB] == -1 { - chunk.o = append(chunk.o, a.unmodified[nextA]) - if a.modified[nextA] != -1 { - chunk.a = append(chunk.a, a.unmodified[nextA]) - } - if b.modified[nextB] != -1 { - chunk.b = append(chunk.b, b.unmodified[nextB]) - } - indexA = nextA - indexB = nextB - continue - } - // next origin is stable, that means o = a = b - // curernt unstable chunk should be closed, and a new chunk should be created - chunks = append(chunks, chunk) - chunk = newMergeChunk() - chunk.o = append(chunk.o, a.unmodified[nextA]) - chunk.a = append(chunk.a, a.unmodified[nextA]) - chunk.b = append(chunk.b, b.unmodified[nextB]) - chunk.stable = true - indexA = nextA - indexB = nextB - continue - } - - for _, chunk := range chunks { - chunk.propagate() - } - return chunks -} - -// examine what has changed in each chunk -// decide what changes can be propagated -// if A & B both different from O and A and B not equal themself, there will be conflict -func (c *mergeChunk) propagate() { - // stable trunk doesn't need propagate - if c.stable { - return - } - switch { - case slices.Equal(c.a, c.b): - c.o = c.a - case slices.Equal(c.o, c.a): - c.o = c.b - c.a = c.b - case slices.Equal(c.o, c.b): - c.o = c.a - c.b = c.a - default: - c.conflict = true - } -} - -// Merge: Built-in text merging implementation. -// TODO: ignore CRLF --> LF ??? -func Merge(ctx context.Context, o, a, b string, labelO, labelA, labelB string) (string, bool, error) { - e := New() - e.DiffTimeout = time.Hour - return e.Merge(o, a, b, labelA, labelB) -} diff --git a/modules/diffmatchpatch/operation_string.go b/modules/diffmatchpatch/operation_string.go deleted file mode 100644 index 533ec0d..0000000 --- a/modules/diffmatchpatch/operation_string.go +++ /dev/null @@ -1,17 +0,0 @@ -// Code generated by "stringer -type=Operation -trimprefix=Diff"; DO NOT EDIT. - -package diffmatchpatch - -import "fmt" - -const _Operation_name = "DeleteEqualInsert" - -var _Operation_index = [...]uint8{0, 6, 11, 17} - -func (i Operation) String() string { - i -= -1 - if i < 0 || i >= Operation(len(_Operation_index)-1) { - return fmt.Sprintf("Operation(%d)", i+-1) - } - return _Operation_name[_Operation_index[i]:_Operation_index[i+1]] -} diff --git a/modules/diffmatchpatch/patch.go b/modules/diffmatchpatch/patch.go deleted file mode 100644 index 94f76e6..0000000 --- a/modules/diffmatchpatch/patch.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. -// https://github.com/sergi/go-diff -// See the included LICENSE file for license details. -// -// go-diff is a Go implementation of Google's Diff, Match, and Patch library -// Original library is Copyright (c) 2006 Google Inc. -// http://code.google.com/p/google-diff-match-patch/ - -package diffmatchpatch - -import ( - "bytes" - "errors" - "math" - "net/url" - "regexp" - "strconv" - "strings" -) - -// Patch represents one patch operation. -type Patch struct { - diffs []Diff - Start1 int - Start2 int - Length1 int - Length2 int -} - -// String emulates GNU diff's format. -// Header: @@ -382,8 +481,9 @@ -// Indices are printed as 1-based, not 0-based. -func (p *Patch) String() string { - var coords1, coords2 string - - if p.Length1 == 0 { - coords1 = strconv.Itoa(p.Start1) + ",0" - } else if p.Length1 == 1 { - coords1 = strconv.Itoa(p.Start1 + 1) - } else { - coords1 = strconv.Itoa(p.Start1+1) + "," + strconv.Itoa(p.Length1) - } - - if p.Length2 == 0 { - coords2 = strconv.Itoa(p.Start2) + ",0" - } else if p.Length2 == 1 { - coords2 = strconv.Itoa(p.Start2 + 1) - } else { - coords2 = strconv.Itoa(p.Start2+1) + "," + strconv.Itoa(p.Length2) - } - - var text bytes.Buffer - _, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n") - - // Escape the body of the patch with %xx notation. - for _, aDiff := range p.diffs { - switch aDiff.Type { - case DiffInsert: - _, _ = text.WriteString("+") - case DiffDelete: - _, _ = text.WriteString("-") - case DiffEqual: - _, _ = text.WriteString(" ") - } - - _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1)) - _, _ = text.WriteString("\n") - } - - return unescaper.Replace(text.String()) -} - -// PatchAddContext increases the context until it is unique, but doesn't let the pattern expand beyond MatchMaxBits. -func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch { - if len(text) == 0 { - return patch - } - - pattern := text[patch.Start2 : patch.Start2+patch.Length1] - padding := 0 - - // Look for the first and last matches of pattern in text. If two different matches are found, increase the pattern length. - for strings.Index(text, pattern) != strings.LastIndex(text, pattern) && - len(pattern) < dmp.MatchMaxBits-2*dmp.PatchMargin { - padding += dmp.PatchMargin - maxStart := max(0, patch.Start2-padding) - minEnd := min(len(text), patch.Start2+patch.Length1+padding) - pattern = text[maxStart:minEnd] - } - // Add one chunk for good luck. - padding += dmp.PatchMargin - - // Add the prefix. - prefix := text[max(0, patch.Start2-padding):patch.Start2] - if len(prefix) != 0 { - patch.diffs = append([]Diff{{DiffEqual, prefix}}, patch.diffs...) - } - // Add the suffix. - suffix := text[patch.Start2+patch.Length1 : min(len(text), patch.Start2+patch.Length1+padding)] - if len(suffix) != 0 { - patch.diffs = append(patch.diffs, Diff{DiffEqual, suffix}) - } - - // Roll back the start points. - patch.Start1 -= len(prefix) - patch.Start2 -= len(prefix) - // Extend the lengths. - patch.Length1 += len(prefix) + len(suffix) - patch.Length2 += len(prefix) + len(suffix) - - return patch -} - -// PatchMake computes a list of patches. -func (dmp *DiffMatchPatch) PatchMake(opt ...any) []Patch { - if len(opt) == 1 { - diffs, _ := opt[0].([]Diff) - text1 := dmp.DiffText1(diffs) - return dmp.PatchMake(text1, diffs) - } else if len(opt) == 2 { - text1 := opt[0].(string) - switch t := opt[1].(type) { - case string: - diffs := dmp.DiffMain(text1, t, true) - if len(diffs) > 2 { - diffs = dmp.DiffCleanupSemantic(diffs) - diffs = dmp.DiffCleanupEfficiency(diffs) - } - return dmp.PatchMake(text1, diffs) - case []Diff: - return dmp.patchMake2(text1, t) - } - } else if len(opt) == 3 { - return dmp.PatchMake(opt[0], opt[2]) - } - return []Patch{} -} - -// patchMake2 computes a list of patches to turn text1 into text2. -// text2 is not provided, diffs are the delta between text1 and text2. -func (dmp *DiffMatchPatch) patchMake2(text1 string, diffs []Diff) []Patch { - // Check for null inputs not needed since null can't be passed in C#. - patches := []Patch{} - if len(diffs) == 0 { - return patches // Get rid of the null case. - } - - patch := Patch{} - charCount1 := 0 // Number of characters into the text1 string. - charCount2 := 0 // Number of characters into the text2 string. - // Start with text1 (prepatchText) and apply the diffs until we arrive at text2 (postpatchText). We recreate the patches one by one to determine context info. - prepatchText := text1 - postpatchText := text1 - - for i, aDiff := range diffs { - if len(patch.diffs) == 0 && aDiff.Type != DiffEqual { - // A new patch starts here. - patch.Start1 = charCount1 - patch.Start2 = charCount2 - } - - switch aDiff.Type { - case DiffInsert: - patch.diffs = append(patch.diffs, aDiff) - patch.Length2 += len(aDiff.Text) - postpatchText = postpatchText[:charCount2] + - aDiff.Text + postpatchText[charCount2:] - case DiffDelete: - patch.Length1 += len(aDiff.Text) - patch.diffs = append(patch.diffs, aDiff) - postpatchText = postpatchText[:charCount2] + postpatchText[charCount2+len(aDiff.Text):] - case DiffEqual: - if len(aDiff.Text) <= 2*dmp.PatchMargin && - len(patch.diffs) != 0 && i != len(diffs)-1 { - // Small equality inside a patch. - patch.diffs = append(patch.diffs, aDiff) - patch.Length1 += len(aDiff.Text) - patch.Length2 += len(aDiff.Text) - } - if len(aDiff.Text) >= 2*dmp.PatchMargin { - // Time for a new patch. - if len(patch.diffs) != 0 { - patch = dmp.PatchAddContext(patch, prepatchText) - patches = append(patches, patch) - patch = Patch{} - // Unlike Unidiff, our patch lists have a rolling context. http://code.google.com/p/google-diff-match-patch/wiki/Unidiff Update prepatch text & pos to reflect the application of the just completed patch. - prepatchText = postpatchText - charCount1 = charCount2 - } - } - } - - // Update the current character count. - if aDiff.Type != DiffInsert { - charCount1 += len(aDiff.Text) - } - if aDiff.Type != DiffDelete { - charCount2 += len(aDiff.Text) - } - } - - // Pick up the leftover patch if not empty. - if len(patch.diffs) != 0 { - patch = dmp.PatchAddContext(patch, prepatchText) - patches = append(patches, patch) - } - - return patches -} - -// PatchDeepCopy returns an array that is identical to a given an array of patches. -func (dmp *DiffMatchPatch) PatchDeepCopy(patches []Patch) []Patch { - patchesCopy := []Patch{} - for _, aPatch := range patches { - patchCopy := Patch{} - for _, aDiff := range aPatch.diffs { - patchCopy.diffs = append(patchCopy.diffs, Diff{ - aDiff.Type, - aDiff.Text, - }) - } - patchCopy.Start1 = aPatch.Start1 - patchCopy.Start2 = aPatch.Start2 - patchCopy.Length1 = aPatch.Length1 - patchCopy.Length2 = aPatch.Length2 - patchesCopy = append(patchesCopy, patchCopy) - } - return patchesCopy -} - -// PatchApply merges a set of patches onto the text. Returns a patched text, as well as an array of true/false values indicating which patches were applied. -func (dmp *DiffMatchPatch) PatchApply(patches []Patch, text string) (string, []bool) { - if len(patches) == 0 { - return text, []bool{} - } - - // Deep copy the patches so that no changes are made to originals. - patches = dmp.PatchDeepCopy(patches) - - nullPadding := dmp.PatchAddPadding(patches) - text = nullPadding + text + nullPadding - patches = dmp.PatchSplitMax(patches) - - x := 0 - // delta keeps track of the offset between the expected and actual location of the previous patch. If there are patches expected at positions 10 and 20, but the first patch was found at 12, delta is 2 and the second patch has an effective expected position of 22. - delta := 0 - results := make([]bool, len(patches)) - for _, aPatch := range patches { - expectedLoc := aPatch.Start2 + delta - text1 := dmp.DiffText1(aPatch.diffs) - var startLoc int - endLoc := -1 - if len(text1) > dmp.MatchMaxBits { - // PatchSplitMax will only provide an oversized pattern in the case of a monster delete. - startLoc = dmp.MatchMain(text, text1[:dmp.MatchMaxBits], expectedLoc) - if startLoc != -1 { - endLoc = dmp.MatchMain(text, - text1[len(text1)-dmp.MatchMaxBits:], expectedLoc+len(text1)-dmp.MatchMaxBits) - if endLoc == -1 || startLoc >= endLoc { - // Can't find valid trailing context. Drop this patch. - startLoc = -1 - } - } - } else { - startLoc = dmp.MatchMain(text, text1, expectedLoc) - } - if startLoc == -1 { - // No match found. :( - results[x] = false - // Subtract the delta for this failed patch from subsequent patches. - delta -= aPatch.Length2 - aPatch.Length1 - } else { - // Found a match. :) - results[x] = true - delta = startLoc - expectedLoc - var text2 string - if endLoc == -1 { - text2 = text[startLoc:int(math.Min(float64(startLoc+len(text1)), float64(len(text))))] - } else { - text2 = text[startLoc:int(math.Min(float64(endLoc+dmp.MatchMaxBits), float64(len(text))))] - } - if text1 == text2 { - // Perfect match, just shove the Replacement text in. - text = text[:startLoc] + dmp.DiffText2(aPatch.diffs) + text[startLoc+len(text1):] - } else { - // Imperfect match. Run a diff to get a framework of equivalent indices. - diffs := dmp.DiffMain(text1, text2, false) - if len(text1) > dmp.MatchMaxBits && float64(dmp.DiffLevenshtein(diffs))/float64(len(text1)) > dmp.PatchDeleteThreshold { - // The end points match, but the content is unacceptably bad. - results[x] = false - } else { - diffs = dmp.DiffCleanupSemanticLossless(diffs) - index1 := 0 - for _, aDiff := range aPatch.diffs { - if aDiff.Type != DiffEqual { - index2 := dmp.DiffXIndex(diffs, index1) - if aDiff.Type == DiffInsert { - // Insertion - text = text[:startLoc+index2] + aDiff.Text + text[startLoc+index2:] - } else if aDiff.Type == DiffDelete { - // Deletion - startIndex := startLoc + index2 - text = text[:startIndex] + - text[startIndex+dmp.DiffXIndex(diffs, index1+len(aDiff.Text))-index2:] - } - } - if aDiff.Type != DiffDelete { - index1 += len(aDiff.Text) - } - } - } - } - } - x++ - } - // Strip the padding off. - text = text[len(nullPadding) : len(nullPadding)+(len(text)-2*len(nullPadding))] - return text, results -} - -// PatchAddPadding adds some padding on text start and end so that edges can match something. -// Intended to be called only from within patchApply. -func (dmp *DiffMatchPatch) PatchAddPadding(patches []Patch) string { - paddingLength := dmp.PatchMargin - nullPadding := "" - for x := 1; x <= paddingLength; x++ { - nullPadding += string(rune(x)) - } - - // Bump all the patches forward. - for i := range patches { - patches[i].Start1 += paddingLength - patches[i].Start2 += paddingLength - } - - // Add some padding on start of first diff. - if len(patches[0].diffs) == 0 || patches[0].diffs[0].Type != DiffEqual { - // Add nullPadding equality. - patches[0].diffs = append([]Diff{{DiffEqual, nullPadding}}, patches[0].diffs...) - patches[0].Start1 -= paddingLength // Should be 0. - patches[0].Start2 -= paddingLength // Should be 0. - patches[0].Length1 += paddingLength - patches[0].Length2 += paddingLength - } else if paddingLength > len(patches[0].diffs[0].Text) { - // Grow first equality. - extraLength := paddingLength - len(patches[0].diffs[0].Text) - patches[0].diffs[0].Text = nullPadding[len(patches[0].diffs[0].Text):] + patches[0].diffs[0].Text - patches[0].Start1 -= extraLength - patches[0].Start2 -= extraLength - patches[0].Length1 += extraLength - patches[0].Length2 += extraLength - } - - // Add some padding on end of last diff. - last := len(patches) - 1 - if len(patches[last].diffs) == 0 || patches[last].diffs[len(patches[last].diffs)-1].Type != DiffEqual { - // Add nullPadding equality. - patches[last].diffs = append(patches[last].diffs, Diff{DiffEqual, nullPadding}) - patches[last].Length1 += paddingLength - patches[last].Length2 += paddingLength - } else if paddingLength > len(patches[last].diffs[len(patches[last].diffs)-1].Text) { - // Grow last equality. - lastDiff := patches[last].diffs[len(patches[last].diffs)-1] - extraLength := paddingLength - len(lastDiff.Text) - patches[last].diffs[len(patches[last].diffs)-1].Text += nullPadding[:extraLength] - patches[last].Length1 += extraLength - patches[last].Length2 += extraLength - } - - return nullPadding -} - -// PatchSplitMax looks through the patches and breaks up any which are longer than the maximum limit of the match algorithm. -// Intended to be called only from within patchApply. -func (dmp *DiffMatchPatch) PatchSplitMax(patches []Patch) []Patch { - patchSize := dmp.MatchMaxBits - for x := 0; x < len(patches); x++ { - if patches[x].Length1 <= patchSize { - continue - } - bigpatch := patches[x] - // Remove the big old patch. - patches = append(patches[:x], patches[x+1:]...) - x-- - - Start1 := bigpatch.Start1 - Start2 := bigpatch.Start2 - precontext := "" - for len(bigpatch.diffs) != 0 { - // Create one of several smaller patches. - patch := Patch{} - empty := true - patch.Start1 = Start1 - len(precontext) - patch.Start2 = Start2 - len(precontext) - if len(precontext) != 0 { - patch.Length1 = len(precontext) - patch.Length2 = len(precontext) - patch.diffs = append(patch.diffs, Diff{DiffEqual, precontext}) - } - for len(bigpatch.diffs) != 0 && patch.Length1 < patchSize-dmp.PatchMargin { - diffType := bigpatch.diffs[0].Type - diffText := bigpatch.diffs[0].Text - if diffType == DiffInsert { - // Insertions are harmless. - patch.Length2 += len(diffText) - Start2 += len(diffText) - patch.diffs = append(patch.diffs, bigpatch.diffs[0]) - bigpatch.diffs = bigpatch.diffs[1:] - empty = false - } else if diffType == DiffDelete && len(patch.diffs) == 1 && patch.diffs[0].Type == DiffEqual && len(diffText) > 2*patchSize { - // This is a large deletion. Let it pass in one chunk. - patch.Length1 += len(diffText) - Start1 += len(diffText) - empty = false - patch.diffs = append(patch.diffs, Diff{diffType, diffText}) - bigpatch.diffs = bigpatch.diffs[1:] - } else { - // Deletion or equality. Only take as much as we can stomach. - diffText = diffText[:min(len(diffText), patchSize-patch.Length1-dmp.PatchMargin)] - - patch.Length1 += len(diffText) - Start1 += len(diffText) - if diffType == DiffEqual { - patch.Length2 += len(diffText) - Start2 += len(diffText) - } else { - empty = false - } - patch.diffs = append(patch.diffs, Diff{diffType, diffText}) - if diffText == bigpatch.diffs[0].Text { - bigpatch.diffs = bigpatch.diffs[1:] - } else { - bigpatch.diffs[0].Text = - bigpatch.diffs[0].Text[len(diffText):] - } - } - } - // Compute the head context for the next patch. - precontext = dmp.DiffText2(patch.diffs) - precontext = precontext[max(0, len(precontext)-dmp.PatchMargin):] - - postcontext := "" - // Append the end context for this patch. - if len(dmp.DiffText1(bigpatch.diffs)) > dmp.PatchMargin { - postcontext = dmp.DiffText1(bigpatch.diffs)[:dmp.PatchMargin] - } else { - postcontext = dmp.DiffText1(bigpatch.diffs) - } - - if len(postcontext) != 0 { - patch.Length1 += len(postcontext) - patch.Length2 += len(postcontext) - if len(patch.diffs) != 0 && patch.diffs[len(patch.diffs)-1].Type == DiffEqual { - patch.diffs[len(patch.diffs)-1].Text += postcontext - } else { - patch.diffs = append(patch.diffs, Diff{DiffEqual, postcontext}) - } - } - if !empty { - x++ - patches = append(patches[:x], append([]Patch{patch}, patches[x:]...)...) - } - } - } - return patches -} - -// PatchToText takes a list of patches and returns a textual representation. -func (dmp *DiffMatchPatch) PatchToText(patches []Patch) string { - var text bytes.Buffer - for _, aPatch := range patches { - _, _ = text.WriteString(aPatch.String()) - } - return text.String() -} - -// PatchFromText parses a textual representation of patches and returns a List of Patch objects. -func (dmp *DiffMatchPatch) PatchFromText(textline string) ([]Patch, error) { - patches := []Patch{} - if len(textline) == 0 { - return patches, nil - } - text := strings.Split(textline, "\n") - textPointer := 0 - patchHeader := regexp.MustCompile(`^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$`) - - var patch Patch - var sign uint8 - var line string - for textPointer < len(text) { - - if !patchHeader.MatchString(text[textPointer]) { - return patches, errors.New("Invalid patch string: " + text[textPointer]) - } - - patch = Patch{} - m := patchHeader.FindStringSubmatch(text[textPointer]) - - patch.Start1, _ = strconv.Atoi(m[1]) - if len(m[2]) == 0 { - patch.Start1-- - patch.Length1 = 1 - } else if m[2] == "0" { - patch.Length1 = 0 - } else { - patch.Start1-- - patch.Length1, _ = strconv.Atoi(m[2]) - } - - patch.Start2, _ = strconv.Atoi(m[3]) - - if len(m[4]) == 0 { - patch.Start2-- - patch.Length2 = 1 - } else if m[4] == "0" { - patch.Length2 = 0 - } else { - patch.Start2-- - patch.Length2, _ = strconv.Atoi(m[4]) - } - textPointer++ - - for textPointer < len(text) { - if len(text[textPointer]) > 0 { - sign = text[textPointer][0] - } else { - textPointer++ - continue - } - - line = text[textPointer][1:] - line = strings.Replace(line, "+", "%2b", -1) - line, _ = url.QueryUnescape(line) - if sign == '-' { - // Deletion. - patch.diffs = append(patch.diffs, Diff{DiffDelete, line}) - } else if sign == '+' { - // Insertion. - patch.diffs = append(patch.diffs, Diff{DiffInsert, line}) - } else if sign == ' ' { - // Minor equality. - patch.diffs = append(patch.diffs, Diff{DiffEqual, line}) - } else if sign == '@' { - // Start of next patch. - break - } else { - // WTF? - return patches, errors.New("Invalid patch mode '" + string(sign) + "' in: " + string(line)) - } - textPointer++ - } - - patches = append(patches, patch) - } - return patches, nil -} diff --git a/modules/diffmatchpatch/patch_test.go b/modules/diffmatchpatch/patch_test.go deleted file mode 100644 index 9855ef8..0000000 --- a/modules/diffmatchpatch/patch_test.go +++ /dev/null @@ -1,339 +0,0 @@ -// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. -// https://github.com/sergi/go-diff -// See the included LICENSE file for license details. -// -// go-diff is a Go implementation of Google's Diff, Match, and Patch library -// Original library is Copyright (c) 2006 Google Inc. -// http://code.google.com/p/google-diff-match-patch/ - -package diffmatchpatch - -import ( - "fmt" - "strings" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestPatchString(t *testing.T) { - type TestCase struct { - Patch Patch - - Expected string - } - - for i, tc := range []TestCase{ - { - Patch: Patch{ - Start1: 20, - Start2: 21, - Length1: 18, - Length2: 17, - - diffs: []Diff{ - {DiffEqual, "jump"}, - {DiffDelete, "s"}, - {DiffInsert, "ed"}, - {DiffEqual, " over "}, - {DiffDelete, "the"}, - {DiffInsert, "a"}, - {DiffEqual, "\nlaz"}, - }, - }, - - Expected: "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n", - }, - } { - actual := tc.Patch.String() - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc)) - } -} - -func TestPatchFromText(t *testing.T) { - type TestCase struct { - Patch string - - ErrorMessagePrefix string - } - - dmp := New() - - for i, tc := range []TestCase{ - {"", ""}, - {"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n", ""}, - {"@@ -1 +1 @@\n-a\n+b\n", ""}, - {"@@ -1,3 +0,0 @@\n-abc\n", ""}, - {"@@ -0,0 +1,3 @@\n+abc\n", ""}, - {"@@ _0,0 +0,0 @@\n+abc\n", "Invalid patch string: @@ _0,0 +0,0 @@"}, - {"Bad\nPatch\n", "Invalid patch string"}, - } { - patches, err := dmp.PatchFromText(tc.Patch) - if tc.ErrorMessagePrefix == "" { - assert.Nil(t, err) - - if tc.Patch == "" { - assert.Equal(t, []Patch{}, patches, fmt.Sprintf("Test case #%d, %#v", i, tc)) - } else { - assert.Equal(t, tc.Patch, patches[0].String(), fmt.Sprintf("Test case #%d, %#v", i, tc)) - } - } else { - e := err.Error() - if strings.HasPrefix(e, tc.ErrorMessagePrefix) { - e = tc.ErrorMessagePrefix - } - assert.Equal(t, tc.ErrorMessagePrefix, e) - } - } - - diffs := []Diff{ - {DiffDelete, "`1234567890-=[]\\;',./"}, - {DiffInsert, "~!@#$%^&*()_+{}|:\"<>?"}, - } - - patches, err := dmp.PatchFromText("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n") - assert.Len(t, patches, 1) - assert.Equal(t, diffs, - patches[0].diffs, - ) - assert.Nil(t, err) -} - -func TestPatchToText(t *testing.T) { - type TestCase struct { - Patch string - } - - dmp := New() - - for i, tc := range []TestCase{ - {"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"}, - {"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"}, - } { - patches, err := dmp.PatchFromText(tc.Patch) - assert.Nil(t, err) - - actual := dmp.PatchToText(patches) - assert.Equal(t, tc.Patch, actual, fmt.Sprintf("Test case #%d, %#v", i, tc)) - } -} - -func TestPatchAddContext(t *testing.T) { - type TestCase struct { - Name string - - Patch string - Text string - - Expected string - } - - dmp := New() - dmp.PatchMargin = 4 - - for i, tc := range []TestCase{ - {"Simple case", "@@ -21,4 +21,10 @@\n-jump\n+somersault\n", "The quick brown fox jumps over the lazy dog.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n"}, - {"Not enough trailing context", "@@ -21,4 +21,10 @@\n-jump\n+somersault\n", "The quick brown fox jumps.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n"}, - {"Not enough leading context", "@@ -3 +3,2 @@\n-e\n+at\n", "The quick brown fox jumps.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n"}, - {"Ambiguity", "@@ -3 +3,2 @@\n-e\n+at\n", "The quick brown fox jumps. The quick brown fox crashes.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n"}, - } { - patches, err := dmp.PatchFromText(tc.Patch) - assert.Nil(t, err) - - actual := dmp.PatchAddContext(patches[0], tc.Text) - assert.Equal(t, tc.Expected, actual.String(), fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - } -} - -func TestPatchMakeAndPatchToText(t *testing.T) { - type TestCase struct { - Name string - - Input1 any - Input2 any - Input3 any - - Expected string - } - - dmp := New() - - text1 := "The quick brown fox jumps over the lazy dog." - text2 := "That quick brown fox jumped over a lazy dog." - - for i, tc := range []TestCase{ - {"Null case", "", "", nil, ""}, - {"Text2+Text1 inputs", text2, text1, nil, "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"}, - {"Text1+Text2 inputs", text1, text2, nil, "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"}, - {"Diff input", dmp.DiffMain(text1, text2, false), nil, nil, "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"}, - {"Text1+Diff inputs", text1, dmp.DiffMain(text1, text2, false), nil, "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"}, - {"Text1+Text2+Diff inputs (deprecated)", text1, text2, dmp.DiffMain(text1, text2, false), "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"}, - {"Character encoding", "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?", nil, "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n"}, - {"Long string with repeats", strings.Repeat("abcdef", 100), strings.Repeat("abcdef", 100) + "123", nil, "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"}, - {"Corner case of #31 fixed by #32", "2016-09-01T03:07:14.807830741Z", "2016-09-01T03:07:15.154800781Z", nil, "@@ -15,16 +15,16 @@\n 07:1\n+5.15\n 4\n-.\n 80\n+0\n 78\n-3074\n 1Z\n"}, - } { - var patches []Patch - if tc.Input3 != nil { - patches = dmp.PatchMake(tc.Input1, tc.Input2, tc.Input3) - } else if tc.Input2 != nil { - patches = dmp.PatchMake(tc.Input1, tc.Input2) - } else if ps, ok := tc.Input1.([]Patch); ok { - patches = ps - } else { - patches = dmp.PatchMake(tc.Input1) - } - - actual := dmp.PatchToText(patches) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - } - - // Corner case of #28 wrong patch with timeout of 0 - dmp.DiffTimeout = 0 - - text1 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus ut risus et enim consectetur convallis a non ipsum. Sed nec nibh cursus, interdum libero vel." - text2 = "Lorem a ipsum dolor sit amet, consectetur adipiscing elit. Vivamus ut risus et enim consectetur convallis a non ipsum. Sed nec nibh cursus, interdum liberovel." - - diffs := dmp.DiffMain(text1, text2, true) - // Additional check that the diff texts are equal to the originals even if we are using DiffMain with checklines=true #29 - assert.Equal(t, text1, dmp.DiffText1(diffs)) - assert.Equal(t, text2, dmp.DiffText2(diffs)) - - patches := dmp.PatchMake(text1, diffs) - - actual := dmp.PatchToText(patches) - assert.Equal(t, "@@ -1,14 +1,16 @@\n Lorem \n+a \n ipsum do\n@@ -148,13 +148,12 @@\n m libero\n- \n vel.\n", actual) - - // Check that empty Patch array is returned for no parameter call - patches = dmp.PatchMake() - assert.Equal(t, []Patch{}, patches) -} - -func TestPatchSplitMax(t *testing.T) { - type TestCase struct { - Text1 string - Text2 string - - Expected string - } - - dmp := New() - - for i, tc := range []TestCase{ - {"abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n"}, - {"abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz", "@@ -3,78 +3,8 @@\n cdef\n-1234567890123456789012345678901234567890123456789012345678901234567890\n uvwx\n"}, - {"1234567890123456789012345678901234567890123456789012345678901234567890", "abc", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n"}, - {"abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n"}, - } { - patches := dmp.PatchMake(tc.Text1, tc.Text2) - patches = dmp.PatchSplitMax(patches) - - actual := dmp.PatchToText(patches) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc)) - } -} - -func TestPatchAddPadding(t *testing.T) { - type TestCase struct { - Name string - - Text1 string - Text2 string - - Expected string - ExpectedWithPadding string - } - - dmp := New() - - for i, tc := range []TestCase{ - {"Both edges full", "", "test", "@@ -0,0 +1,4 @@\n+test\n", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n"}, - {"Both edges partial", "XY", "XtestY", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n"}, - {"Both edges none", "XXXXYYYY", "XXXXtestYYYY", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n"}, - } { - patches := dmp.PatchMake(tc.Text1, tc.Text2) - - actual := dmp.PatchToText(patches) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - - dmp.PatchAddPadding(patches) - - actualWithPadding := dmp.PatchToText(patches) - assert.Equal(t, tc.ExpectedWithPadding, actualWithPadding, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - } -} - -func TestPatchApply(t *testing.T) { - type TestCase struct { - Name string - - Text1 string - Text2 string - TextBase string - - Expected string - ExpectedApplies []bool - } - - dmp := New() - dmp.MatchDistance = 1000 - dmp.MatchThreshold = 0.5 - dmp.PatchDeleteThreshold = 0.5 - - for i, tc := range []TestCase{ - {"Null case", "", "", "Hello world.", "Hello world.", []bool{}}, - {"Exact match", "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog.", "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog.", []bool{true, true}}, - {"Partial match", "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog.", "The quick red rabbit jumps over the tired tiger.", "That quick red rabbit jumped over a tired tiger.", []bool{true, true}}, - {"Failed match", "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog.", "I am the very model of a modern major general.", "I am the very model of a modern major general.", []bool{false, false}}, - {"Big delete, small Diff", "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y", "xabcy", []bool{true, true}}, - {"Big delete, big Diff 1", "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", "x12345678901234567890---------------++++++++++---------------12345678901234567890y", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y", []bool{false, true}}, - } { - patches := dmp.PatchMake(tc.Text1, tc.Text2) - - actual, actualApplies := dmp.PatchApply(patches, tc.TextBase) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - assert.Equal(t, tc.ExpectedApplies, actualApplies, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - } - - dmp.PatchDeleteThreshold = 0.6 - - for i, tc := range []TestCase{ - {"Big delete, big Diff 2", "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", "x12345678901234567890---------------++++++++++---------------12345678901234567890y", "xabcy", []bool{true, true}}, - } { - patches := dmp.PatchMake(tc.Text1, tc.Text2) - - actual, actualApplies := dmp.PatchApply(patches, tc.TextBase) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - assert.Equal(t, tc.ExpectedApplies, actualApplies, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - } - - dmp.MatchDistance = 0 - dmp.MatchThreshold = 0.0 - dmp.PatchDeleteThreshold = 0.5 - - for i, tc := range []TestCase{ - {"Compensate for failed patch", "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890", []bool{false, true}}, - } { - patches := dmp.PatchMake(tc.Text1, tc.Text2) - - actual, actualApplies := dmp.PatchApply(patches, tc.TextBase) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - assert.Equal(t, tc.ExpectedApplies, actualApplies, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - } - - dmp.MatchThreshold = 0.5 - dmp.MatchDistance = 1000 - - for i, tc := range []TestCase{ - {"No side effects", "", "test", "", "test", []bool{true}}, - {"No side effects with major delete", "The quick brown fox jumps over the lazy dog.", "Woof", "The quick brown fox jumps over the lazy dog.", "Woof", []bool{true, true}}, - {"Edge exact match", "", "test", "", "test", []bool{true}}, - {"Near edge exact match", "XY", "XtestY", "XY", "XtestY", []bool{true}}, - {"Edge partial match", "y", "y123", "x", "x123", []bool{true}}, - } { - patches := dmp.PatchMake(tc.Text1, tc.Text2) - - actual, actualApplies := dmp.PatchApply(patches, tc.TextBase) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - assert.Equal(t, tc.ExpectedApplies, actualApplies, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) - } -} diff --git a/modules/diffmatchpatch/stringutil.go b/modules/diffmatchpatch/stringutil.go deleted file mode 100644 index 361b831..0000000 --- a/modules/diffmatchpatch/stringutil.go +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. -// https://github.com/sergi/go-diff -// See the included LICENSE file for license details. -// -// go-diff is a Go implementation of Google's Diff, Match, and Patch library -// Original library is Copyright (c) 2006 Google Inc. -// http://code.google.com/p/google-diff-match-patch/ - -package diffmatchpatch - -import ( - "fmt" - "slices" - "strings" - "unicode/utf8" -) - -const UNICODE_INVALID_RANGE_START = 0xD800 -const UNICODE_INVALID_RANGE_END = 0xDFFF -const UNICODE_INVALID_RANGE_DELTA = UNICODE_INVALID_RANGE_END - UNICODE_INVALID_RANGE_START + 1 -const UNICODE_RANGE_MAX = 0x10FFFF - -// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI. -// In speed critical applications this could be dropped since the receiving application will certainly decode these fine. Note that this function is case-sensitive. Thus "%3F" would not be unescaped. But this is ok because it is only called with the output of HttpUtility.UrlEncode which returns lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc. -var unescaper = strings.NewReplacer( - "%21", "!", "%7E", "~", "%27", "'", - "%28", "(", "%29", ")", "%3B", ";", - "%2F", "/", "%3F", "?", "%3A", ":", - "%40", "@", "%26", "&", "%3D", "=", - "%2B", "+", "%24", "$", "%2C", ",", "%23", "#", "%2A", "*") - -// indexOf returns the first index of pattern in str, starting at str[i]. -func indexOf(str string, pattern string, i int) int { - if i > len(str)-1 { - return -1 - } - if i <= 0 { - return strings.Index(str, pattern) - } - ind := strings.Index(str[i:], pattern) - if ind == -1 { - return -1 - } - return ind + i -} - -// lastIndexOf returns the last index of pattern in str, starting at str[i]. -func lastIndexOf(str string, pattern string, i int) int { - if i < 0 { - return -1 - } - if i >= len(str) { - return strings.LastIndex(str, pattern) - } - _, size := utf8.DecodeRuneInString(str[i:]) - return strings.LastIndex(str[:i+size], pattern) -} - -// runesIndexOf returns the index of pattern in target, starting at target[i]. -func runesIndexOf(target, pattern []rune, i int) int { - if i > len(target)-1 { - return -1 - } - if i <= 0 { - return runesIndex(target, pattern) - } - ind := runesIndex(target[i:], pattern) - if ind == -1 { - return -1 - } - return ind + i -} - -// runesIndex is the equivalent of strings.Index for rune slices. -func runesIndex(r1, r2 []rune) int { - last := len(r1) - len(r2) - for i := 0; i <= last; i++ { - if slices.Equal(r1[i:i+len(r2)], r2) { - return i - } - } - return -1 -} - -// These constants define the number of bits representable -// in 1,2,3,4 byte utf8 sequences, respectively. -const ONE_BYTE_BITS = 7 -const TWO_BYTE_BITS = 11 -const THREE_BYTE_BITS = 16 -const FOUR_BYTE_BITS = 21 - -// Helper for getting a sequence of bits from an integer. -func getBits(i uint32, cnt byte, from byte) byte { - return byte((i >> from) & ((1 << cnt) - 1)) -} - -// Converts an integer in the range 0~1112060 into a rune. -// Based on the ranges table in https://en.wikipedia.org/wiki/UTF-8 -func intToRune(i uint32) rune { - if i < (1 << ONE_BYTE_BITS) { - return rune(i) - } - - if i < (1 << TWO_BYTE_BITS) { - r, size := utf8.DecodeRune([]byte{0b11000000 | getBits(i, 5, 6), 0b10000000 | getBits(i, 6, 0)}) - if size != 2 || r == utf8.RuneError { - panic(fmt.Sprintf("Error encoding an int %d with size 2, got rune %v and size %d", size, r, i)) - } - return r - } - - // Last -3 here needed because for some reason 3rd to last codepoint 65533 in this range - // was returning utf8.RuneError during encoding. - if i < ((1 << THREE_BYTE_BITS) - UNICODE_INVALID_RANGE_DELTA - 3) { - if i >= UNICODE_INVALID_RANGE_START { - i += UNICODE_INVALID_RANGE_DELTA - } - - r, size := utf8.DecodeRune([]byte{0b11100000 | getBits(i, 4, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)}) - if size != 3 || r == utf8.RuneError { - panic(fmt.Sprintf("Error encoding an int %d with size 3, got rune %v and size %d", size, r, i)) - } - return r - } - - if i < (1<= UNICODE_INVALID_RANGE_END { - return result - UNICODE_INVALID_RANGE_DELTA - } - - return result - } - - if size == 4 { - result := uint32(bytes[0]&0b111)<<18 | uint32(bytes[1]&0b111111)<<12 | uint32(bytes[2]&0b111111)<<6 | uint32(bytes[3]&0b111111) - return result - UNICODE_INVALID_RANGE_DELTA - 3 - } - - panic(fmt.Sprintf("Unexpected state decoding rune=%v size=%d", r, size)) -} diff --git a/modules/diffmatchpatch/stringutil_test.go b/modules/diffmatchpatch/stringutil_test.go deleted file mode 100644 index 73ab6ca..0000000 --- a/modules/diffmatchpatch/stringutil_test.go +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. -// https://github.com/sergi/go-diff -// See the included LICENSE file for license details. -// -// go-diff is a Go implementation of Google's Diff, Match, and Patch library -// Original library is Copyright (c) 2006 Google Inc. -// http://code.google.com/p/google-diff-match-patch/ - -package diffmatchpatch - -import ( - "fmt" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestRunesIndexOf(t *testing.T) { - type TestCase struct { - Pattern string - Start int - - Expected int - } - - for i, tc := range []TestCase{ - {"abc", 0, 0}, - {"cde", 0, 2}, - {"e", 0, 4}, - {"cdef", 0, -1}, - {"abcdef", 0, -1}, - {"abc", 2, -1}, - {"cde", 2, 2}, - {"e", 2, 4}, - {"cdef", 2, -1}, - {"abcdef", 2, -1}, - {"e", 6, -1}, - } { - actual := runesIndexOf([]rune("abcde"), []rune(tc.Pattern), tc.Start) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc)) - } -} - -func TestIndexOf(t *testing.T) { - type TestCase struct { - String string - Pattern string - Position int - - Expected int - } - - for i, tc := range []TestCase{ - {"hi world", "world", -1, 3}, - {"hi world", "world", 0, 3}, - {"hi world", "world", 1, 3}, - {"hi world", "world", 2, 3}, - {"hi world", "world", 3, 3}, - {"hi world", "world", 4, -1}, - {"abbc", "b", -1, 1}, - {"abbc", "b", 0, 1}, - {"abbc", "b", 1, 1}, - {"abbc", "b", 2, 2}, - {"abbc", "b", 3, -1}, - {"abbc", "b", 4, -1}, - // The greek letter beta is the two-byte sequence of "\u03b2". - {"a\u03b2\u03b2c", "\u03b2", -1, 1}, - {"a\u03b2\u03b2c", "\u03b2", 0, 1}, - {"a\u03b2\u03b2c", "\u03b2", 1, 1}, - {"a\u03b2\u03b2c", "\u03b2", 3, 3}, - {"a\u03b2\u03b2c", "\u03b2", 5, -1}, - {"a\u03b2\u03b2c", "\u03b2", 6, -1}, - } { - actual := indexOf(tc.String, tc.Pattern, tc.Position) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc)) - } -} - -func TestLastIndexOf(t *testing.T) { - type TestCase struct { - String string - Pattern string - Position int - - Expected int - } - - for i, tc := range []TestCase{ - {"hi world", "world", -1, -1}, - {"hi world", "world", 0, -1}, - {"hi world", "world", 1, -1}, - {"hi world", "world", 2, -1}, - {"hi world", "world", 3, -1}, - {"hi world", "world", 4, -1}, - {"hi world", "world", 5, -1}, - {"hi world", "world", 6, -1}, - {"hi world", "world", 7, 3}, - {"hi world", "world", 8, 3}, - {"abbc", "b", -1, -1}, - {"abbc", "b", 0, -1}, - {"abbc", "b", 1, 1}, - {"abbc", "b", 2, 2}, - {"abbc", "b", 3, 2}, - {"abbc", "b", 4, 2}, - // The greek letter beta is the two-byte sequence of "\u03b2". - {"a\u03b2\u03b2c", "\u03b2", -1, -1}, - {"a\u03b2\u03b2c", "\u03b2", 0, -1}, - {"a\u03b2\u03b2c", "\u03b2", 1, 1}, - {"a\u03b2\u03b2c", "\u03b2", 3, 3}, - {"a\u03b2\u03b2c", "\u03b2", 5, 3}, - {"a\u03b2\u03b2c", "\u03b2", 6, 3}, - } { - actual := lastIndexOf(tc.String, tc.Pattern, tc.Position) - assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc)) - } -} - -// Exhaustive check for all ints from 0 to 1112060 for correctness of implementation -// of `intToRune() -> runeToInt()`. -// This test is slow and runs longer than 5 seconds but it does provide a safety -// guarantee that these 2 functions are correct for the ranges we support. -func TestRuneToInt(t *testing.T) { - - for i := uint32(0); i <= UNICODE_RANGE_MAX-UNICODE_INVALID_RANGE_DELTA-3; i += 1 { - r := intToRune(i) - ic := runeToInt(r) - - assert.Equal(t, i, ic, fmt.Sprintf("intToRune(%d)=%d and runeToInt(%d)=%d", i, r, r, ic)) - } - - assert.Panics(t, func() { - intToRune(UNICODE_RANGE_MAX - UNICODE_INVALID_RANGE_DELTA - 2) - }) -} diff --git a/modules/plumbing/color/color.go b/modules/plumbing/color/color.go deleted file mode 100644 index 2cd74bd..0000000 --- a/modules/plumbing/color/color.go +++ /dev/null @@ -1,38 +0,0 @@ -package color - -// TODO read colors from a github.com/go-git/go-git/plumbing/format/config.Config struct -// TODO implement color parsing, see https://github.com/git/git/blob/v2.26.2/color.c - -// Colors. See https://github.com/git/git/blob/v2.26.2/color.h#L24-L53. -const ( - Normal = "" - Reset = "\033[m" - Bold = "\033[1m" - Red = "\033[31m" - Green = "\033[32m" - Yellow = "\033[33m" - Blue = "\033[34m" - Magenta = "\033[35m" - Cyan = "\033[36m" - BoldRed = "\033[1;31m" - BoldGreen = "\033[1;32m" - BoldYellow = "\033[1;33m" - BoldBlue = "\033[1;34m" - BoldMagenta = "\033[1;35m" - BoldCyan = "\033[1;36m" - FaintRed = "\033[2;31m" - FaintGreen = "\033[2;32m" - FaintYellow = "\033[2;33m" - FaintBlue = "\033[2;34m" - FaintMagenta = "\033[2;35m" - FaintCyan = "\033[2;36m" - BgRed = "\033[41m" - BgGreen = "\033[42m" - BgYellow = "\033[43m" - BgBlue = "\033[44m" - BgMagenta = "\033[45m" - BgCyan = "\033[46m" - Faint = "\033[2m" - FaintItalic = "\033[2;3m" - Reverse = "\033[7m" -) diff --git a/modules/plumbing/format/diff/colorconfig.go b/modules/plumbing/format/diff/colorconfig.go deleted file mode 100644 index 8921e28..0000000 --- a/modules/plumbing/format/diff/colorconfig.go +++ /dev/null @@ -1,97 +0,0 @@ -package diff - -import "github.com/antgroup/hugescm/modules/plumbing/color" - -// A ColorKey is a key into a ColorConfig map and also equal to the key in the -// diff.color subsection of the config. See -// https://github.com/git/git/blob/v2.26.2/diff.c#L83-L106. -type ColorKey string - -// ColorKeys. -const ( - Context ColorKey = "context" - Meta ColorKey = "meta" - Frag ColorKey = "frag" - Old ColorKey = "old" - New ColorKey = "new" - Commit ColorKey = "commit" - Whitespace ColorKey = "whitespace" - Func ColorKey = "func" - OldMoved ColorKey = "oldMoved" - OldMovedAlternative ColorKey = "oldMovedAlternative" - OldMovedDimmed ColorKey = "oldMovedDimmed" - OldMovedAlternativeDimmed ColorKey = "oldMovedAlternativeDimmed" - NewMoved ColorKey = "newMoved" - NewMovedAlternative ColorKey = "newMovedAlternative" - NewMovedDimmed ColorKey = "newMovedDimmed" - NewMovedAlternativeDimmed ColorKey = "newMovedAlternativeDimmed" - ContextDimmed ColorKey = "contextDimmed" - OldDimmed ColorKey = "oldDimmed" - NewDimmed ColorKey = "newDimmed" - ContextBold ColorKey = "contextBold" - OldBold ColorKey = "oldBold" - NewBold ColorKey = "newBold" -) - -// A ColorConfig is a color configuration. A nil or empty ColorConfig -// corresponds to no color. -type ColorConfig map[ColorKey]string - -// A ColorConfigOption sets an option on a ColorConfig. -type ColorConfigOption func(ColorConfig) - -// WithColor sets the color for key. -func WithColor(key ColorKey, color string) ColorConfigOption { - return func(cc ColorConfig) { - cc[key] = color - } -} - -// defaultColorConfig is the default color configuration. See -// https://github.com/git/git/blob/v2.26.2/diff.c#L57-L81. -var defaultColorConfig = ColorConfig{ - Context: color.Normal, - Meta: color.Bold, - Frag: color.Cyan, - Old: color.Red, - New: color.Green, - Commit: color.Yellow, - Whitespace: color.BgRed, - Func: color.Normal, - OldMoved: color.BoldMagenta, - OldMovedAlternative: color.BoldBlue, - OldMovedDimmed: color.Faint, - OldMovedAlternativeDimmed: color.FaintItalic, - NewMoved: color.BoldCyan, - NewMovedAlternative: color.BoldYellow, - NewMovedDimmed: color.Faint, - NewMovedAlternativeDimmed: color.FaintItalic, - ContextDimmed: color.Faint, - OldDimmed: color.FaintRed, - NewDimmed: color.FaintGreen, - ContextBold: color.Bold, - OldBold: color.BoldRed, - NewBold: color.BoldGreen, -} - -// NewColorConfig returns a new ColorConfig. -func NewColorConfig(options ...ColorConfigOption) ColorConfig { - cc := make(ColorConfig) - for key, value := range defaultColorConfig { - cc[key] = value - } - for _, option := range options { - option(cc) - } - return cc -} - -// Reset returns the ANSI escape sequence to reset the color with key set from -// cc. If no color was set then no reset is needed so it returns the empty -// string. -func (cc ColorConfig) Reset(key ColorKey) string { - if cc[key] == "" { - return "" - } - return color.Reset -} diff --git a/modules/plumbing/format/diff/patch.go b/modules/plumbing/format/diff/patch.go deleted file mode 100644 index 0aaac57..0000000 --- a/modules/plumbing/format/diff/patch.go +++ /dev/null @@ -1,60 +0,0 @@ -package diff - -import ( - "github.com/antgroup/hugescm/modules/plumbing" - "github.com/antgroup/hugescm/modules/plumbing/filemode" -) - -// Operation defines the operation of a diff item. -type Operation int - -const ( - // Equal item represents an equals diff. - Equal Operation = iota - // Add item represents an insert diff. - Add - // Delete item represents a delete diff. - Delete -) - -// Patch represents a collection of steps to transform several files. -type Patch interface { - // FilePatches returns a slice of patches per file. - FilePatches() []FilePatch - // Message returns an optional message that can be at the top of the - // Patch representation. - Message() string -} - -// FilePatch represents the necessary steps to transform one file into another. -type FilePatch interface { - // IsBinary returns true if this patch is representing a binary file. - IsBinary() bool - // IsFragments returns true if this patch is representing a fragments file. - IsFragments() bool - // Files returns the from and to Files, with all the necessary metadata - // about them. If the patch creates a new file, "from" will be nil. - // If the patch deletes a file, "to" will be nil. - Files() (from, to File) - // Chunks returns a slice of ordered changes to transform "from" File into - // "to" File. If the file is a binary one, Chunks will be empty. - Chunks() []Chunk -} - -// File contains all the file metadata necessary to print some patch formats. -type File interface { - // Hash returns the File Hash. - Hash() plumbing.Hash - // Mode returns the FileMode. - Mode() filemode.FileMode - // Path returns the complete Path to the file, including the filename. - Path() string -} - -// Chunk represents a portion of a file transformation into another. -type Chunk interface { - // Content contains the portion of the file. - Content() string - // Type contains the Operation to do with this Chunk. - Type() Operation -} diff --git a/modules/plumbing/format/diff/unified_encoder.go b/modules/plumbing/format/diff/unified_encoder.go deleted file mode 100644 index 39cbace..0000000 --- a/modules/plumbing/format/diff/unified_encoder.go +++ /dev/null @@ -1,403 +0,0 @@ -package diff - -import ( - "fmt" - "io" - "regexp" - "strconv" - "strings" - - "github.com/antgroup/hugescm/modules/plumbing" -) - -// DefaultContextLines is the default number of context lines. -const DefaultContextLines = 3 - -var ( - splitLinesRegexp = regexp.MustCompile(`[^\n]*(\n|$)`) - - operationChar = map[Operation]byte{ - Add: '+', - Delete: '-', - Equal: ' ', - } - - operationColorKey = map[Operation]ColorKey{ - Add: New, - Delete: Old, - Equal: Context, - } -) - -// UnifiedEncoder encodes an unified diff into the provided Writer. It does not -// support similarity index for renames or sorting hash representations. -type UnifiedEncoder struct { - io.Writer - - // contextLines is the count of unchanged lines that will appear surrounding - // a change. - contextLines int - - // srcPrefix and dstPrefix are prepended to file paths when encoding a diff. - srcPrefix string - dstPrefix string - - // colorConfig is the color configuration. The default is no color. - color ColorConfig -} - -// NewUnifiedEncoder returns a new UnifiedEncoder that writes to w. -func NewUnifiedEncoder(w io.Writer, contextLines int) *UnifiedEncoder { - return &UnifiedEncoder{ - Writer: w, - srcPrefix: "a/", - dstPrefix: "b/", - contextLines: contextLines, - } -} - -// SetColor sets e's color configuration and returns e. -func (e *UnifiedEncoder) SetColor(colorConfig ColorConfig) *UnifiedEncoder { - e.color = colorConfig - return e -} - -// SetSrcPrefix sets e's srcPrefix and returns e. -func (e *UnifiedEncoder) SetSrcPrefix(prefix string) *UnifiedEncoder { - e.srcPrefix = prefix - return e -} - -// SetDstPrefix sets e's dstPrefix and returns e. -func (e *UnifiedEncoder) SetDstPrefix(prefix string) *UnifiedEncoder { - e.dstPrefix = prefix - return e -} - -// Encode encodes patch. -func (e *UnifiedEncoder) Encode(patch Patch) error { - sb := &strings.Builder{} - - if message := patch.Message(); message != "" { - sb.WriteString(message) - if !strings.HasSuffix(message, "\n") { - sb.WriteByte('\n') - } - } - - for _, filePatch := range patch.FilePatches() { - e.writeFilePatchHeader(sb, filePatch) - g := newHunksGenerator(filePatch.Chunks(), e.contextLines) - for _, hunk := range g.Generate() { - hunk.writeTo(sb, e.color) - } - } - - _, err := e.Write([]byte(sb.String())) - return err -} - -func (e *UnifiedEncoder) writeFilePatchHeader(sb *strings.Builder, filePatch FilePatch) { - from, to := filePatch.Files() - if from == nil && to == nil { - return - } - isBinary := filePatch.IsBinary() - isFragments := filePatch.IsFragments() - - var lines []string - switch { - case from != nil && to != nil: - hashEquals := from.Hash() == to.Hash() - lines = append(lines, - fmt.Sprintf("diff --zeta %s%s %s%s", - e.srcPrefix, from.Path(), e.dstPrefix, to.Path()), - ) - if from.Mode() != to.Mode() { - lines = append(lines, - fmt.Sprintf("old mode %o", from.Mode()), - fmt.Sprintf("new mode %o", to.Mode()), - ) - } - if from.Path() != to.Path() { - lines = append(lines, - fmt.Sprintf("rename from %s", from.Path()), - fmt.Sprintf("rename to %s", to.Path()), - ) - } - if from.Mode() != to.Mode() && !hashEquals { - lines = append(lines, - fmt.Sprintf("index %s..%s", from.Hash(), to.Hash()), - ) - } else if !hashEquals { - lines = append(lines, - fmt.Sprintf("index %s..%s %o", from.Hash(), to.Hash(), from.Mode()), - ) - } - if !hashEquals { - lines = e.appendPathLines(lines, e.srcPrefix+from.Path(), e.dstPrefix+to.Path(), isBinary, isFragments) - } - case from == nil: - lines = append(lines, - fmt.Sprintf("diff --zeta %s %s", e.srcPrefix+to.Path(), e.dstPrefix+to.Path()), - fmt.Sprintf("new file mode %o", to.Mode()), - fmt.Sprintf("index %s..%s", plumbing.ZeroHash, to.Hash()), - ) - lines = e.appendPathLines(lines, "/dev/null", e.dstPrefix+to.Path(), isBinary, isFragments) - case to == nil: - lines = append(lines, - fmt.Sprintf("diff --zeta %s %s", e.srcPrefix+from.Path(), e.dstPrefix+from.Path()), - fmt.Sprintf("deleted file mode %o", from.Mode()), - fmt.Sprintf("index %s..%s", from.Hash(), plumbing.ZeroHash), - ) - lines = e.appendPathLines(lines, e.srcPrefix+from.Path(), "/dev/null", isBinary, isFragments) - } - - sb.WriteString(e.color[Meta]) - sb.WriteString(lines[0]) - for _, line := range lines[1:] { - sb.WriteByte('\n') - sb.WriteString(line) - } - sb.WriteString(e.color.Reset(Meta)) - sb.WriteByte('\n') -} - -func (e *UnifiedEncoder) appendPathLines(lines []string, fromPath, toPath string, isBinary bool, isFragments bool) []string { - if isFragments { - return append(lines, - fmt.Sprintf("Fragments files %s and %s differ", fromPath, toPath), - ) - } - if isBinary { - return append(lines, - fmt.Sprintf("Binary files %s and %s differ", fromPath, toPath), - ) - } - return append(lines, - fmt.Sprintf("--- %s", fromPath), - fmt.Sprintf("+++ %s", toPath), - ) -} - -type hunksGenerator struct { - fromLine, toLine int - ctxLines int - chunks []Chunk - current *hunk - hunks []*hunk - beforeContext, afterContext []string -} - -func newHunksGenerator(chunks []Chunk, ctxLines int) *hunksGenerator { - return &hunksGenerator{ - chunks: chunks, - ctxLines: ctxLines, - } -} - -func (g *hunksGenerator) Generate() []*hunk { - for i, chunk := range g.chunks { - lines := splitLines(chunk.Content()) - nLines := len(lines) - - switch chunk.Type() { - case Equal: - g.fromLine += nLines - g.toLine += nLines - g.processEqualsLines(lines, i) - case Delete: - if nLines != 0 { - g.fromLine++ - } - - g.processHunk(i, chunk.Type()) - g.fromLine += nLines - 1 - g.current.AddOp(chunk.Type(), lines...) - case Add: - if nLines != 0 { - g.toLine++ - } - g.processHunk(i, chunk.Type()) - g.toLine += nLines - 1 - g.current.AddOp(chunk.Type(), lines...) - } - - if i == len(g.chunks)-1 && g.current != nil { - g.hunks = append(g.hunks, g.current) - } - } - - return g.hunks -} - -func (g *hunksGenerator) processHunk(i int, op Operation) { - if g.current != nil { - return - } - - var ctxPrefix string - linesBefore := len(g.beforeContext) - if linesBefore > g.ctxLines { - ctxPrefix = g.beforeContext[linesBefore-g.ctxLines-1] - g.beforeContext = g.beforeContext[linesBefore-g.ctxLines:] - linesBefore = g.ctxLines - } - - g.current = &hunk{ctxPrefix: strings.TrimSuffix(ctxPrefix, "\n")} - g.current.AddOp(Equal, g.beforeContext...) - - switch op { - case Delete: - g.current.fromLine, g.current.toLine = - g.addLineNumbers(g.fromLine, g.toLine, linesBefore, i, Add) - case Add: - g.current.toLine, g.current.fromLine = - g.addLineNumbers(g.toLine, g.fromLine, linesBefore, i, Delete) - } - - g.beforeContext = nil -} - -// addLineNumbers obtains the line numbers in a new chunk. -func (g *hunksGenerator) addLineNumbers(la, lb int, linesBefore int, i int, op Operation) (cla, clb int) { - cla = la - linesBefore - // we need to search for a reference for the next diff - switch { - case linesBefore != 0 && g.ctxLines != 0: - if lb > g.ctxLines { - clb = lb - g.ctxLines + 1 - } else { - clb = 1 - } - case g.ctxLines == 0: - clb = lb - case i != len(g.chunks)-1: - next := g.chunks[i+1] - if next.Type() == op || next.Type() == Equal { - // this diff will be into this chunk - clb = lb + 1 - } - } - - return -} - -func (g *hunksGenerator) processEqualsLines(ls []string, i int) { - if g.current == nil { - g.beforeContext = append(g.beforeContext, ls...) - return - } - - g.afterContext = append(g.afterContext, ls...) - if len(g.afterContext) <= g.ctxLines*2 && i != len(g.chunks)-1 { - g.current.AddOp(Equal, g.afterContext...) - g.afterContext = nil - } else { - ctxLines := g.ctxLines - if ctxLines > len(g.afterContext) { - ctxLines = len(g.afterContext) - } - g.current.AddOp(Equal, g.afterContext[:ctxLines]...) - g.hunks = append(g.hunks, g.current) - - g.current = nil - g.beforeContext = g.afterContext[ctxLines:] - g.afterContext = nil - } -} - -func splitLines(s string) []string { - out := splitLinesRegexp.FindAllString(s, -1) - if out[len(out)-1] == "" { - out = out[:len(out)-1] - } - return out -} - -type hunk struct { - fromLine int - toLine int - - fromCount int - toCount int - - ctxPrefix string - ops []*op -} - -func (h *hunk) writeTo(sb *strings.Builder, color ColorConfig) { - sb.WriteString(color[Frag]) - sb.WriteString("@@ -") - - if h.fromCount == 1 { - sb.WriteString(strconv.Itoa(h.fromLine)) - } else { - sb.WriteString(strconv.Itoa(h.fromLine)) - sb.WriteByte(',') - sb.WriteString(strconv.Itoa(h.fromCount)) - } - - sb.WriteString(" +") - - if h.toCount == 1 { - sb.WriteString(strconv.Itoa(h.toLine)) - } else { - sb.WriteString(strconv.Itoa(h.toLine)) - sb.WriteByte(',') - sb.WriteString(strconv.Itoa(h.toCount)) - } - - sb.WriteString(" @@") - sb.WriteString(color.Reset(Frag)) - - if h.ctxPrefix != "" { - sb.WriteByte(' ') - sb.WriteString(color[Func]) - sb.WriteString(h.ctxPrefix) - sb.WriteString(color.Reset(Func)) - } - - sb.WriteByte('\n') - - for _, op := range h.ops { - op.writeTo(sb, color) - } -} - -func (h *hunk) AddOp(t Operation, ss ...string) { - n := len(ss) - switch t { - case Add: - h.toCount += n - case Delete: - h.fromCount += n - case Equal: - h.toCount += n - h.fromCount += n - } - - for _, s := range ss { - h.ops = append(h.ops, &op{s, t}) - } -} - -type op struct { - text string - t Operation -} - -func (o *op) writeTo(sb *strings.Builder, color ColorConfig) { - colorKey := operationColorKey[o.t] - sb.WriteString(color[colorKey]) - sb.WriteByte(operationChar[o.t]) - if strings.HasSuffix(o.text, "\n") { - sb.WriteString(strings.TrimSuffix(o.text, "\n")) - sb.WriteString(color.Reset(colorKey)) - sb.WriteByte('\n') - return - } - sb.WriteString(o.text) - sb.WriteString(color.Reset(colorKey)) - sb.WriteString("\n\\ No newline at end of file\n") -} diff --git a/modules/zeta/object/change.go b/modules/zeta/object/change.go index 003493e..422041b 100644 --- a/modules/zeta/object/change.go +++ b/modules/zeta/object/change.go @@ -6,9 +6,11 @@ package object import ( "bytes" "context" + "errors" "fmt" "strings" + "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/merkletrie" ) @@ -21,14 +23,20 @@ type Change struct { To ChangeEntry } -var empty ChangeEntry +var ( + empty ChangeEntry + ErrMalformedChange = errors.New("malformed change: empty from and to") +) + +func (c *Change) Name() string { + return c.name() +} // Action returns the kind of action represented by the change, an // insertion, a deletion or a modification. func (c *Change) Action() (merkletrie.Action, error) { if c.From.Equal(&empty) && c.To.Equal(&empty) { - return merkletrie.Action(0), - fmt.Errorf("malformed change: empty from and to") + return merkletrie.Action(0), ErrMalformedChange } if c.From.Equal(&empty) { @@ -77,20 +85,6 @@ func (c *Change) String() string { return fmt.Sprintf("", action, c.name()) } -// Patch returns a Patch with all the file changes in chunks. This -// representation can be used to create several diff outputs. -func (c *Change) Patch(codecvt bool) (*Patch, error) { - return c.PatchContext(context.Background(), codecvt) -} - -// Patch returns a Patch with all the file changes in chunks. This -// representation can be used to create several diff outputs. -// If context expires, an non-nil error will be returned -// Provided context must be non-nil -func (c *Change) PatchContext(ctx context.Context, codecvt bool) (*Patch, error) { - return getPatchContext(ctx, "", codecvt, c) -} - func (c *Change) name() string { if !c.From.Equal(&empty) { return c.From.Name @@ -109,10 +103,6 @@ type ChangeEntry struct { TreeEntry TreeEntry } -func (e *ChangeEntry) IsFragments() bool { - return e.TreeEntry.IsFragments() -} - func (e *ChangeEntry) Equal(o *ChangeEntry) bool { return e.Name == o.Name && e.Tree.Equal(o.Tree) && e.TreeEntry.Equal(&o.TreeEntry) } @@ -148,16 +138,12 @@ func (c Changes) String() string { return buffer.String() } -// Patch returns a Patch with all the changes in chunks. This -// representation can be used to create several diff outputs. -func (c Changes) Patch(codecvt bool) (*Patch, error) { - return c.PatchContext(context.Background(), codecvt) +func (c Changes) Stats(ctx context.Context, opts *PatchOptions) (FileStats, error) { + return getStatsContext(ctx, opts, c...) } // Patch returns a Patch with all the changes in chunks. This // representation can be used to create several diff outputs. -// If context expires, an non-nil error will be returned -// Provided context must be non-nil -func (c Changes) PatchContext(ctx context.Context, codecvt bool) (*Patch, error) { - return getPatchContext(ctx, "", codecvt, c...) +func (c Changes) Patch(ctx context.Context, opts *PatchOptions) ([]*diferenco.Unified, error) { + return getPatchContext(ctx, opts, c...) } diff --git a/modules/zeta/object/change_adaptor.go b/modules/zeta/object/change_adaptor.go index 935fd75..24427ee 100644 --- a/modules/zeta/object/change_adaptor.go +++ b/modules/zeta/object/change_adaptor.go @@ -44,7 +44,7 @@ func newChangeEntry(p noder.Path) (ChangeEntry, error) { TreeEntry: TreeEntry{ Name: asTreeNoder.name, Size: asTreeNoder.size, - Mode: asTreeNoder.Mode(), + Mode: asTreeNoder.TrueMode(), Hash: asTreeNoder.HashRaw(), }, }, nil diff --git a/modules/zeta/object/commit.go b/modules/zeta/object/commit.go index c3bec06..1f1b42d 100644 --- a/modules/zeta/object/commit.go +++ b/modules/zeta/object/commit.go @@ -313,31 +313,9 @@ func (c *Commit) Root(ctx context.Context) (*Tree, error) { return resolveTree(ctx, c.b, c.Tree) } -// PatchContext returns the Patch between the actual commit and the provided one. -// Error will be return if context expires. Provided context must be non-nil. -// -// NOTE: Since version 5.1.0 the renames are correctly handled, the settings -// used are the recommended options DefaultDiffTreeOptions. -func (c *Commit) PatchContext(ctx context.Context, to *Commit, m noder.Matcher, codecvt bool) (*Patch, error) { - fromTree, err := c.Root(ctx) - if err != nil { - return nil, err - } - - var toTree *Tree - if to != nil { - toTree, err = to.Root(ctx) - if err != nil { - return nil, err - } - } - - return fromTree.PatchContext(ctx, toTree, m, codecvt) -} - // StatsContext returns the stats of a commit. Error will be return if context // expires. Provided context must be non-nil. -func (c *Commit) StatsContext(ctx context.Context, m noder.Matcher, codecvt bool) (FileStats, error) { +func (c *Commit) StatsContext(ctx context.Context, m noder.Matcher, opts *PatchOptions) (FileStats, error) { from, err := c.Root(ctx) if err != nil { return nil, err @@ -355,13 +333,7 @@ func (c *Commit) StatsContext(ctx context.Context, m noder.Matcher, codecvt bool return nil, err } } - - patch, err := to.PatchContext(ctx, from, m, codecvt) - if err != nil { - return nil, err - } - - return getFileStatsFromFilePatches(patch.FilePatches()), nil + return to.StatsContext(ctx, from, m, opts) } // CommitIter is a generic closable interface for iterating over commits. diff --git a/modules/zeta/object/file.go b/modules/zeta/object/file.go index 79111da..6856461 100644 --- a/modules/zeta/object/file.go +++ b/modules/zeta/object/file.go @@ -8,6 +8,7 @@ import ( "context" "io" + "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" "github.com/antgroup/hugescm/modules/streamio" @@ -35,6 +36,20 @@ type readCloser struct { io.Closer } +func (f *File) IsFragments() bool { + if f == nil { + return false + } + return f.Mode.IsFragments() +} + +func (f *File) asFile() *diferenco.File { + if f == nil { + return nil + } + return &diferenco.File{Name: f.Name, Hash: f.Hash.String(), Mode: uint32(f.Mode.Origin())} +} + // OriginReader return ReadCloser func (f *File) OriginReader(ctx context.Context) (io.ReadCloser, int64, error) { if f.b == nil { diff --git a/modules/zeta/object/patch.go b/modules/zeta/object/patch.go index 487d156..e512be5 100644 --- a/modules/zeta/object/patch.go +++ b/modules/zeta/object/patch.go @@ -4,7 +4,6 @@ package object import ( - "bytes" "context" "errors" "fmt" @@ -12,270 +11,158 @@ import ( "strconv" "strings" - "github.com/antgroup/hugescm/modules/diff" - dmp "github.com/antgroup/hugescm/modules/diffmatchpatch" + "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/plumbing" - "github.com/antgroup/hugescm/modules/plumbing/filemode" - fdiff "github.com/antgroup/hugescm/modules/plumbing/format/diff" ) var ( ErrCanceled = errors.New("operation canceled") ) -func getPatchContext(ctx context.Context, message string, codecvt bool, changes ...*Change) (*Patch, error) { - var filePatches []fdiff.FilePatch - for _, c := range changes { - select { - case <-ctx.Done(): - return nil, ErrCanceled - default: - } - - fp, err := filePatchWithContext(ctx, codecvt, c) - if err != nil { - return nil, err - } - - filePatches = append(filePatches, fp) - } - return &Patch{message, filePatches}, nil +type PatchOptions struct { + Algorithm diferenco.Algorithm + Textconv bool + Match func(string) bool } func sizeOverflow(f *File) bool { return f != nil && f.Size > MAX_DIFF_SIZE } -func filePatchWithContext(ctx context.Context, codecvt bool, c *Change) (fdiff.FilePatch, error) { - if c.From.IsFragments() || c.To.IsFragments() { - return &textFilePatch{from: c.From, to: c.To, fragments: true}, nil +func fileStatName(from, to *File) string { + if from == nil { + // New File is created. + return to.Name } + if to == nil { + // File is deleted. + return from.Name + } + if from.Name != to.Name { + // File is renamed. + return fmt.Sprintf("%s => %s", from.Name, to.Name) + } + return from.Name +} + +func fileStatWithContext(ctx context.Context, opts *PatchOptions, c *Change) (*FileStat, error) { from, to, err := c.Files() if err != nil { return nil, err } + if from == nil && to == nil { + return nil, ErrMalformedChange + } + s := &FileStat{ + Name: fileStatName(from, to), + } + if from.IsFragments() || to.IsFragments() { + return s, nil + } // --- check size limit if sizeOverflow(from) || sizeOverflow(to) { - return &textFilePatch{from: c.From, to: c.To}, nil - } - fromContent, err := from.UnifiedText(ctx, codecvt) - if plumbing.IsNoSuchObject(err) { - return &textFilePatch{from: c.From, to: c.To}, nil + return s, nil } - if err == ErrNotTextContent { - return &textFilePatch{from: c.From, to: c.To}, nil + fromContent, err := from.UnifiedText(ctx, opts.Textconv) + if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + return s, nil } if err != nil { return nil, err } - toContent, err := to.UnifiedText(ctx, codecvt) - if plumbing.IsNoSuchObject(err) { - return &textFilePatch{from: c.From, to: c.To}, nil + toContent, err := to.UnifiedText(ctx, opts.Textconv) + if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + return s, nil } - if err == ErrNotTextContent { - return &textFilePatch{from: c.From, to: c.To}, nil + if err != nil { + return nil, err } + stat, err := diferenco.Stat(ctx, &diferenco.Options{S1: fromContent, S2: toContent, A: opts.Algorithm}) if err != nil { return nil, err } + s.Addition = stat.Addition + s.Deletion = stat.Deletion + return s, nil +} - diffs, err := diff.Do(fromContent, toContent) - if err != nil { - return &textFilePatch{from: c.From, to: c.To}, nil +func getStatsContext(ctx context.Context, opts *PatchOptions, changes ...*Change) ([]FileStat, error) { + if opts.Match == nil { + opts.Match = func(s string) bool { + return true + } } - var chunks []fdiff.Chunk - for _, d := range diffs { - select { - case <-ctx.Done(): - return nil, ErrCanceled - default: + stats := make([]FileStat, 0, 100) + for _, c := range changes { + if !opts.Match(c.name()) { + continue } - - var op fdiff.Operation - switch d.Type { - case dmp.DiffEqual: - op = fdiff.Equal - case dmp.DiffDelete: - op = fdiff.Delete - case dmp.DiffInsert: - op = fdiff.Add + s, err := fileStatWithContext(ctx, opts, c) + if err != nil { + return nil, err } - - chunks = append(chunks, &textChunk{d.Text, op}) + stats = append(stats, *s) } - - return &textFilePatch{ - chunks: chunks, - from: c.From, - to: c.To, - }, nil - -} - -// Patch is an implementation of fdiff.Patch interface -type Patch struct { - message string - filePatches []fdiff.FilePatch + return stats, nil } -func NewPatch(message string, filePatches []fdiff.FilePatch) *Patch { - return &Patch{message: message, filePatches: filePatches} -} - -func (p *Patch) FilePatches() []fdiff.FilePatch { - return p.filePatches -} - -func (p *Patch) Message() string { - return p.message -} - -func (p *Patch) Encode(w io.Writer) error { - e := fdiff.NewUnifiedEncoder(w, fdiff.DefaultContextLines) - - return e.Encode(p) -} - -func (p *Patch) EncodeEx(w io.Writer, useColor bool) error { - e := fdiff.NewUnifiedEncoder(w, fdiff.DefaultContextLines) - if useColor { - e.SetColor(fdiff.NewColorConfig()) +func filePatchWithContext(ctx context.Context, opts *PatchOptions, c *Change) (*diferenco.Unified, error) { + from, to, err := c.Files() + if err != nil { + return nil, err + } + if from == nil && to == nil { + return nil, ErrMalformedChange + } + if from.IsFragments() || to.IsFragments() { + return &diferenco.Unified{From: from.asFile(), To: to.asFile(), IsFragments: true}, nil + } + // --- check size limit + if sizeOverflow(from) || sizeOverflow(to) { + return &diferenco.Unified{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil + } + fromContent, err := from.UnifiedText(ctx, opts.Textconv) + if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + return &diferenco.Unified{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } - return e.Encode(p) -} - -func (p *Patch) Stats() FileStats { - return getFileStatsFromFilePatches(p.FilePatches()) -} - -func (p *Patch) String() string { - buf := bytes.NewBuffer(nil) - err := p.Encode(buf) if err != nil { - return fmt.Sprintf("malformed patch: %s", err.Error()) + return nil, err } - - return buf.String() -} - -// changeEntryWrapper is an implementation of fdiff.File interface -type changeEntryWrapper struct { - ce ChangeEntry -} - -func (f *changeEntryWrapper) Hash() plumbing.Hash { - if !f.ce.TreeEntry.Mode.IsFile() { - return plumbing.ZeroHash + toContent, err := to.UnifiedText(ctx, opts.Textconv) + if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + return &diferenco.Unified{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } - - return f.ce.TreeEntry.Hash -} - -func (f *changeEntryWrapper) Mode() filemode.FileMode { - return f.ce.TreeEntry.Mode.Origin() -} -func (f *changeEntryWrapper) Path() string { - if !f.ce.TreeEntry.Mode.IsFile() { - return "" + if err != nil { + return nil, err } - - return f.ce.Name -} - -func (f *changeEntryWrapper) Empty() bool { - return !f.ce.TreeEntry.Mode.IsFile() -} - -// textFilePatch is an implementation of fdiff.FilePatch interface -type textFilePatch struct { - chunks []fdiff.Chunk - from, to ChangeEntry - fragments bool -} - -func NewTextFilePatch(chunks []fdiff.Chunk, from, to ChangeEntry, fragments bool) fdiff.FilePatch { - return &textFilePatch{chunks: chunks, from: from, to: to, fragments: fragments} + return diferenco.DoUnified(ctx, &diferenco.Options{From: from.asFile(), To: to.asFile(), S1: fromContent, S2: toContent, A: opts.Algorithm}) } -func (tf *textFilePatch) Files() (from fdiff.File, to fdiff.File) { - f := &changeEntryWrapper{tf.from} - t := &changeEntryWrapper{tf.to} - - if !f.Empty() { - from = f +func getPatchContext(ctx context.Context, opts *PatchOptions, changes ...*Change) ([]*diferenco.Unified, error) { + if opts.Match == nil { + opts.Match = func(s string) bool { + return true + } } - - if !t.Empty() { - to = t + patch := make([]*diferenco.Unified, 0, len(changes)) + for _, c := range changes { + if !opts.Match(c.name()) { + continue + } + p, err := filePatchWithContext(ctx, opts, c) + if err != nil { + return nil, err + } + patch = append(patch, p) } - - return -} - -func (tf *textFilePatch) IsFragments() bool { - return tf.fragments -} - -func (tf *textFilePatch) IsBinary() bool { - return len(tf.chunks) == 0 -} - -func (tf *textFilePatch) Chunks() []fdiff.Chunk { - return tf.chunks -} - -type filePatchWrapper struct { - chunks []fdiff.Chunk - from, to fdiff.File - fragments bool -} - -func (f *filePatchWrapper) Files() (from fdiff.File, to fdiff.File) { - from = f.from - to = f.to - - return -} - -func (f *filePatchWrapper) IsFragments() bool { - return f.fragments -} - -func (f *filePatchWrapper) IsBinary() bool { - return len(f.chunks) == 0 -} - -func (f *filePatchWrapper) Chunks() []fdiff.Chunk { - return f.chunks -} - -func NewFilePatchWrapper(chunks []fdiff.Chunk, from, to fdiff.File, fragments bool) fdiff.FilePatch { - return &filePatchWrapper{chunks: chunks, from: from, to: to, fragments: fragments} -} - -// textChunk is an implementation of fdiff.Chunk interface -type textChunk struct { - content string - op fdiff.Operation -} - -func (t *textChunk) Content() string { - return t.content -} - -func (t *textChunk) Type() fdiff.Operation { - return t.op -} - -func NewTextChunk(content string, op fdiff.Operation) fdiff.Chunk { - return &textChunk{content: content, op: op} + return patch, nil } // FileStat stores the status of changes in content of a file. type FileStat struct { - Name string - Addition int - Deletion int + Name string `json:"name"` + Addition int `json:"addition"` + Deletion int `json:"deletion"` } func (fs FileStat) String() string { @@ -344,53 +231,3 @@ func StatsWriteTo(w io.Writer, fileStats []FileStat, color bool) { _, _ = fmt.Fprintf(w, " %s%s | %s%d %s%s\n", fs.Name, namePad, changePad, total, adds, dels) } } - -func getFileStatsFromFilePatches(filePatches []fdiff.FilePatch) FileStats { - var fileStats FileStats - - for _, fp := range filePatches { - // ignore empty patches (binary files, submodule refs updates) - if len(fp.Chunks()) == 0 { - continue - } - - cs := FileStat{} - from, to := fp.Files() - if from == nil { - // New File is created. - cs.Name = to.Path() - } else if to == nil { - // File is deleted. - cs.Name = from.Path() - } else if from.Path() != to.Path() { - // File is renamed. - cs.Name = fmt.Sprintf("%s => %s", from.Path(), to.Path()) - } else { - cs.Name = from.Path() - } - - for _, chunk := range fp.Chunks() { - s := chunk.Content() - if len(s) == 0 { - continue - } - - switch chunk.Type() { - case fdiff.Add: - cs.Addition += strings.Count(s, "\n") - if s[len(s)-1] != '\n' { - cs.Addition++ - } - case fdiff.Delete: - cs.Deletion += strings.Count(s, "\n") - if s[len(s)-1] != '\n' { - cs.Deletion++ - } - } - } - - fileStats = append(fileStats, cs) - } - - return fileStats -} diff --git a/modules/zeta/object/tree.go b/modules/zeta/object/tree.go index 4ec4f75..3e6337a 100644 --- a/modules/zeta/object/tree.go +++ b/modules/zeta/object/tree.go @@ -585,26 +585,13 @@ func (t *Tree) DiffContext(ctx context.Context, to *Tree, m noder.Matcher) (Chan return DiffTreeWithOptions(ctx, t, to, DefaultDiffTreeOptions, m) } -// Patch returns a slice of Patch objects with all the changes between trees -// in chunks. This representation can be used to create several diff outputs. -func (t *Tree) Patch(to *Tree, m noder.Matcher, codecvt bool) (*Patch, error) { - return t.PatchContext(context.Background(), to, m, codecvt) -} - -// PatchContext returns a slice of Patch objects with all the changes between -// trees in chunks. This representation can be used to create several diff -// outputs. If context expires, an error will be returned. Provided context must -// be non-nil. -// -// NOTE: Since version 5.1.0 the renames are correctly handled, the settings -// used are the recommended options DefaultDiffTreeOptions. -func (t *Tree) PatchContext(ctx context.Context, to *Tree, m noder.Matcher, codecvt bool) (*Patch, error) { +// StatsContext: stats +func (t *Tree) StatsContext(ctx context.Context, to *Tree, m noder.Matcher, opts *PatchOptions) (FileStats, error) { changes, err := t.DiffContext(ctx, to, m) if err != nil { return nil, err } - - return changes.PatchContext(ctx, codecvt) + return changes.Stats(ctx, opts) } // treeEntryIter facilitates iterating through the TreeEntry objects in a Tree. diff --git a/pkg/command/command_diff.go b/pkg/command/command_diff.go index d2a989f..5aacf81 100644 --- a/pkg/command/command_diff.go +++ b/pkg/command/command_diff.go @@ -11,6 +11,7 @@ import ( "path/filepath" "strings" + "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/pkg/zeta" ) @@ -26,10 +27,15 @@ type Diff struct { Textconv bool `name:"textconv" help:"Convert text to Unicode and compare differences"` MergeBase string `name:"merge-base" help:"If --merge-base is given, use the common ancestor of and HEAD instead"` Output string `name:"output" help:"Output to a specific file instead of stdout" placeholder:""` + Histogram bool `name:"histogram" help:"Generate a diff using the \"Histogram diff\" algorithm"` + ONP bool `name:"onp" help:"Generate a diff using the \"O(NP) diff\" algorithm"` + Myers bool `name:"myers" help:"Generate a diff using the \"Myers diff\" algorithm"` + Patience bool `name:"patience" help:"Generate a diff using the \"Patience diff\" algorithm"` + Minimal bool `name:"minimal" help:"Spend extra time to make sure the smallest possible diff is produced"` + DiffAlgorithm string `name:"diff-algorithm" help:"Choose a diff algorithm, supported: histogram|onp|myers|patience|minimal"` From string `arg:"" optional:"" name:"from" help:"Revision from"` To string `arg:"" optional:"" name:"to" help:"Revision to"` passthroughArgs []string `kong:"-"` - useColor bool `kong:"-"` } const ( @@ -55,8 +61,47 @@ func (c *Diff) Passthrough(paths []string) { c.passthroughArgs = append(c.passthroughArgs, paths...) } -func (c *Diff) NewOptions() *zeta.DiffContextOptions { - opts := &zeta.DiffContextOptions{ +var ( + diffAlgorithms = map[string]diferenco.Algorithm{ + "histogram": diferenco.Histogram, + "onp": diferenco.ONP, + "myers": diferenco.Myers, + "patience": diferenco.Patience, + "minimal": diferenco.Minimal, + } +) + +func (c *Diff) checkAlgorithm() (diferenco.Algorithm, error) { + if len(c.DiffAlgorithm) != 0 { + if a, ok := diffAlgorithms[c.DiffAlgorithm]; ok { + return a, nil + } + return diferenco.Unspecified, fmt.Errorf("unsupport algorithms %s'", c.DiffAlgorithm) + } + if c.Histogram { + return diferenco.Histogram, nil + } + if c.ONP { + return diferenco.ONP, nil + } + if c.Myers { + return diferenco.Myers, nil + } + if c.Patience { + return diferenco.Patience, nil + } + if c.Minimal { + return diferenco.Minimal, nil + } + return diferenco.Unspecified, nil +} + +func (c *Diff) NewOptions() (*zeta.DiffOptions, error) { + a, err := c.checkAlgorithm() + if err != nil { + return nil, err + } + opts := &zeta.DiffOptions{ NameOnly: c.NameOnly, NameStatus: c.NameStatus, NumStat: c.Numstat, @@ -68,36 +113,36 @@ func (c *Diff) NewOptions() *zeta.DiffContextOptions { From: c.From, To: c.To, MergeBase: c.MergeBase, - UseColor: c.useColor, Textconv: c.Textconv, + Algorithm: a, + NewOutput: c.NewOutput, } if len(c.To) == 0 { if from, to, ok := strings.Cut(c.From, "..."); ok { opts.From = from opts.To = to - opts.ThreeWayCompare = true - return opts + opts.W3 = true + return opts, nil } if from, to, ok := strings.Cut(c.From, ".."); ok { opts.From = from opts.To = to - return opts + return opts, nil } } - return opts + return opts, nil } -func (c *Diff) NewOut(ctx context.Context) (io.WriteCloser, error) { +func (c *Diff) NewOutput(ctx context.Context) (io.WriteCloser, bool, error) { if len(c.Output) != 0 { if err := os.MkdirAll(filepath.Dir(c.Output), 0755); err != nil { - return nil, err + return nil, false, err } fd, err := os.Create(c.Output) - return fd, err + return fd, false, err } printer := zeta.NewPrinter(ctx) - c.useColor = printer.UseColor() - return printer, nil + return printer, printer.UseColor(), nil } func (c *Diff) Run(g *Globals) error { @@ -111,16 +156,13 @@ func (c *Diff) Run(g *Globals) error { } defer r.Close() w := r.Worktree() - newCtx, cancelCtx := context.WithCancelCause(context.Background()) - defer cancelCtx(nil) - out, err := c.NewOut(newCtx) + opts, err := c.NewOptions() if err != nil { - fmt.Fprintf(os.Stderr, "new output file error: %v\n", err) + fmt.Fprintf(os.Stderr, "parse options error: %v\n", err) return err } - defer out.Close() - if err = w.DiffContext(newCtx, c.NewOptions(), out); err != nil { - cancelCtx(err) + if err = w.DiffContext(context.Background(), opts); err != nil { + return err } - return err + return nil } diff --git a/pkg/tr/languages/zh-CN.toml b/pkg/tr/languages/zh-CN.toml index 366cfcb..d5009d0 100644 --- a/pkg/tr/languages/zh-CN.toml +++ b/pkg/tr/languages/zh-CN.toml @@ -270,6 +270,12 @@ "If --merge-base is given, use the common ancestor of and HEAD instead" = "如果给定 --merge-base,则使用 与 HEAD 的共同祖先" "Convert text to Unicode and compare differences" = "将文本转变为 Unicode 然后再比较差异" "Output to a specific file instead of stdout" = "输出到特定文件而不是 stdout" +"Generate a diff using the \"Histogram diff\" algorithm" = "使用 \"Histogram diff\" 算法生成差异" +"Generate a diff using the \"O(NP) diff\" algorithm" = "使用 \"O(NP) diff\" 算法生成差异" +"Generate a diff using the \"Myers diff\" algorithm" = "使用 \"Myers diff\" 算法生成差异" +"Generate a diff using the \"Patience diff\" algorithm" = "使用 \"Patience diff\" 算法生成差异" +"Choose a diff algorithm, supported: histogram|onp|myers|patience|minimal" = "选择一个 diff 算法,支持:histogram|onp|myers|patience|minimal" +"Spend extra time to make sure the smallest possible diff is produced" = "花费额外的时间来确保产生尽可能最小的差异" # RM "Remove files from the working tree and from the index" = "从工作树和索引中删除文件" "Override the up-to-date check" = "忽略文件更新状态检查" diff --git a/pkg/zeta/options.go b/pkg/zeta/options.go index 25f303b..f6cef89 100644 --- a/pkg/zeta/options.go +++ b/pkg/zeta/options.go @@ -4,22 +4,15 @@ package zeta import ( - "context" "errors" - "fmt" - "io" - "math" "os" "regexp" "strconv" "strings" - "syscall" "time" "github.com/ProtonMail/go-crypto/openpgp" - "github.com/antgroup/hugescm/modules/merkletrie" "github.com/antgroup/hugescm/modules/plumbing" - fdiff "github.com/antgroup/hugescm/modules/plumbing/format/diff" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/zeta/object" ) @@ -461,189 +454,3 @@ func (o *GrepOptions) validate(r *Repository) error { return nil } - -type DiffContextOptions struct { - NameOnly bool - NameStatus bool // name status - NumStat bool - Stat bool - ShortStat bool - Staged bool - NewLine byte - MergeBase string - From string - To string - PathSpec []string - Textconv bool - UseColor bool - ThreeWayCompare bool -} - -func (opts *DiffContextOptions) PatchContext(ctx context.Context, cs object.Changes) (*object.Patch, error) { - return cs.PatchContext(ctx, opts.Textconv) -} - -func (opts *DiffContextOptions) formatChanges(changes merkletrie.Changes, w io.Writer) error { - m := NewMatcher(opts.PathSpec) - if opts.NameOnly { - for _, c := range changes { - name := nameFromAction(&c) - if !m.Match(name) { - continue - } - fmt.Fprintf(w, "%s%c", name, opts.NewLine) - } - return nil - } - for _, c := range changes { - name := nameFromAction(&c) - if !m.Match(name) { - continue - } - a, err := c.Action() - if err != nil { - return err - } - fmt.Fprintf(w, "%c %s%c", a.Byte(), name, opts.NewLine) - } - return nil -} - -func numPadding(i int, padding int) string { - s := strconv.Itoa(i) - if len(s) >= padding { - return s - } - return s + strings.Repeat(" ", padding-len(s)) -} - -func numPaddingLeft(i int, padding int) string { - s := strconv.Itoa(i) - if len(s) >= padding { - return s - } - return strings.Repeat(" ", padding-len(s)) + s -} - -func (opts *DiffContextOptions) formatStats(w io.Writer, stats object.FileStats) error { - var added, deleted int - var nameLen, modified int - for _, s := range stats { - added += s.Addition - deleted += s.Deletion - nameLen = max(nameLen, len(s.Name)) - modified = max(modified, s.Addition+s.Deletion) - } - scaleFactor := 1.0 - sizePadding := len(strconv.Itoa(modified)) - for _, fs := range stats { - addn := float64(fs.Addition) - deln := float64(fs.Deletion) - addc := int(math.Floor(addn / scaleFactor)) - delc := int(math.Floor(deln / scaleFactor)) - if addc < 0 { - addc = 0 - } - if delc < 0 { - delc = 0 - } - adds := strings.Repeat("+", addc) - dels := strings.Repeat("-", delc) - fmt.Fprintf(w, "%s%s | %s %s%s%c", fs.Name, strings.Repeat(" ", nameLen-len(fs.Name)), numPaddingLeft(fs.Addition+fs.Deletion, sizePadding), adds, dels, opts.NewLine) - } - return nil -} - -func IsErrUnexpectedEOF(err error) bool { - return errors.Is(err, syscall.EPIPE) || err == io.ErrClosedPipe || err == io.ErrUnexpectedEOF -} - -func (opts *DiffContextOptions) format(patch *object.Patch, w io.Writer) error { - if opts.ShortStat { - stats := patch.Stats() - var added, deleted int - for _, s := range stats { - added += s.Addition - deleted += s.Deletion - } - fmt.Fprintf(w, " %d files changed, %d insertions(+), %d deletions(-)%c", len(stats), added, deleted, opts.NewLine) - return nil - } - if opts.NumStat { - stats := patch.Stats() - var ma, md int - for _, s := range stats { - ma = max(ma, s.Addition) - md = max(md, s.Deletion) - } - addPadding := len(strconv.Itoa(ma)) + 4 - deletePadding := len(strconv.Itoa(md)) + 4 - for _, s := range stats { - fmt.Fprintf(w, "%s %s %s%c", numPadding(s.Addition, addPadding), numPadding(s.Deletion, deletePadding), s.Name, opts.NewLine) - } - return nil - } - if opts.Stat { - stats := patch.Stats() - return opts.formatStats(w, stats) - } - if err := patch.EncodeEx(w, opts.UseColor); err != nil && !IsErrUnexpectedEOF(err) { - return err - } - return nil -} - -func nameFromFilePacth(p fdiff.FilePatch) (name string) { - a, b := p.Files() - if a != nil { - name = a.Path() - } - if len(name) == 0 && b != nil { - name = b.Path() - } - return name -} - -func statFormFilePatch(p fdiff.FilePatch) byte { - a, b := p.Files() - if a == nil { - return 'A' - } - if b == nil { - return 'D' - } - return 'M' -} - -func (opts *DiffContextOptions) formatEx(patch *object.Patch, w io.Writer) error { - m := NewMatcher(opts.PathSpec) - if opts.NameOnly { - for _, p := range patch.FilePatches() { - name := nameFromFilePacth(p) - if !m.Match(name) { - continue - } - fmt.Fprintf(w, "%s%c", name, opts.NewLine) - } - return nil - } - if opts.NameStatus { - for _, p := range patch.FilePatches() { - name := nameFromFilePacth(p) - if !m.Match(name) { - continue - } - fmt.Fprintf(w, "%c %s%c", statFormFilePatch(p), name, opts.NewLine) - } - return nil - } - var patchs []fdiff.FilePatch - for _, p := range patch.FilePatches() { - name := nameFromFilePacth(p) - if !m.Match(name) { - continue - } - patchs = append(patchs, p) - } - return opts.format(object.NewPatch(patch.Message(), patchs), w) -} diff --git a/pkg/zeta/showdiff.go b/pkg/zeta/showdiff.go new file mode 100644 index 0000000..a465b74 --- /dev/null +++ b/pkg/zeta/showdiff.go @@ -0,0 +1,245 @@ +package zeta + +import ( + "context" + "fmt" + "io" + "math" + "strconv" + "strings" + + "github.com/antgroup/hugescm/modules/diferenco" + "github.com/antgroup/hugescm/modules/diferenco/color" + "github.com/antgroup/hugescm/modules/merkletrie" + "github.com/antgroup/hugescm/modules/zeta/object" +) + +type DiffOptions struct { + NameOnly bool + NameStatus bool // name status + NumStat bool + Stat bool + ShortStat bool + Staged bool + NewLine byte + MergeBase string + From string + To string + PathSpec []string + Textconv bool + UseColor bool + W3 bool + Algorithm diferenco.Algorithm + NewOutput func(context.Context) (io.WriteCloser, bool, error) // new writer func +} + +func (opts *DiffOptions) po() *object.PatchOptions { + m := NewMatcher(opts.PathSpec) + return &object.PatchOptions{Textconv: opts.Textconv, Algorithm: opts.Algorithm, Match: m.Match} +} + +func (opts *DiffOptions) ShowChanges(ctx context.Context, changes object.Changes) error { + if opts.NameOnly { + return opts.showNameOnly(ctx, changes) + } + if opts.NameStatus { + return opts.showNameStatus(ctx, changes) + } + if opts.showStatsOnly() { + fileStats, err := changes.Stats(ctx, opts.po()) + if err != nil { + return err + } + return opts.showStats(ctx, fileStats) + } + patch, err := changes.Patch(ctx, opts.po()) + if err != nil { + return err + } + return opts.showPatch(ctx, patch) +} + +func (opts *DiffOptions) showNameOnly(ctx context.Context, changes object.Changes) error { + w, _, err := opts.NewOutput(ctx) + if err != nil { + return err + } + defer w.Close() + m := NewMatcher(opts.PathSpec) + for _, c := range changes { + name := c.Name() + if !m.Match(name) { + continue + } + fmt.Fprintf(w, "%s%c", name, opts.NewLine) + } + return nil +} + +func changeStat(c *object.Change) (string, byte) { + action, err := c.Action() + if err != nil { + return "", ' ' + } + switch action { + case merkletrie.Insert: + return c.To.Name, 'A' + case merkletrie.Delete: + return c.From.Name, 'D' + case merkletrie.Modify: + if c.From.Name != c.To.Name { + return c.From.Name, 'R' + } + return c.From.Name, 'M' + } + return "", ' ' +} + +func (opts *DiffOptions) showNameStatus(ctx context.Context, changes object.Changes) error { + w, _, err := opts.NewOutput(ctx) + if err != nil { + return err + } + defer w.Close() + m := NewMatcher(opts.PathSpec) + for _, c := range changes { + name, stat := changeStat(c) + if !m.Match(name) { + continue + } + fmt.Fprintf(w, "%c %s%c", stat, name, opts.NewLine) + } + return nil +} + +func (opts *DiffOptions) showStatsOnly() bool { + return opts.NameStatus || opts.NumStat || opts.ShortStat || opts.Stat +} + +func numPadding(i int, padding int) string { + s := strconv.Itoa(i) + if len(s) >= padding { + return s + } + return s + strings.Repeat(" ", padding-len(s)) +} + +func numPaddingLeft(i int, padding int) string { + s := strconv.Itoa(i) + if len(s) >= padding { + return s + } + return strings.Repeat(" ", padding-len(s)) + s +} + +// showStats: show stats +// +// Original implementation: https://github.com/git/git/blob/1a87c842ece327d03d08096395969aca5e0a6996/diff.c#L2615 +// Parts of the output: +// |<+++/---> +// example: " main.go | 10 +++++++--- " +func (opts *DiffOptions) showStats(ctx context.Context, fileStats object.FileStats) error { + w, useColor, err := opts.NewOutput(ctx) + if err != nil { + return err + } + defer w.Close() + if opts.ShortStat { + var added, deleted int + for _, s := range fileStats { + added += s.Addition + deleted += s.Deletion + } + fmt.Fprintf(w, " %d files changed, %d insertions(+), %d deletions(-)%c", len(fileStats), added, deleted, opts.NewLine) + return nil + } + if opts.NumStat { + var ma, md int + for _, s := range fileStats { + ma = max(ma, s.Addition) + md = max(md, s.Deletion) + } + addPadding := len(strconv.Itoa(ma)) + 4 + deletePadding := len(strconv.Itoa(md)) + 4 + for _, s := range fileStats { + fmt.Fprintf(w, "%s %s %s%c", numPadding(s.Addition, addPadding), numPadding(s.Deletion, deletePadding), s.Name, opts.NewLine) + } + return nil + } + var added, deleted int + var nameLen, modified int + for _, s := range fileStats { + added += s.Addition + deleted += s.Deletion + nameLen = max(nameLen, len(s.Name)) + modified = max(modified, s.Addition+s.Deletion) + } + scaleFactor := 1.0 + sizePadding := len(strconv.Itoa(modified)) + for _, fs := range fileStats { + addn := float64(fs.Addition) + deln := float64(fs.Deletion) + addc := int(math.Floor(addn / scaleFactor)) + delc := int(math.Floor(deln / scaleFactor)) + if addc < 0 { + addc = 0 + } + if delc < 0 { + delc = 0 + } + adds := strings.Repeat("+", addc) + dels := strings.Repeat("-", delc) + if useColor { + _, _ = fmt.Fprintf(w, " %s%s | %s \x1b[32m%s\x1b[31m%s\x1b[0m\n", fs.Name, strings.Repeat(" ", nameLen-len(fs.Name)), numPaddingLeft(fs.Addition+fs.Deletion, sizePadding), adds, dels) + continue + } + fmt.Fprintf(w, "%s%s | %s %s%s%c", fs.Name, strings.Repeat(" ", nameLen-len(fs.Name)), numPaddingLeft(fs.Addition+fs.Deletion, sizePadding), adds, dels, opts.NewLine) + } + fmt.Fprintf(w, " %d files changed, %d insertions(+), %d deletions(-)%c", len(fileStats), added, deleted, opts.NewLine) + return nil +} + +func (opts *DiffOptions) showPatch(ctx context.Context, patch []*diferenco.Unified) error { + w, useColor, err := opts.NewOutput(ctx) + if err != nil { + return err + } + defer w.Close() + e := diferenco.NewUnifiedEncoder(w) + if useColor { + e.SetColor(color.NewColorConfig()) + } + _ = e.Encode(patch) + return nil +} + +func (opts *DiffOptions) showChangesStatus(ctx context.Context, changes merkletrie.Changes) error { + w, _, err := opts.NewOutput(ctx) + if err != nil { + return err + } + defer w.Close() + m := NewMatcher(opts.PathSpec) + if opts.NameOnly { + for _, c := range changes { + name := nameFromAction(&c) + if !m.Match(name) { + continue + } + fmt.Fprintf(w, "%s%c", name, opts.NewLine) + } + return nil + } + for _, c := range changes { + name := nameFromAction(&c) + if !m.Match(name) { + continue + } + a, err := c.Action() + if err != nil { + return err + } + fmt.Fprintf(w, "%c %s%c", a.Byte(), name, opts.NewLine) + } + return nil +} diff --git a/pkg/zeta/worktree_commit.go b/pkg/zeta/worktree_commit.go index 11da56d..8e72a3f 100644 --- a/pkg/zeta/worktree_commit.go +++ b/pkg/zeta/worktree_commit.go @@ -293,7 +293,7 @@ func (w *Worktree) Stats(ctx context.Context) error { die_error("open HEAD: %v", err) return err } - stats, err := cc.StatsContext(ctx, noder.NewSparseTreeMatcher(w.Core.SparseDirs), false) + stats, err := cc.StatsContext(ctx, noder.NewSparseTreeMatcher(w.Core.SparseDirs), &object.PatchOptions{}) if plumbing.IsNoSuchObject(err) { fmt.Fprintf(os.Stderr, "incomplete checkout, skipping change line count statistics\n") return nil diff --git a/pkg/zeta/worktree_diff.go b/pkg/zeta/worktree_diff.go index ed6b058..66603b0 100644 --- a/pkg/zeta/worktree_diff.go +++ b/pkg/zeta/worktree_diff.go @@ -7,43 +7,17 @@ import ( "context" "errors" "fmt" - "io" "os" - "github.com/antgroup/hugescm/modules/diff" - dmp "github.com/antgroup/hugescm/modules/diffmatchpatch" + "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/merkletrie" "github.com/antgroup/hugescm/modules/merkletrie/filesystem" mindex "github.com/antgroup/hugescm/modules/merkletrie/index" "github.com/antgroup/hugescm/modules/merkletrie/noder" "github.com/antgroup/hugescm/modules/plumbing" - "github.com/antgroup/hugescm/modules/plumbing/filemode" - fdiff "github.com/antgroup/hugescm/modules/plumbing/format/diff" "github.com/antgroup/hugescm/modules/zeta/object" ) -type fileWrapper struct { - name string - hash plumbing.Hash - mode filemode.FileMode -} - -func (f *fileWrapper) Path() string { - return f.name -} - -func (f *fileWrapper) Hash() plumbing.Hash { - return f.hash -} - -func (f *fileWrapper) Mode() filemode.FileMode { - return f.mode -} - -var ( - _ fdiff.File = &fileWrapper{} -) - func (w *Worktree) openText(p string, size int64, textConv bool) (string, error) { fd, err := w.fs.Open(p) if err != nil { @@ -64,49 +38,45 @@ func (w *Worktree) openBlobText(ctx context.Context, oid plumbing.Hash, textConv return content, err } -const ( - diffSizeLimit = 50 * 1024 * 1024 // 50M -) - -func (w *Worktree) resolveContent(ctx context.Context, p noder.Path, textconv bool) (f fdiff.File, content string, fragments bool, bin bool, err error) { +func (w *Worktree) readContent(ctx context.Context, p noder.Path, textConv bool) (f *diferenco.File, content string, fragments bool, bin bool, err error) { if p == nil { return nil, "", false, false, nil } name := p.String() switch a := p.Last().(type) { case *filesystem.Node: - f = &fileWrapper{name: name, hash: a.HashRaw(), mode: a.Mode()} - if a.Size() > diffSizeLimit { + f = &diferenco.File{Name: name, Hash: a.HashRaw().String(), Mode: uint32(a.Mode())} + if a.Size() > object.MAX_DIFF_SIZE { return f, "", false, true, nil } - content, err = w.openText(name, a.Size(), textconv) + content, err = w.openText(name, a.Size(), textConv) if err == object.ErrNotTextContent { return f, "", false, true, nil } return f, content, false, false, nil case *mindex.Node: - f = &fileWrapper{name: name, hash: a.HashRaw(), mode: a.Mode()} + f = &diferenco.File{Name: name, Hash: a.HashRaw().String(), Mode: uint32(a.Mode())} if a.IsFragments() { return f, "", true, false, err } - if a.Size() > diffSizeLimit { + if a.Size() > object.MAX_DIFF_SIZE { return f, "", false, true, nil } - content, err = w.openBlobText(ctx, a.HashRaw(), textconv) + content, err = w.openBlobText(ctx, a.HashRaw(), textConv) // When the current repository uses an incomplete checkout mechanism, we treat these files as binary files, i.e. no differences can be calculated. if err == object.ErrNotTextContent || plumbing.IsNoSuchObject(err) { return f, "", false, true, nil } return f, content, false, false, nil case *object.TreeNoder: - f = &fileWrapper{name: name, hash: a.HashRaw(), mode: a.Mode()} + f = &diferenco.File{Name: name, Hash: a.HashRaw().String(), Mode: uint32(a.Mode())} if a.IsFragments() { return f, "", true, false, err } - if a.Size() > diffSizeLimit { + if a.Size() > object.MAX_DIFF_SIZE { return f, "", false, true, nil } - content, err = w.openBlobText(ctx, a.HashRaw(), textconv) + content, err = w.openBlobText(ctx, a.HashRaw(), textConv) if err == object.ErrNotTextContent || plumbing.IsNoSuchObject(err) { return f, "", false, true, nil } @@ -116,55 +86,96 @@ func (w *Worktree) resolveContent(ctx context.Context, p noder.Path, textconv bo return nil, "", false, false, errors.New("unsupport noder type") } -func (w *Worktree) filePatchWithContext(ctx context.Context, c *merkletrie.Change, textconv bool) (fdiff.FilePatch, error) { +func (w *Worktree) filePatchWithContext(ctx context.Context, c *merkletrie.Change, textconv bool) (*diferenco.Unified, error) { if c.From == nil && c.To == nil { return nil, errors.New("malformed change: nil from and to") } - from, fromContent, isFragmentsA, isBinA, err := w.resolveContent(ctx, c.From, textconv) + from, fromContent, isFragmentsA, isBinA, err := w.readContent(ctx, c.From, textconv) if err != nil { return nil, err } - to, toContent, isFragmentsB, isBinB, err := w.resolveContent(ctx, c.To, textconv) + to, toContent, isFragmentsB, isBinB, err := w.readContent(ctx, c.To, textconv) if err != nil { return nil, err } if isFragmentsA || isFragmentsB { - return object.NewFilePatchWrapper(nil, from, to, true), nil + return &diferenco.Unified{From: from, To: to, IsFragments: true}, nil } if isBinA || isBinB { - return object.NewFilePatchWrapper(nil, from, to, false), nil - } - diffs, err := diff.Do(fromContent, toContent) - if err != nil { - return object.NewFilePatchWrapper(nil, from, to, false), nil + return &diferenco.Unified{From: from, To: to, IsBinary: true}, nil } + return diferenco.DoUnified(ctx, &diferenco.Options{From: from, To: to, S1: fromContent, S2: toContent}) +} - var chunks []fdiff.Chunk - for _, d := range diffs { +// getPatchContext: In the object package, there is no patch implementation for worktree diff, so we need +func (w *Worktree) getPatchContext(ctx context.Context, changes merkletrie.Changes, m *Matcher, textConv bool) ([]*diferenco.Unified, error) { + var filePatches []*diferenco.Unified + for _, c := range changes { select { case <-ctx.Done(): return nil, object.ErrCanceled default: } - - var op fdiff.Operation - switch d.Type { - case dmp.DiffEqual: - op = fdiff.Equal - case dmp.DiffDelete: - op = fdiff.Delete - case dmp.DiffInsert: - op = fdiff.Add + name := nameFromAction(&c) + if !m.Match(name) { + continue + } + p, err := w.filePatchWithContext(ctx, &c, textConv) + if err != nil { + return nil, err } - chunks = append(chunks, object.NewTextChunk(d.Text, op)) + filePatches = append(filePatches, p) } - return object.NewFilePatchWrapper(chunks, from, to, false), nil + return filePatches, nil } -// getPatchContext: In the object package, there is no patch implementation for worktree diff, so we need -func (w *Worktree) getPatchContext(ctx context.Context, changes merkletrie.Changes, m *Matcher, textconv bool) ([]fdiff.FilePatch, error) { - var filePatches []fdiff.FilePatch +func nameFromDifeName(from, to *diferenco.File) string { + if from == nil && to == nil { + return "" + } + if from == nil { + return to.Name + } + if to == nil { + return from.Name + } + if from.Name != to.Name { + return fmt.Sprintf("%s => %s", from.Name, to.Name) + } + return from.Name +} + +func (w *Worktree) fileStatWithContext(ctx context.Context, c *merkletrie.Change, textconv bool) (*object.FileStat, error) { + if c.From == nil && c.To == nil { + return nil, errors.New("malformed change: nil from and to") + } + from, fromContent, isFragmentsA, isBinA, err := w.readContent(ctx, c.From, textconv) + if err != nil { + return nil, err + } + to, toContent, isFragmentsB, isBinB, err := w.readContent(ctx, c.To, textconv) + if err != nil { + return nil, err + } + s := &object.FileStat{Name: nameFromDifeName(from, to)} + if isFragmentsA || isFragmentsB { + return s, nil + } + if isBinA || isBinB { + return s, nil + } + stat, err := diferenco.Stat(ctx, &diferenco.Options{From: from, To: to, S1: fromContent, S2: toContent}) + if err != nil { + return nil, err + } + s.Addition = stat.Addition + s.Deletion = stat.Deletion + return s, nil +} + +func (w *Worktree) getStatsContext(ctx context.Context, changes merkletrie.Changes, m *Matcher, textConv bool) (object.FileStats, error) { + var fileStats []object.FileStat for _, c := range changes { select { case <-ctx.Done(): @@ -175,33 +186,45 @@ func (w *Worktree) getPatchContext(ctx context.Context, changes merkletrie.Chang if !m.Match(name) { continue } - fp, err := w.filePatchWithContext(ctx, &c, textconv) + s, err := w.fileStatWithContext(ctx, &c, textConv) if err != nil { return nil, err } - filePatches = append(filePatches, fp) + fileStats = append(fileStats, *s) } - return filePatches, nil + return fileStats, nil } -func (w *Worktree) diffWorktree(ctx context.Context, opts *DiffContextOptions, writer io.Writer) error { - changes, err := w.diffStagingWithWorktree(ctx, false, true) - if err != nil { - return err - } +func (w *Worktree) showChanges(ctx context.Context, opts *DiffOptions, changes merkletrie.Changes) error { if opts.NameOnly || opts.NameStatus { - return opts.formatChanges(changes, writer) + return opts.showChangesStatus(ctx, changes) } m := NewMatcher(opts.PathSpec) + if opts.showStatsOnly() { + fileStats, err := w.getStatsContext(ctx, changes, m, opts.Textconv) + if err != nil { + return err + } + return opts.showStats(ctx, fileStats) + } + filePatchs, err := w.getPatchContext(ctx, changes, m, opts.Textconv) if err != nil { return err } - return opts.format(object.NewPatch("", filePatchs), writer) + return opts.showPatch(ctx, filePatchs) +} + +func (w *Worktree) diffWorktree(ctx context.Context, opts *DiffOptions) error { + changes, err := w.diffStagingWithWorktree(ctx, false, true) + if err != nil { + return err + } + return w.showChanges(ctx, opts, changes) } -func (w *Worktree) readBaseTree(ctx context.Context, oid plumbing.Hash, opts *DiffContextOptions) (*object.Tree, error) { +func (w *Worktree) readBaseTree(ctx context.Context, oid plumbing.Hash, opts *DiffOptions) (*object.Tree, error) { if len(opts.MergeBase) == 0 { return w.readTree(ctx, oid, "") } @@ -223,7 +246,7 @@ func (w *Worktree) readBaseTree(ctx context.Context, oid plumbing.Hash, opts *Di return bases[0].Root(ctx) } -func (w *Worktree) DiffTreeWithIndex(ctx context.Context, oid plumbing.Hash, opts *DiffContextOptions, writer io.Writer) error { +func (w *Worktree) DiffTreeWithIndex(ctx context.Context, oid plumbing.Hash, opts *DiffOptions) error { tree, err := w.readBaseTree(ctx, oid, opts) if err != nil { return err @@ -232,18 +255,10 @@ func (w *Worktree) DiffTreeWithIndex(ctx context.Context, oid plumbing.Hash, opt if err != nil { return err } - if opts.NameOnly || opts.NameStatus { - return opts.formatChanges(changes, writer) - } - m := NewMatcher(opts.PathSpec) - filePatchs, err := w.getPatchContext(ctx, changes, m, opts.Textconv) - if err != nil { - return err - } - return opts.format(object.NewPatch("", filePatchs), writer) + return w.showChanges(ctx, opts, changes) } -func (w *Worktree) DiffTreeWithWorktree(ctx context.Context, oid plumbing.Hash, opts *DiffContextOptions, writer io.Writer) error { +func (w *Worktree) DiffTreeWithWorktree(ctx context.Context, oid plumbing.Hash, opts *DiffOptions) error { tree, err := w.readBaseTree(ctx, oid, opts) if err != nil { return err @@ -253,19 +268,11 @@ func (w *Worktree) DiffTreeWithWorktree(ctx context.Context, oid plumbing.Hash, return err } changes := w.excludeIgnoredChanges(rawChanges) - if opts.NameOnly || opts.NameStatus { - return opts.formatChanges(changes, writer) - } - m := NewMatcher(opts.PathSpec) - filePatchs, err := w.getPatchContext(ctx, changes, m, opts.Textconv) - if err != nil { - return err - } - return opts.format(object.NewPatch("", filePatchs), writer) + return w.showChanges(ctx, opts, changes) } -func (w *Worktree) resolveBetweenTree(ctx context.Context, opts *DiffContextOptions) (oldTree *object.Tree, newTree *object.Tree, err error) { - if !opts.ThreeWayCompare { +func (w *Worktree) resolveBetweenTree(ctx context.Context, opts *DiffOptions) (oldTree *object.Tree, newTree *object.Tree, err error) { + if !opts.W3 { if oldTree, err = w.parseTreeExhaustive(ctx, opts.From, ""); err != nil { fmt.Fprintf(os.Stderr, "resolve tree: %s error: %v\n", opts.From, err) return @@ -309,7 +316,8 @@ func (w *Worktree) resolveBetweenTree(ctx context.Context, opts *DiffContextOpti return } -func (w *Worktree) between(ctx context.Context, opts *DiffContextOptions, writer io.Writer) error { +func (w *Worktree) between(ctx context.Context, opts *DiffOptions) error { + w.DbgPrint("from %s to %s", opts.From, opts.To) oldTree, newTree, err := w.resolveBetweenTree(ctx, opts) if err != nil { return err @@ -318,23 +326,19 @@ func (w *Worktree) between(ctx context.Context, opts *DiffContextOptions, writer DetectRenames: true, OnlyExactRenames: true, } + w.DbgPrint("oldTree %s newTree %s", oldTree.Hash, newTree.Hash) changes, err := object.DiffTreeWithOptions(ctx, oldTree, newTree, o, noder.NewSparseTreeMatcher(w.Core.SparseDirs)) if err != nil { fmt.Fprintf(os.Stderr, "diff tree error: %v\n", err) return err } - patch, err := opts.PatchContext(ctx, changes) - if err != nil { - die_error("patch %v", err) - return err - } - return opts.formatEx(patch, writer) + return opts.ShowChanges(ctx, changes) } -func (w *Worktree) DiffContext(ctx context.Context, opts *DiffContextOptions, writer io.Writer) error { +func (w *Worktree) DiffContext(ctx context.Context, opts *DiffOptions) error { if len(opts.From) != 0 && len(opts.To) != 0 { - w.DbgPrint("from %s to %s", opts.From, opts.To) - return w.between(ctx, opts, writer) + + return w.between(ctx, opts) } if len(opts.From) != 0 { oid, err := w.Revision(ctx, opts.From) @@ -343,14 +347,14 @@ func (w *Worktree) DiffContext(ctx context.Context, opts *DiffContextOptions, wr return err } if opts.Staged { - if err := w.DiffTreeWithIndex(ctx, oid, opts, writer); err != nil { + if err := w.DiffTreeWithIndex(ctx, oid, opts); err != nil { fmt.Fprintf(os.Stderr, "zeta diff --cached error: %v\n", err) return err } return nil } w.DbgPrint("from %s to worktree", oid) - if err := w.DiffTreeWithWorktree(ctx, oid, opts, writer); err != nil { + if err := w.DiffTreeWithWorktree(ctx, oid, opts); err != nil { fmt.Fprintf(os.Stderr, "zeta diff error: %v\n", err) return err } @@ -362,13 +366,13 @@ func (w *Worktree) DiffContext(ctx context.Context, opts *DiffContextOptions, wr fmt.Fprintf(os.Stderr, "resolve current branch error: %v\n", err) return err } - if err := w.DiffTreeWithIndex(ctx, ref.Hash(), opts, writer); err != nil { + if err := w.DiffTreeWithIndex(ctx, ref.Hash(), opts); err != nil { fmt.Fprintf(os.Stderr, "zeta diff --cached error: %v\n", err) return err } return nil } - if err := w.diffWorktree(ctx, opts, writer); err != nil { + if err := w.diffWorktree(ctx, opts); err != nil { fmt.Fprintf(os.Stderr, "zeta diff error: %v\n", err) return err } diff --git a/pkg/zeta/worktree_merge.go b/pkg/zeta/worktree_merge.go index 843b409..5d8eebc 100644 --- a/pkg/zeta/worktree_merge.go +++ b/pkg/zeta/worktree_merge.go @@ -485,11 +485,10 @@ func (w *Worktree) mergeStat(ctx context.Context, oldRev, newRev plumbing.Hash) die_error("unable diff tree: old %v new %v: %v", oldRev, newRev, err) return err } - patch, err := changes.PatchContext(ctx, false) + stats, err := changes.Stats(ctx, &object.PatchOptions{}) if err != nil { return err } - stats := patch.Stats() var added, deleted int for _, s := range stats { added += s.Addition diff --git a/pkg/zeta/worktree_stash.go b/pkg/zeta/worktree_stash.go index 19f3ca2..8ecc37d 100644 --- a/pkg/zeta/worktree_stash.go +++ b/pkg/zeta/worktree_stash.go @@ -212,7 +212,7 @@ func (w *Worktree) StashShow(ctx context.Context, stashRev string) error { die_error("open HEAD: %v", err) return err } - stats, err := cc.StatsContext(ctx, noder.NewSparseTreeMatcher(w.Core.SparseDirs), false) + stats, err := cc.StatsContext(ctx, noder.NewSparseTreeMatcher(w.Core.SparseDirs), &object.PatchOptions{}) if plumbing.IsNoSuchObject(err) { fmt.Fprintf(os.Stderr, "incomplete checkout, skipping change line count statistics\n") return nil diff --git a/pkg/zeta/worktree_test.go b/pkg/zeta/worktree_test.go index f8e450f..dbde9ec 100644 --- a/pkg/zeta/worktree_test.go +++ b/pkg/zeta/worktree_test.go @@ -308,179 +308,6 @@ func TestGrep(t *testing.T) { } } -func TestPatch(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - oldTree, err := w.odb.Tree(context.TODO(), plumbing.NewHash("341fe34daec03aa84fb1fa5663bca597433a43da09fa93430116737f237cc81a")) - if err != nil { - fmt.Fprintf(os.Stderr, "open tree error: %v\n", err) - return - } - newTree, err := w.odb.Tree(context.TODO(), plumbing.NewHash("7475afa32e8a99c9caffc626d96138c55369c121cc399be68e8da1801724e951")) - if err != nil { - fmt.Fprintf(os.Stderr, "open tree error: %v\n", err) - return - } - cs, err := oldTree.DiffContext(context.Background(), newTree, noder.NewSparseTreeMatcher(r.Core.SparseDirs)) - if err != nil { - fmt.Fprintf(os.Stderr, "diff error: %v\n", err) - return - } - p, err := cs.PatchContext(context.Background(), false) - if err != nil { - fmt.Fprintf(os.Stderr, "patch error: %v\n", err) - return - } - _ = p.Encode(os.Stderr) -} - -func TestPatchFragments(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - oldTree, err := w.odb.Tree(context.TODO(), plumbing.NewHash("0bf97c2dbd2952873e27625dcdb969653f27906bc809a030d1a7634aa468e65e")) - if err != nil { - fmt.Fprintf(os.Stderr, "open tree error: %v\n", err) - return - } - newTree, err := w.odb.Tree(context.TODO(), plumbing.NewHash("0b3baff41289624d0ece7c02c9dc7470489b136d786528cdb13df720ae40f4ec")) - if err != nil { - fmt.Fprintf(os.Stderr, "open tree error: %v\n", err) - return - } - cs, err := oldTree.DiffContext(context.Background(), newTree, noder.NewSparseTreeMatcher(r.Core.SparseDirs)) - if err != nil { - fmt.Fprintf(os.Stderr, "diff error: %v\n", err) - return - } - p, err := cs.PatchContext(context.Background(), false) - if err != nil { - fmt.Fprintf(os.Stderr, "patch error: %v\n", err) - return - } - _ = p.Encode(os.Stderr) -} - -func TestDiff0(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - if err := w.diffWorktree(context.Background(), &DiffContextOptions{}, os.Stderr); err != nil { - fmt.Fprintf(os.Stderr, "diff worktree error: %v\n", err) - } -} - -func TestDiff1(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - if err := w.DiffTreeWithIndex(context.Background(), plumbing.NewHash("e43a48f0bd80ba287bfe4f2ae059564e10fca0a6c7dccb4fe160945ff657cdee"), &DiffContextOptions{}, os.Stderr); err != nil { - fmt.Fprintf(os.Stderr, "diff staging error: %v\n", err) - } -} - -// b35c23072713e3bcf9053faf377c39edddb90c5eac321ca5711f308eebbac9f0 - -func TestDiff2(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - if err := w.DiffTreeWithIndex(context.Background(), plumbing.NewHash("b35c23072713e3bcf9053faf377c39edddb90c5eac321ca5711f308eebbac9f0"), &DiffContextOptions{}, os.Stderr); err != nil { - fmt.Fprintf(os.Stderr, "diff staging error: %v\n", err) - } -} - -func TestDiff3(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - if err := w.DiffTreeWithIndex(context.Background(), plumbing.NewHash("e43a48f0bd80ba287bfe4f2ae059564e10fca0a6c7dccb4fe160945ff657cdee"), &DiffContextOptions{NameStatus: true, NewLine: '\n'}, os.Stderr); err != nil { - fmt.Fprintf(os.Stderr, "diff staging error: %v\n", err) - } -} - -func TestDiff4(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - if err := w.DiffTreeWithIndex(context.Background(), plumbing.NewHash("e43a48f0bd80ba287bfe4f2ae059564e10fca0a6c7dccb4fe160945ff657cdee"), &DiffContextOptions{ShortStat: true, NewLine: '\n'}, os.Stderr); err != nil { - fmt.Fprintf(os.Stderr, "diff staging error: %v\n", err) - } -} - -func TestDiff5(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - if err := w.DiffTreeWithIndex(context.Background(), plumbing.NewHash("e43a48f0bd80ba287bfe4f2ae059564e10fca0a6c7dccb4fe160945ff657cdee"), &DiffContextOptions{NumStat: true, NewLine: '\n'}, os.Stderr); err != nil { - fmt.Fprintf(os.Stderr, "diff staging error: %v\n", err) - } -} - -func TestDiff6(t *testing.T) { - r, err := Open(context.Background(), &OpenOptions{ - Worktree: "/private/tmp/xh5", - }) - if err != nil { - fmt.Fprintf(os.Stderr, "open repo error: %v\n", err) - return - } - defer r.Close() - w := r.Worktree() - if err := w.DiffTreeWithIndex(context.Background(), plumbing.NewHash("e43a48f0bd80ba287bfe4f2ae059564e10fca0a6c7dccb4fe160945ff657cdee"), &DiffContextOptions{Stat: true, NewLine: '\n'}, os.Stderr); err != nil { - fmt.Fprintf(os.Stderr, "diff staging error: %v\n", err) - } -} - func TestStat(t *testing.T) { r, err := Open(context.Background(), &OpenOptions{ Worktree: "/private/tmp/xh5", @@ -495,7 +322,7 @@ func TestStat(t *testing.T) { fmt.Fprintf(os.Stderr, "open commit error: %v\n", err) return } - ss, err := cc.StatsContext(context.Background(), noder.NewSparseTreeMatcher(r.Core.SparseDirs), false) + ss, err := cc.StatsContext(context.Background(), noder.NewSparseTreeMatcher(r.Core.SparseDirs), &object.PatchOptions{}) if err != nil { fmt.Fprintf(os.Stderr, "stats commit error: %v\n", err) return diff --git a/utils/diff2/main.go b/utils/diff2/main.go index aa7a566..4c56389 100644 --- a/utils/diff2/main.go +++ b/utils/diff2/main.go @@ -67,10 +67,10 @@ func main() { u, err := diferenco.DoUnified(context.Background(), &diferenco.Options{ A: diferenco.Histogram, From: &diferenco.File{ - Path: os.Args[1], + Name: os.Args[1], }, To: &diferenco.File{ - Path: os.Args[2], + Name: os.Args[2], }, R1: fd1, R2: fd2, diff --git a/utils/diffbug/difffix_test.go b/utils/diffbug/difffix_test.go index 751d3a9..0e9304c 100644 --- a/utils/diffbug/difffix_test.go +++ b/utils/diffbug/difffix_test.go @@ -29,10 +29,10 @@ func TestDiffText(t *testing.T) { } u, err := diferenco.DoUnified(context.Background(), &diferenco.Options{ From: &diferenco.File{ - Path: "a.go", + Name: "a.go", }, To: &diferenco.File{ - Path: "a.go", + Name: "a.go", }, S1: string(bytesA), S2: string(bytesB),