-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Copy over the initial functionality from FollowTheProcess/test (#2)
## Summary <!-- Describe your changes in detail here, if it closes an open issue, include "Closes #<issue>" --> I had a branch with the very basics of this in `FollowTheProcess/test` before I decided it's actually better as it's own package
- Loading branch information
1 parent
518717a
commit fc8005e
Showing
23 changed files
with
982 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,10 @@ | ||
module github.com/FollowTheProcess/snapshot | ||
|
||
go 1.23 | ||
go 1.23 | ||
|
||
require ( | ||
github.com/FollowTheProcess/test v0.17.1 | ||
golang.org/x/tools v0.27.0 | ||
) | ||
|
||
require github.com/google/go-cmp v0.6.0 // indirect |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
github.com/FollowTheProcess/test v0.17.1 h1:j4TkMqzxvYoyAP9alaTNPgKOPUJHOBCs0z4fNJb7Kr0= | ||
github.com/FollowTheProcess/test v0.17.1/go.mod h1:LlRdAk8bwBZ5kP10xHOcOTknNUrHU347IH7RgAm2Dgs= | ||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= | ||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= | ||
golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= | ||
golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
// Package colour implements basic text colouring for showing text diffs. | ||
// | ||
// I'm really putting "a little copying is better than a little dependency" into action here | ||
// this is from FollowTheProcess/test. | ||
package colour | ||
|
||
// ANSI codes for coloured output, they are all the same length so as not to throw off | ||
// alignment of [text/tabwriter]. | ||
const ( | ||
codeRed = "\x1b[0;0031m" // Red, used for diff lines starting with '-' | ||
codeHeader = "\x1b[1;0036m" // Bold cyan, used for diff headers starting with '@@' | ||
codeGreen = "\x1b[0;0032m" // Green, used for diff lines starting with '+' | ||
codeReset = "\x1b[000000m" // Reset all attributes | ||
) | ||
|
||
// Header returns a diff header styled string. | ||
func Header(text string) string { | ||
return sprint(codeHeader, text) | ||
} | ||
|
||
// Green returns a green styled string. | ||
func Green(text string) string { | ||
return sprint(codeGreen, text) | ||
} | ||
|
||
// Red returns a red styled string. | ||
func Red(text string) string { | ||
return sprint(codeRed, text) | ||
} | ||
|
||
// sprint returns a string with a given colour and the reset code. | ||
// | ||
// It handles checking for NO_COLOR and FORCE_COLOR. | ||
func sprint(code, text string) string { | ||
return code + text + codeReset | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,271 @@ | ||
// Taken from Go's internal/diff with only very minor tweaks; those being: | ||
// - Adding an extra space between the diff character (+/-) and the line so we can easily colour it | ||
// - Lint ignores | ||
// - Renaming diff test package to diff_test | ||
// | ||
// Copyright 2022 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package diff | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"sort" | ||
"strings" | ||
) | ||
|
||
// A pair is a pair of values tracked for both the x and y side of a diff. | ||
// It is typically a pair of line indexes. | ||
type pair struct{ x, y int } | ||
|
||
// Diff returns an anchored diff of the two texts old and new | ||
// in the “unified diff” format. If old and new are identical, | ||
// Diff returns a nil slice (no output). | ||
// | ||
// Unix diff implementations typically look for a diff with | ||
// the smallest number of lines inserted and removed, | ||
// which can in the worst case take time quadratic in the | ||
// number of lines in the texts. As a result, many implementations | ||
// either can be made to run for a long time or cut off the search | ||
// after a predetermined amount of work. | ||
// | ||
// In contrast, this implementation looks for a diff with the | ||
// smallest number of “unique” lines inserted and removed, | ||
// where unique means a line that appears just once in both old and new. | ||
// We call this an “anchored diff” because the unique lines anchor | ||
// the chosen matching regions. An anchored diff is usually clearer | ||
// than a standard diff, because the algorithm does not try to | ||
// reuse unrelated blank lines or closing braces. | ||
// The algorithm also guarantees to run in O(n log n) time | ||
// instead of the standard O(n²) time. | ||
// | ||
// Some systems call this approach a “patience diff,” named for | ||
// the “patience sorting” algorithm, itself named for a solitaire card game. | ||
// We avoid that name for two reasons. First, the name has been used | ||
// for a few different variants of the algorithm, so it is imprecise. | ||
// Second, the name is frequently interpreted as meaning that you have | ||
// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm, | ||
// when in fact the algorithm is faster than the standard one. | ||
func Diff( //nolint: gocyclo | ||
oldName string, | ||
old []byte, | ||
newName string, | ||
new []byte, //nolint: predeclared | ||
) []byte { | ||
if bytes.Equal(old, new) { | ||
return nil | ||
} | ||
x := lines(old) | ||
y := lines(new) | ||
|
||
// Print diff header. | ||
var out bytes.Buffer | ||
fmt.Fprintf(&out, "diff %s %s\n", oldName, newName) | ||
fmt.Fprintf(&out, "--- %s\n", oldName) | ||
fmt.Fprintf(&out, "+++ %s\n", newName) | ||
|
||
// Loop over matches to consider, | ||
// expanding each match to include surrounding lines, | ||
// and then printing diff chunks. | ||
// To avoid setup/teardown cases outside the loop, | ||
// tgs returns a leading {0,0} and trailing {len(x), len(y)} pair | ||
// in the sequence of matches. | ||
var ( | ||
done pair // printed up to x[:done.x] and y[:done.y] | ||
chunk pair // start lines of current chunk | ||
count pair // number of lines from each side in current chunk | ||
ctext []string // lines for current chunk | ||
) | ||
for _, m := range tgs(x, y) { | ||
if m.x < done.x { | ||
// Already handled scanning forward from earlier match. | ||
continue | ||
} | ||
|
||
// Expand matching lines as far possible, | ||
// establishing that x[start.x:end.x] == y[start.y:end.y]. | ||
// Note that on the first (or last) iteration we may (or definitey do) | ||
// have an empty match: start.x==end.x and start.y==end.y. | ||
start := m | ||
for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] { | ||
start.x-- | ||
start.y-- | ||
} | ||
end := m | ||
for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] { | ||
end.x++ | ||
end.y++ | ||
} | ||
|
||
// Emit the mismatched lines before start into this chunk. | ||
// (No effect on first sentinel iteration, when start = {0,0}.) | ||
for _, s := range x[done.x:start.x] { | ||
ctext = append(ctext, "- "+s) | ||
count.x++ | ||
} | ||
for _, s := range y[done.y:start.y] { | ||
ctext = append(ctext, "+ "+s) | ||
count.y++ | ||
} | ||
|
||
// If we're not at EOF and have too few common lines, | ||
// the chunk includes all the common lines and continues. | ||
const C = 3 // number of context lines | ||
if (end.x < len(x) || end.y < len(y)) && | ||
(end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) { | ||
for _, s := range x[start.x:end.x] { | ||
ctext = append(ctext, " "+s) | ||
count.x++ | ||
count.y++ | ||
} | ||
done = end | ||
continue | ||
} | ||
|
||
// End chunk with common lines for context. | ||
if len(ctext) > 0 { | ||
n := end.x - start.x | ||
if n > C { | ||
n = C | ||
} | ||
for _, s := range x[start.x : start.x+n] { | ||
ctext = append(ctext, " "+s) | ||
count.x++ | ||
count.y++ | ||
} | ||
done = pair{start.x + n, start.y + n} | ||
|
||
// Format and emit chunk. | ||
// Convert line numbers to 1-indexed. | ||
// Special case: empty file shows up as 0,0 not 1,0. | ||
if count.x > 0 { | ||
chunk.x++ | ||
} | ||
if count.y > 0 { | ||
chunk.y++ | ||
} | ||
fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y) | ||
for _, s := range ctext { | ||
out.WriteString(s) | ||
} | ||
count.x = 0 | ||
count.y = 0 | ||
ctext = ctext[:0] | ||
} | ||
|
||
// If we reached EOF, we're done. | ||
if end.x >= len(x) && end.y >= len(y) { | ||
break | ||
} | ||
|
||
// Otherwise start a new chunk. | ||
chunk = pair{end.x - C, end.y - C} | ||
for _, s := range x[chunk.x:end.x] { | ||
ctext = append(ctext, " "+s) | ||
count.x++ | ||
count.y++ | ||
} | ||
done = end | ||
} | ||
|
||
return out.Bytes() | ||
} | ||
|
||
// lines returns the lines in the file x, including newlines. | ||
// If the file does not end in a newline, one is supplied | ||
// along with a warning about the missing newline. | ||
func lines(x []byte) []string { | ||
l := strings.SplitAfter(string(x), "\n") | ||
if l[len(l)-1] == "" { | ||
l = l[:len(l)-1] | ||
} else { | ||
// Treat last line as having a message about the missing newline attached, | ||
// using the same text as BSD/GNU diff (including the leading backslash). | ||
l[len(l)-1] += "\n\\ No newline at end of file\n" | ||
} | ||
return l | ||
} | ||
|
||
// tgs returns the pairs of indexes of the longest common subsequence | ||
// of unique lines in x and y, where a unique line is one that appears | ||
// once in x and once in y. | ||
// | ||
// The longest common subsequence algorithm is as described in | ||
// Thomas G. Szymanski, “A Special Case of the Maximal Common | ||
// Subsequence Problem,” Princeton TR #170 (January 1975), | ||
// available at https://research.swtch.com/tgs170.pdf. | ||
func tgs(x, y []string) []pair { | ||
// Count the number of times each string appears in a and b. | ||
// We only care about 0, 1, many, counted as 0, -1, -2 | ||
// for the x side and 0, -4, -8 for the y side. | ||
// Using negative numbers now lets us distinguish positive line numbers later. | ||
m := make(map[string]int) | ||
for _, s := range x { | ||
if c := m[s]; c > -2 { | ||
m[s] = c - 1 | ||
} | ||
} | ||
for _, s := range y { | ||
if c := m[s]; c > -8 { | ||
m[s] = c - 4 //nolint: mnd | ||
} | ||
} | ||
|
||
// Now unique strings can be identified by m[s] = -1+-4. | ||
// | ||
// Gather the indexes of those strings in x and y, building: | ||
// xi[i] = increasing indexes of unique strings in x. | ||
// yi[i] = increasing indexes of unique strings in y. | ||
// inv[i] = index j such that x[xi[i]] = y[yi[j]]. | ||
var xi, yi, inv []int | ||
for i, s := range y { | ||
if m[s] == -1+-4 { | ||
m[s] = len(yi) | ||
yi = append(yi, i) | ||
} | ||
} | ||
for i, s := range x { | ||
if j, ok := m[s]; ok && j >= 0 { | ||
xi = append(xi, i) | ||
inv = append(inv, j) | ||
} | ||
} | ||
|
||
// Apply Algorithm A from Szymanski's paper. | ||
// In those terms, A = J = inv and B = [0, n). | ||
// We add sentinel pairs {0,0}, and {len(x),len(y)} | ||
// to the returned sequence, to help the processing loop. | ||
J := inv | ||
n := len(xi) | ||
T := make([]int, n) | ||
L := make([]int, n) | ||
for i := range T { | ||
T[i] = n + 1 | ||
} | ||
for i := 0; i < n; i++ { | ||
k := sort.Search(n, func(k int) bool { | ||
return T[k] >= J[i] | ||
}) | ||
T[k] = J[i] | ||
L[i] = k + 1 | ||
} | ||
k := 0 | ||
for _, v := range L { | ||
if k < v { | ||
k = v | ||
} | ||
} | ||
seq := make([]pair, 2+k) //nolint:mnd | ||
seq[1+k] = pair{len(x), len(y)} // sentinel at end | ||
lastj := n | ||
for i := n - 1; i >= 0; i-- { | ||
if L[i] == k && J[i] < lastj { | ||
seq[k] = pair{xi[i], yi[J[i]]} | ||
k-- | ||
} | ||
} | ||
seq[0] = pair{0, 0} // sentinel at start | ||
return seq | ||
} |
Oops, something went wrong.