diff --git a/cli/internal/fs/hash.go b/cli/internal/fs/hash.go index fed7d872cb4ec..9761bc50805c8 100644 --- a/cli/internal/fs/hash.go +++ b/cli/internal/fs/hash.go @@ -5,9 +5,9 @@ import ( "encoding/hex" "fmt" "io" - "os" "strconv" + "github.com/vercel/turbo/cli/internal/turbopath" "github.com/vercel/turbo/cli/internal/xxhash" ) @@ -19,25 +19,10 @@ func HashObject(i interface{}) (string, error) { return hex.EncodeToString(hash.Sum(nil)), err } -func HashFile(filePath string) (string, error) { - file, err := os.Open(filePath) - if err != nil { - return "", err - } - defer file.Close() - - hash := xxhash.New() - if _, err := io.Copy(hash, file); err != nil { - return "", err - } - - return hex.EncodeToString(hash.Sum(nil)), nil -} - // GitLikeHashFile is a function that mimics how Git // calculates the SHA1 for a file (or, in Git terms, a "blob") (without git) -func GitLikeHashFile(filePath string) (string, error) { - file, err := os.Open(filePath) +func GitLikeHashFile(filePath turbopath.AbsoluteSystemPath) (string, error) { + file, err := filePath.Open() if err != nil { return "", err } diff --git a/cli/internal/hashing/package_deps_hash.go b/cli/internal/hashing/package_deps_hash.go index 517cddddb7fc7..c51c056b9dcf7 100644 --- a/cli/internal/hashing/package_deps_hash.go +++ b/cli/internal/hashing/package_deps_hash.go @@ -10,6 +10,8 @@ import ( "sync" "github.com/pkg/errors" + gitignore "github.com/sabhiram/go-gitignore" + "github.com/vercel/turbo/cli/internal/doublestar" "github.com/vercel/turbo/cli/internal/encoding/gitoutput" "github.com/vercel/turbo/cli/internal/fs" "github.com/vercel/turbo/cli/internal/globby" @@ -26,145 +28,230 @@ type PackageDepsOptions struct { InputPatterns []string } -// GetPackageDeps Builds an object containing git hashes for the files under the specified `packagePath` folder. 
-func GetPackageDeps(rootPath turbopath.AbsoluteSystemPath, p *PackageDepsOptions) (map[turbopath.AnchoredUnixPath]string, error) { - pkgPath := rootPath.UntypedJoin(p.PackagePath.ToStringDuringMigration()) - // Add all the checked in hashes. +func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { var result map[turbopath.AnchoredUnixPath]string + absolutePackagePath := packagePath.RestoreAnchor(rootPath) - // make a copy of the inputPatterns array, because we may be appending to it later. - calculatedInputs := make([]string, len(p.InputPatterns)) - copy(calculatedInputs, p.InputPatterns) + // Get the state of the git index. + gitLsTreeOutput, err := gitLsTree(absolutePackagePath) + if err != nil { + return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err) + } + result = gitLsTreeOutput - if len(calculatedInputs) == 0 { - gitLsTreeOutput, err := gitLsTree(pkgPath) - if err != nil { - return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", p.PackagePath, err) - } - result = gitLsTreeOutput + // Update the hashes with the state of the working directory. + // The paths returned from this call are anchored at the package directory + gitStatusOutput, err := gitStatus(absolutePackagePath) + if err != nil { + return nil, fmt.Errorf("Could not get git hashes from git status: %v", err) + } - // Update the checked in hashes with the current repo status - // The paths returned from this call are anchored at the package directory - gitStatusOutput, err := gitStatus(pkgPath, calculatedInputs) - if err != nil { - return nil, fmt.Errorf("Could not get git hashes from git status: %v", err) + // Review status output to identify the delta. 
+ var filesToHash []turbopath.AnchoredSystemPath + for filePath, status := range gitStatusOutput { + if status.isDelete() { + delete(result, filePath) + } else { + filesToHash = append(filesToHash, filePath.ToSystemPath()) } + } + + // Get the hashes for any modified files in the working directory. + hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash) + if err != nil { + return nil, err + } + + // Zip up file paths and hashes together + for filePath, hash := range hashes { + result[filePath] = hash + } + + return result, nil +} + +func safeCompileIgnoreFile(filepath turbopath.AbsoluteSystemPath) (*gitignore.GitIgnore, error) { + if filepath.FileExists() { + return gitignore.CompileIgnoreFile(filepath.ToString()) + } + // no op + return gitignore.CompileIgnoreLines([]string{}...), nil +} + +func getPackageFileHashesFromProcessingGitIgnore(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath, inputs []string) (map[turbopath.AnchoredUnixPath]string, error) { + result := make(map[turbopath.AnchoredUnixPath]string) + absolutePackagePath := packagePath.RestoreAnchor(rootPath) + + // Instead of implementing all gitignore properly, we hack it. We only respect .gitignore in the root and in + // the directory of a package. + ignore, err := safeCompileIgnoreFile(rootPath.UntypedJoin(".gitignore")) + if err != nil { + return nil, err + } + + ignorePkg, err := safeCompileIgnoreFile(absolutePackagePath.UntypedJoin(".gitignore")) + if err != nil { + return nil, err + } - var filesToHash []turbopath.AnchoredSystemPath - for filePath, status := range gitStatusOutput { - if status.isDelete() { - delete(result, filePath) + includePattern := "" + excludePattern := "" + if len(inputs) > 0 { + var includePatterns []string + var excludePatterns []string + for _, pattern := range inputs { + if len(pattern) > 0 && pattern[0] == '!' 
{ + excludePatterns = append(excludePatterns, absolutePackagePath.UntypedJoin(pattern[1:]).ToString()) } else { - filesToHash = append(filesToHash, filePath.ToSystemPath()) + includePatterns = append(includePatterns, absolutePackagePath.UntypedJoin(pattern).ToString()) } } - - hashes, err := gitHashObject(turbopath.AbsoluteSystemPathFromUpstream(pkgPath.ToString()), filesToHash) - if err != nil { - return nil, err + if len(includePatterns) > 0 { + includePattern = "{" + strings.Join(includePatterns, ",") + "}" } - - // Zip up file paths and hashes together - for filePath, hash := range hashes { - result[filePath] = hash + if len(excludePatterns) > 0 { + excludePattern = "{" + strings.Join(excludePatterns, ",") + "}" } - } else { - // Add in package.json and turbo.json to input patterns. Both file paths are relative to pkgPath - // - // - package.json is an input because if the `scripts` in - // the package.json change (i.e. the tasks that turbo executes), we want - // a cache miss, since any existing cache could be invalid. - // - turbo.json because it's the definition of the tasks themselves. The root turbo.json - // is similarly included in the global hash. This file may not exist in the workspace, but - // that is ok, because it will get ignored downstream. - calculatedInputs = append(calculatedInputs, "package.json") - calculatedInputs = append(calculatedInputs, "turbo.json") - - // The input patterns are relative to the package. - // However, we need to change the globbing to be relative to the repo root. - // Prepend the package path to each of the input patterns. - prefixedInputPatterns := []string{} - prefixedExcludePatterns := []string{} - for _, pattern := range calculatedInputs { - if len(pattern) > 0 && pattern[0] == '!' 
{ - rerooted, err := rootPath.PathTo(pkgPath.UntypedJoin(pattern[1:])) + } + + err = fs.Walk(absolutePackagePath.ToStringDuringMigration(), func(name string, isDir bool) error { + convertedName := turbopath.AbsoluteSystemPathFromUpstream(name) + rootMatch := ignore.MatchesPath(convertedName.ToString()) + otherMatch := ignorePkg.MatchesPath(convertedName.ToString()) + if !rootMatch && !otherMatch { + if !isDir { + if includePattern != "" { + val, err := doublestar.PathMatch(includePattern, convertedName.ToString()) + if err != nil { + return err + } + if !val { + return nil + } + } + if excludePattern != "" { + val, err := doublestar.PathMatch(excludePattern, convertedName.ToString()) + if err != nil { + return err + } + if val { + return nil + } + } + hash, err := fs.GitLikeHashFile(convertedName) if err != nil { - return nil, err + return fmt.Errorf("could not hash file %v. \n%w", convertedName.ToString(), err) } - prefixedExcludePatterns = append(prefixedExcludePatterns, rerooted) - } else { - rerooted, err := rootPath.PathTo(pkgPath.UntypedJoin(pattern)) + + relativePath, err := convertedName.RelativeTo(absolutePackagePath) if err != nil { - return nil, err + return fmt.Errorf("File path cannot be made relative: %w", err) } - prefixedInputPatterns = append(prefixedInputPatterns, rerooted) + result[relativePath.ToUnixPath()] = hash } } - absoluteFilesToHash, err := globby.GlobFiles(rootPath.ToStringDuringMigration(), prefixedInputPatterns, prefixedExcludePatterns) - - if err != nil { - return nil, errors.Wrapf(err, "failed to resolve input globs %v", calculatedInputs) - } + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} - filesToHash := make([]turbopath.AnchoredSystemPath, len(absoluteFilesToHash)) - for i, rawPath := range absoluteFilesToHash { - relativePathString, err := pkgPath.RelativePathString(rawPath) +func getPackageFileHashesFromInputs(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath, 
inputs []string) (map[turbopath.AnchoredUnixPath]string, error) { + absolutePackagePath := packagePath.RestoreAnchor(rootPath) + // Hash only the files matched by the input globs. + // make a copy of the inputs slice, because we may be appending to it later. + calculatedInputs := make([]string, len(inputs)) + copy(calculatedInputs, inputs) + + // Add in package.json and turbo.json to input patterns. Both file paths are relative to absolutePackagePath + // + // - package.json is an input because if the `scripts` in + // the package.json change (i.e. the tasks that turbo executes), we want + // a cache miss, since any existing cache could be invalid. + // - turbo.json because it's the definition of the tasks themselves. The root turbo.json + // is similarly included in the global hash. This file may not exist in the workspace, but + // that is ok, because it will get ignored downstream. + calculatedInputs = append(calculatedInputs, "package.json") + calculatedInputs = append(calculatedInputs, "turbo.json") + + // The input patterns are relative to the package. + // However, we need to change the globbing to be relative to the repo root. + // Prepend the package path to each of the input patterns. + prefixedInputPatterns := []string{} + prefixedExcludePatterns := []string{} + for _, pattern := range calculatedInputs { + if len(pattern) > 0 && pattern[0] == '!' 
{ + rerooted, err := rootPath.PathTo(absolutePackagePath.UntypedJoin(pattern[1:])) if err != nil { - return nil, errors.Wrapf(err, "not relative to package: %v", rawPath) + return nil, err } - - filesToHash[i] = turbopath.AnchoredSystemPathFromUpstream(relativePathString) + prefixedExcludePatterns = append(prefixedExcludePatterns, rerooted) + } else { + rerooted, err := rootPath.PathTo(absolutePackagePath.UntypedJoin(pattern)) + if err != nil { + return nil, err + } + prefixedInputPatterns = append(prefixedInputPatterns, rerooted) } + } + absoluteFilesToHash, err := globby.GlobFiles(rootPath.ToStringDuringMigration(), prefixedInputPatterns, prefixedExcludePatterns) - hashes, err := gitHashObject(turbopath.AbsoluteSystemPathFromUpstream(pkgPath.ToStringDuringMigration()), filesToHash) - if err != nil { - return nil, errors.Wrap(err, "failed hashing resolved inputs globs") - } - result = hashes - // Note that in this scenario, we don't need to check git status, we're using hash-object directly which - // hashes the current state, not state at a commit + if err != nil { + return nil, errors.Wrapf(err, "failed to resolve input globs %v", calculatedInputs) } - return result, nil -} + filesToHash := make([]turbopath.AnchoredSystemPath, len(absoluteFilesToHash)) + for i, rawPath := range absoluteFilesToHash { + relativePathString, err := absolutePackagePath.RelativePathString(rawPath) -func manuallyHashFiles(rootPath turbopath.AbsoluteSystemPath, files []turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { - hashObject := make(map[turbopath.AnchoredUnixPath]string) - for _, file := range files { - hash, err := fs.GitLikeHashFile(file.ToString()) if err != nil { - return nil, fmt.Errorf("could not hash file %v. 
\n%w", file.ToString(), err) + return nil, errors.Wrapf(err, "not relative to package: %v", rawPath) } - hashObject[file.ToUnixPath()] = hash + filesToHash[i] = turbopath.AnchoredSystemPathFromUpstream(relativePathString) } - return hashObject, nil -} -// GetHashableDeps hashes the list of given files, then returns a map of normalized path to hash -// this map is suitable for cross-platform caching. -func GetHashableDeps(rootPath turbopath.AbsoluteSystemPath, files []turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { - output := make([]turbopath.AnchoredSystemPath, len(files)) - convertedRootPath := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString()) + // Note that in this scenario, we don't need to check git status. + // We're hashing the current state, not state at a commit. + result, err := GetHashesForFiles(absolutePackagePath, filesToHash) + if err != nil { + return nil, errors.Wrap(err, "failed hashing resolved inputs globs") + } - for index, file := range files { - anchoredSystemPath, err := file.RelativeTo(convertedRootPath) + return result, nil +} + +// GetPackageFileHashes Builds an object containing git hashes for the files under the specified `packagePath` folder. 
+func GetPackageFileHashes(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath, inputs []string) (map[turbopath.AnchoredUnixPath]string, error) { + if len(inputs) == 0 { + result, err := getPackageFileHashesFromGitIndex(rootPath, packagePath) if err != nil { - return nil, err + return getPackageFileHashesFromProcessingGitIgnore(rootPath, packagePath, nil) } - output[index] = anchoredSystemPath + return result, nil } - hashObject, err := gitHashObject(convertedRootPath, output) + + result, err := getPackageFileHashesFromInputs(rootPath, packagePath, inputs) if err != nil { - manuallyHashedObject, err := manuallyHashFiles(convertedRootPath, output) - if err != nil { - return nil, err - } - hashObject = manuallyHashedObject + return getPackageFileHashesFromProcessingGitIgnore(rootPath, packagePath, inputs) } + return result, nil +} - return hashObject, nil +// GetHashesForFiles hashes the list of given files, then returns a map of normalized path to hash. +// This map is suitable for cross-platform caching. +func GetHashesForFiles(rootPath turbopath.AbsoluteSystemPath, files []turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { + // Try to use `git` first. + gitHashedFiles, err := gitHashObject(rootPath, files) + if err == nil { + return gitHashedFiles, nil + } + + // Fall back to manual hashing. + return manuallyHashFiles(rootPath, files) } // gitHashObject returns a map of paths to their SHA hashes calculated by passing the paths to `git hash-object`. @@ -284,6 +371,19 @@ func gitHashObject(anchor turbopath.AbsoluteSystemPath, filesToHash []turbopath. 
return output, nil } +func manuallyHashFiles(rootPath turbopath.AbsoluteSystemPath, files []turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { + hashObject := make(map[turbopath.AnchoredUnixPath]string) + for _, file := range files { + hash, err := fs.GitLikeHashFile(file.RestoreAnchor(rootPath)) + if err != nil { + return nil, fmt.Errorf("could not hash file %v. \n%w", file.ToString(), err) + } + + hashObject[file.ToUnixPath()] = hash + } + return hashObject, nil +} + // runGitCommand provides boilerplate command handling for `ls-tree`, `ls-files`, and `status` // Rather than doing string processing, it does stream processing of `stdout`. func runGitCommand(cmd *exec.Cmd, commandName string, handler func(io.Reader) *gitoutput.Reader) ([][]string, error) { @@ -404,7 +504,7 @@ func (s statusCode) isDelete() bool { // We need to calculate where the repository's location is in order to determine what the full path is // before we can return those paths relative to the calling directory, normalizing to the behavior of // `ls-files` and `ls-tree`. -func gitStatus(rootPath turbopath.AbsoluteSystemPath, patterns []string) (map[turbopath.AnchoredUnixPath]statusCode, error) { +func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) { cmd := exec.Command( "git", // Using `git` from $PATH, "status", // tell me about the status of the working tree, @@ -413,13 +513,8 @@ func gitStatus(rootPath turbopath.AbsoluteSystemPath, patterns []string) (map[tu "-z", // with each file path relative to the repository root and \000-terminated, "--", // and any additional argument you see is a path, promise. ) - if len(patterns) == 0 { - cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree. - } else { - // FIXME: Globbing is using `git`'s globbing rules which are not consistent with `doublestar``. - cmd.Args = append(cmd.Args, patterns...) 
// Pass in input patterns as arguments. - } - cmd.Dir = rootPath.ToString() // Include files only from this directory. + cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree. + cmd.Dir = rootPath.ToString() // Include files only from this directory. entries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader) if err != nil { diff --git a/cli/internal/hashing/package_deps_hash_test.go b/cli/internal/hashing/package_deps_hash_test.go index 8f68d38d39239..66f3d58552da5 100644 --- a/cli/internal/hashing/package_deps_hash_test.go +++ b/cli/internal/hashing/package_deps_hash_test.go @@ -367,11 +367,7 @@ func TestGetPackageDeps(t *testing.T) { }, } for _, tt := range tests { - got, err := GetPackageDeps(repoRoot, tt.opts) - if err != nil { - t.Errorf("GetPackageDeps got error %v", err) - continue - } + got, _ := GetPackageFileHashes(repoRoot, tt.opts.PackagePath, tt.opts.InputPatterns) assert.DeepEqual(t, got, tt.expected) } } @@ -384,3 +380,141 @@ func Test_memoizedGetTraversePath(t *testing.T) { assert.Check(t, gotOne == gotTwo, "The strings are identical.") } + +func Test_getPackageFileHashesFromProcessingGitIgnore(t *testing.T) { + rootIgnore := strings.Join([]string{ + "ignoreme", + "ignorethisdir/", + }, "\n") + pkgIgnore := strings.Join([]string{ + "pkgignoreme", + "pkgignorethisdir/", + }, "\n") + root := t.TempDir() + repoRoot := turbopath.AbsoluteSystemPathFromUpstream(root) + pkgName := turbopath.AnchoredUnixPath("child-dir/libA").ToSystemPath() + type fileHash struct { + contents string + hash string + } + files := map[turbopath.AnchoredUnixPath]fileHash{ + "top-level-file": {"top-level-file-contents", ""}, + "other-dir/other-dir-file": {"other-dir-file-contents", ""}, + "ignoreme": {"anything", ""}, + "child-dir/libA/some-file": {"some-file-contents", "7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, + "child-dir/libA/some-dir/other-file": {"some-file-contents", 
"7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, + "child-dir/libA/some-dir/another-one": {"some-file-contents", "7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, + "child-dir/libA/some-dir/excluded-file": {"some-file-contents", "7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, + "child-dir/libA/ignoreme": {"anything", ""}, + "child-dir/libA/ignorethisdir/anything": {"anything", ""}, + "child-dir/libA/pkgignoreme": {"anything", ""}, + "child-dir/libA/pkgignorethisdir/file": {"anything", ""}, + } + + rootIgnoreFile, err := repoRoot.Join(".gitignore").Create() + if err != nil { + t.Fatalf("failed to create .gitignore: %v", err) + } + _, err = rootIgnoreFile.WriteString(rootIgnore) + if err != nil { + t.Fatalf("failed to write contents to .gitignore: %v", err) + } + err = rootIgnoreFile.Close() + if err != nil { + t.Fatalf("failed to close root ignore file") + } + pkgIgnoreFilename := pkgName.RestoreAnchor(repoRoot).Join(".gitignore") + err = pkgIgnoreFilename.EnsureDir() + if err != nil { + t.Fatalf("failed to ensure directories for %v: %v", pkgIgnoreFilename, err) + } + pkgIgnoreFile, err := pkgIgnoreFilename.Create() + if err != nil { + t.Fatalf("failed to create libA/.gitignore: %v", err) + } + _, err = pkgIgnoreFile.WriteString(pkgIgnore) + if err != nil { + t.Fatalf("failed to write contents to libA/.gitignore: %v", err) + } + err = pkgIgnoreFile.Close() + if err != nil { + t.Fatalf("failed to close package ignore file") + } + for path, spec := range files { + filename := path.ToSystemPath().RestoreAnchor(repoRoot) + err = filename.EnsureDir() + if err != nil { + t.Fatalf("failed to ensure directories for %v: %v", filename, err) + } + f, err := filename.Create() + if err != nil { + t.Fatalf("failed to create file: %v: %v", filename, err) + } + _, err = f.WriteString(spec.contents) + if err != nil { + t.Fatalf("failed to write contents to %v: %v", filename, err) + } + err = f.Close() + if err != nil { + t.Fatalf("failed to close package ignore file") + } + } + // now that 
we've created the repo, expect our .gitignore file too + files[turbopath.AnchoredUnixPath("child-dir/libA/.gitignore")] = fileHash{contents: "", hash: "3237694bc3312ded18386964a855074af7b066af"} + + pkg := &fs.PackageJSON{ + Dir: pkgName, + } + hashes, err := getPackageFileHashesFromProcessingGitIgnore(repoRoot, pkg.Dir, []string{}) + if err != nil { + t.Fatalf("failed to calculate manual hashes: %v", err) + } + + count := 0 + for path, spec := range files { + systemPath := path.ToSystemPath() + if systemPath.HasPrefix(pkgName) { + relPath := systemPath[len(pkgName)+1:] + got, ok := hashes[relPath.ToUnixPath()] + if !ok { + if spec.hash != "" { + t.Errorf("did not find hash for %v, but wanted one", path) + } + } else if got != spec.hash { + t.Errorf("hash of %v, got %v want %v", path, got, spec.hash) + } else { + count++ + } + } + } + if count != len(hashes) { + t.Errorf("found extra hashes in %v", hashes) + } + + count = 0 + justFileHashes, err := getPackageFileHashesFromProcessingGitIgnore(repoRoot, pkg.Dir, []string{filepath.FromSlash("**/*file"), "!" 
+ filepath.FromSlash("some-dir/excluded-file")}) + if err != nil { + t.Fatalf("failed to calculate manual hashes: %v", err) + } + for path, spec := range files { + systemPath := path.ToSystemPath() + if systemPath.HasPrefix(pkgName) { + shouldInclude := strings.HasSuffix(systemPath.ToString(), "file") && !strings.HasSuffix(systemPath.ToString(), "excluded-file") + relPath := systemPath[len(pkgName)+1:] + got, ok := justFileHashes[relPath.ToUnixPath()] + if !ok && shouldInclude { + if spec.hash != "" { + t.Errorf("did not find hash for %v, but wanted one", path) + } + } else if shouldInclude && got != spec.hash { + t.Errorf("hash of %v, got %v want %v", path, got, spec.hash) + } else if shouldInclude { + count++ + } + } + } + if count != len(justFileHashes) { + t.Errorf("found extra hashes in %v", hashes) + } + +} diff --git a/cli/internal/run/global_hash.go b/cli/internal/run/global_hash.go index 5034d7e26407d..364598459abb6 100644 --- a/cli/internal/run/global_hash.go +++ b/cli/internal/run/global_hash.go @@ -172,12 +172,18 @@ func getGlobalHashInputs( // No prefix, global deps already have full paths globalDepsArray := globalDeps.UnsafeListOfStrings() - globalDepsPaths := make([]turbopath.AbsoluteSystemPath, len(globalDepsArray)) + globalDepsPaths := make([]turbopath.AnchoredSystemPath, len(globalDepsArray)) for i, path := range globalDepsArray { - globalDepsPaths[i] = turbopath.AbsoluteSystemPathFromUpstream(path) + fullyQualifiedPath := turbopath.AbsoluteSystemPathFromUpstream(path) + anchoredPath, err := fullyQualifiedPath.RelativeTo(rootpath) + if err != nil { + return GlobalHashableInputs{}, err + } + + globalDepsPaths[i] = anchoredPath } - globalFileHashMap, err := hashing.GetHashableDeps(rootpath, globalDepsPaths) + globalFileHashMap, err := hashing.GetHashesForFiles(rootpath, globalDepsPaths) if err != nil { return GlobalHashableInputs{}, fmt.Errorf("error hashing files: %w", err) } diff --git a/cli/internal/taskhash/taskhash.go 
b/cli/internal/taskhash/taskhash.go index a912ad9370b44..7da00f4e9bf43 100644 --- a/cli/internal/taskhash/taskhash.go +++ b/cli/internal/taskhash/taskhash.go @@ -9,8 +9,6 @@ import ( "github.com/hashicorp/go-hclog" "github.com/pyr-sh/dag" - gitignore "github.com/sabhiram/go-gitignore" - "github.com/vercel/turbo/cli/internal/doublestar" "github.com/vercel/turbo/cli/internal/env" "github.com/vercel/turbo/cli/internal/fs" "github.com/vercel/turbo/cli/internal/hashing" @@ -33,12 +31,12 @@ type Tracker struct { globalHash string pipeline fs.Pipeline - packageInputsHashes packageFileHashes + packageInputsHashes map[string]string // packageInputsExpandedHashes is a map of a hashkey to a list of files that are inputs to the task. // Writes to this map happen during CalculateFileHash(). Since this happens synchronously // before walking the task graph, it does not need to be protected by a mutex. - packageInputsExpandedHashes map[packageFileHashKey]map[turbopath.AnchoredUnixPath]string + packageInputsExpandedHashes map[string]map[turbopath.AnchoredUnixPath]string // mu is a mutex that we can lock/unlock to read/write from maps // the fields below should be protected by the mutex. @@ -64,143 +62,13 @@ func NewTracker(rootNode string, globalHash string, pipeline fs.Pipeline) *Track } } -// packageFileSpec defines a combination of a package and optional set of input globs -type packageFileSpec struct { - pkg string - inputs []string +// packageFileHashInputs defines a combination of a package and optional set of input globs +type packageFileHashInputs struct { + taskID string + taskDefinition *fs.TaskDefinition + packageName string } -func specFromPackageTask(packageTask *nodes.PackageTask) packageFileSpec { - return packageFileSpec{ - pkg: packageTask.PackageName, - inputs: packageTask.TaskDefinition.Inputs, - } -} - -// packageFileHashKey is a hashable representation of a packageFileSpec. 
-type packageFileHashKey string - -// hashes the inputs for a packageTask -func (pfs packageFileSpec) ToKey() packageFileHashKey { - sort.Strings(pfs.inputs) - return packageFileHashKey(fmt.Sprintf("%v#%v", pfs.pkg, strings.Join(pfs.inputs, "!"))) -} - -func safeCompileIgnoreFile(filepath string) (*gitignore.GitIgnore, error) { - if fs.FileExists(filepath) { - return gitignore.CompileIgnoreFile(filepath) - } - // no op - return gitignore.CompileIgnoreLines([]string{}...), nil -} - -func (pfs *packageFileSpec) getHashObject(pkg *fs.PackageJSON, repoRoot turbopath.AbsoluteSystemPath) map[turbopath.AnchoredUnixPath]string { - hashObject, pkgDepsErr := hashing.GetPackageDeps(repoRoot, &hashing.PackageDepsOptions{ - PackagePath: pkg.Dir, - InputPatterns: pfs.inputs, - }) - if pkgDepsErr != nil { - manualHashObject, err := manuallyHashPackage(pkg, pfs.inputs, repoRoot) - if err != nil { - return make(map[turbopath.AnchoredUnixPath]string) - } - hashObject = manualHashObject - } - - return hashObject -} - -func (pfs *packageFileSpec) hash(hashObject map[turbopath.AnchoredUnixPath]string) (string, error) { - hashOfFiles, otherErr := fs.HashObject(hashObject) - if otherErr != nil { - return "", otherErr - } - return hashOfFiles, nil -} - -func manuallyHashPackage(pkg *fs.PackageJSON, inputs []string, rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { - hashObject := make(map[turbopath.AnchoredUnixPath]string) - // Instead of implementing all gitignore properly, we hack it. We only respect .gitignore in the root and in - // the directory of a package. 
- ignore, err := safeCompileIgnoreFile(rootPath.UntypedJoin(".gitignore").ToString()) - if err != nil { - return nil, err - } - - ignorePkg, err := safeCompileIgnoreFile(rootPath.UntypedJoin(pkg.Dir.ToStringDuringMigration(), ".gitignore").ToString()) - if err != nil { - return nil, err - } - - pathPrefix := rootPath.UntypedJoin(pkg.Dir.ToStringDuringMigration()) - includePattern := "" - excludePattern := "" - if len(inputs) > 0 { - var includePatterns []string - var excludePatterns []string - for _, pattern := range inputs { - if len(pattern) > 0 && pattern[0] == '!' { - excludePatterns = append(excludePatterns, pathPrefix.UntypedJoin(pattern[1:]).ToString()) - } else { - includePatterns = append(includePatterns, pathPrefix.UntypedJoin(pattern).ToString()) - } - } - if len(includePatterns) > 0 { - includePattern = "{" + strings.Join(includePatterns, ",") + "}" - } - if len(excludePatterns) > 0 { - excludePattern = "{" + strings.Join(excludePatterns, ",") + "}" - } - } - - err = fs.Walk(pathPrefix.ToStringDuringMigration(), func(name string, isDir bool) error { - convertedName := turbopath.AbsoluteSystemPathFromUpstream(name) - rootMatch := ignore.MatchesPath(convertedName.ToString()) - otherMatch := ignorePkg.MatchesPath(convertedName.ToString()) - if !rootMatch && !otherMatch { - if !isDir { - if includePattern != "" { - val, err := doublestar.PathMatch(includePattern, convertedName.ToString()) - if err != nil { - return err - } - if !val { - return nil - } - } - if excludePattern != "" { - val, err := doublestar.PathMatch(excludePattern, convertedName.ToString()) - if err != nil { - return err - } - if val { - return nil - } - } - hash, err := fs.GitLikeHashFile(convertedName.ToString()) - if err != nil { - return fmt.Errorf("could not hash file %v. 
\n%w", convertedName.ToString(), err) - } - - relativePath, err := convertedName.RelativeTo(pathPrefix) - if err != nil { - return fmt.Errorf("File path cannot be made relative: %w", err) - } - hashObject[relativePath.ToUnixPath()] = hash - } - } - return nil - }) - if err != nil { - return nil, err - } - return hashObject, nil -} - -// packageFileHashes is a map from a package and optional input globs to the hash of -// the matched files in the package. -type packageFileHashes map[packageFileHashKey]string - // CalculateFileHashes hashes each unique package-inputs combination that is present // in the task graph. Must be called before calculating task hashes. func (th *Tracker) CalculateFileHashes( @@ -220,8 +88,9 @@ func (th *Tracker) CalculateFileHashes( if taskID == th.rootNode { continue } - pkgName, _ := util.GetPackageTaskFromId(taskID) - if pkgName == th.rootNode { + + packageName, _ := util.GetPackageTaskFromId(taskID) + if packageName == th.rootNode { continue } @@ -230,42 +99,51 @@ func (th *Tracker) CalculateFileHashes( return fmt.Errorf("missing pipeline entry %v", taskID) } - pfs := &packageFileSpec{ - pkg: pkgName, - inputs: taskDefinition.Inputs, + pfs := &packageFileHashInputs{ + taskID, + taskDefinition, + packageName, } hashTasks.Add(pfs) } - hashes := make(map[packageFileHashKey]string, len(hashTasks)) - hashObjects := make(map[packageFileHashKey]map[turbopath.AnchoredUnixPath]string, len(hashTasks)) - hashQueue := make(chan *packageFileSpec, workerCount) + hashes := make(map[string]string, len(hashTasks)) + hashObjects := make(map[string]map[turbopath.AnchoredUnixPath]string, len(hashTasks)) + hashQueue := make(chan *packageFileHashInputs, workerCount) hashErrs := &errgroup.Group{} for i := 0; i < workerCount; i++ { hashErrs.Go(func() error { - for packageFileSpec := range hashQueue { - pkg, ok := workspaceInfos.PackageJSONs[packageFileSpec.pkg] + for packageFileHashInputs := range hashQueue { + pkg, ok := 
workspaceInfos.PackageJSONs[packageFileHashInputs.packageName] if !ok { - return fmt.Errorf("cannot find package %v", packageFileSpec.pkg) + return fmt.Errorf("cannot find package %v", packageFileHashInputs.packageName) + } + + // Get the hashes of each file, keyed by the path. + hashObject, err := hashing.GetPackageFileHashes(repoRoot, pkg.Dir, packageFileHashInputs.taskDefinition.Inputs) + if err != nil { + return err } - hashObject := packageFileSpec.getHashObject(pkg, repoRoot) - hash, err := packageFileSpec.hash(hashObject) + + // Get the combined hash of all the files. + hash, err := fs.HashObject(hashObject) if err != nil { return err } + + // Save off the hash information, keyed by package task. th.mu.Lock() - pfsKey := packageFileSpec.ToKey() - hashes[pfsKey] = hash - hashObjects[pfsKey] = hashObject + hashes[packageFileHashInputs.taskID] = hash + hashObjects[packageFileHashInputs.taskID] = hashObject th.mu.Unlock() } return nil }) } for ht := range hashTasks { - hashQueue <- ht.(*packageFileSpec) + hashQueue <- ht.(*packageFileHashInputs) } close(hashQueue) err := hashErrs.Wait() @@ -370,12 +248,9 @@ func (th *Tracker) calculateDependencyHashes(dependencySet dag.Set) ([]string, e // that it has previously been called on its task-graph dependencies. File hashes must be calculated // first. 
func (th *Tracker) CalculateTaskHash(packageTask *nodes.PackageTask, dependencySet dag.Set, logger hclog.Logger, args []string, useOldTaskHashable bool) (string, error) { - pfs := specFromPackageTask(packageTask) - pkgFileHashKey := pfs.ToKey() - - hashOfFiles, ok := th.packageInputsHashes[pkgFileHashKey] + hashOfFiles, ok := th.packageInputsHashes[packageTask.TaskID] if !ok { - return "", fmt.Errorf("cannot find package-file hash for %v", pkgFileHashKey) + return "", fmt.Errorf("cannot find package-file hash for %v", packageTask.TaskID) } var keyMatchers []string @@ -431,8 +306,7 @@ func (th *Tracker) CalculateTaskHash(packageTask *nodes.PackageTask, dependencyS // GetExpandedInputs gets the expanded set of inputs for a given PackageTask func (th *Tracker) GetExpandedInputs(packageTask *nodes.PackageTask) map[turbopath.AnchoredUnixPath]string { - pfs := specFromPackageTask(packageTask) - expandedInputs := th.packageInputsExpandedHashes[pfs.ToKey()] + expandedInputs := th.packageInputsExpandedHashes[packageTask.TaskID] inputsCopy := make(map[turbopath.AnchoredUnixPath]string, len(expandedInputs)) for path, hash := range expandedInputs { diff --git a/cli/internal/taskhash/taskhash_test.go b/cli/internal/taskhash/taskhash_test.go deleted file mode 100644 index dea00106233ad..0000000000000 --- a/cli/internal/taskhash/taskhash_test.go +++ /dev/null @@ -1,138 +0,0 @@ -package taskhash - -import ( - "path/filepath" - "strings" - "testing" - - "github.com/vercel/turbo/cli/internal/fs" - "github.com/vercel/turbo/cli/internal/turbopath" -) - -func Test_manuallyHashPackage(t *testing.T) { - rootIgnore := strings.Join([]string{ - "ignoreme", - "ignorethisdir/", - }, "\n") - pkgIgnore := strings.Join([]string{ - "pkgignoreme", - "pkgignorethisdir/", - }, "\n") - root := t.TempDir() - repoRoot := turbopath.AbsoluteSystemPathFromUpstream(root) - pkgName := turbopath.AnchoredUnixPath("child-dir/libA").ToSystemPath() - type fileHash struct { - contents string - hash string - } - 
files := map[turbopath.AnchoredUnixPath]fileHash{ - "top-level-file": {"top-level-file-contents", ""}, - "other-dir/other-dir-file": {"other-dir-file-contents", ""}, - "ignoreme": {"anything", ""}, - "child-dir/libA/some-file": {"some-file-contents", "7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, - "child-dir/libA/some-dir/other-file": {"some-file-contents", "7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, - "child-dir/libA/some-dir/another-one": {"some-file-contents", "7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, - "child-dir/libA/some-dir/excluded-file": {"some-file-contents", "7e59c6a6ea9098c6d3beb00e753e2c54ea502311"}, - "child-dir/libA/ignoreme": {"anything", ""}, - "child-dir/libA/ignorethisdir/anything": {"anything", ""}, - "child-dir/libA/pkgignoreme": {"anything", ""}, - "child-dir/libA/pkgignorethisdir/file": {"anything", ""}, - } - - rootIgnoreFile, err := repoRoot.Join(".gitignore").Create() - if err != nil { - t.Fatalf("failed to create .gitignore: %v", err) - } - _, err = rootIgnoreFile.WriteString(rootIgnore) - if err != nil { - t.Fatalf("failed to write contents to .gitignore: %v", err) - } - rootIgnoreFile.Close() - pkgIgnoreFilename := pkgName.RestoreAnchor(repoRoot).Join(".gitignore") - err = pkgIgnoreFilename.EnsureDir() - if err != nil { - t.Fatalf("failed to ensure directories for %v: %v", pkgIgnoreFilename, err) - } - pkgIgnoreFile, err := pkgIgnoreFilename.Create() - if err != nil { - t.Fatalf("failed to create libA/.gitignore: %v", err) - } - _, err = pkgIgnoreFile.WriteString(pkgIgnore) - if err != nil { - t.Fatalf("failed to write contents to libA/.gitignore: %v", err) - } - pkgIgnoreFile.Close() - for path, spec := range files { - filename := path.ToSystemPath().RestoreAnchor(repoRoot) - err = filename.EnsureDir() - if err != nil { - t.Fatalf("failed to ensure directories for %v: %v", filename, err) - } - f, err := filename.Create() - if err != nil { - t.Fatalf("failed to create file: %v: %v", filename, err) - } - _, err = 
f.WriteString(spec.contents) - if err != nil { - t.Fatalf("failed to write contents to %v: %v", filename, err) - } - f.Close() - } - // now that we've created the repo, expect our .gitignore file too - files[turbopath.AnchoredUnixPath("child-dir/libA/.gitignore")] = fileHash{contents: "", hash: "3237694bc3312ded18386964a855074af7b066af"} - - pkg := &fs.PackageJSON{ - Dir: pkgName, - } - hashes, err := manuallyHashPackage(pkg, []string{}, repoRoot) - if err != nil { - t.Fatalf("failed to calculate manual hashes: %v", err) - } - - count := 0 - for path, spec := range files { - systemPath := path.ToSystemPath() - if systemPath.HasPrefix(pkgName) { - relPath := systemPath[len(pkgName)+1:] - got, ok := hashes[relPath.ToUnixPath()] - if !ok { - if spec.hash != "" { - t.Errorf("did not find hash for %v, but wanted one", path) - } - } else if got != spec.hash { - t.Errorf("hash of %v, got %v want %v", path, got, spec.hash) - } else { - count++ - } - } - } - if count != len(hashes) { - t.Errorf("found extra hashes in %v", hashes) - } - - count = 0 - justFileHashes, err := manuallyHashPackage(pkg, []string{filepath.FromSlash("**/*file"), "!" + filepath.FromSlash("some-dir/excluded-file")}, repoRoot) - if err != nil { - t.Fatalf("failed to calculate manual hashes: %v", err) - } - for path, spec := range files { - systemPath := path.ToSystemPath() - if systemPath.HasPrefix(pkgName) { - shouldInclude := strings.HasSuffix(systemPath.ToString(), "file") && !strings.HasSuffix(systemPath.ToString(), "excluded-file") - relPath := systemPath[len(pkgName)+1:] - got, ok := justFileHashes[relPath.ToUnixPath()] - if !ok && shouldInclude { - if spec.hash != "" { - t.Errorf("did not find hash for %v, but wanted one", path) - } - } else if shouldInclude && got != spec.hash { - t.Errorf("hash of %v, got %v want %v", path, got, spec.hash) - } else if shouldInclude { - count++ - } - } - } - if count != len(justFileHashes) { - t.Errorf("found extra hashes in %v", hashes) - } -}