From 0a35b8ddb2148e7b38ae628fae579db48f2a8002 Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Wed, 26 Apr 2023 11:21:22 -0700 Subject: [PATCH 01/10] Implement file hashing, except when inputs are present --- Cargo.lock | 10 +- cli/internal/ffi/bindings.h | 2 + crates/turbopath/Cargo.toml | 1 + .../turbopath/src/absolute_system_path_buf.rs | 21 ++ .../turbopath/src/anchored_system_path_buf.rs | 31 ++- crates/turbopath/src/lib.rs | 15 +- .../turbopath/src/relative_system_path_buf.rs | 3 +- crates/turbopath/src/relative_unix_path.rs | 41 +++- .../turbopath/src/relative_unix_path_buf.rs | 174 ++++++++++++---- crates/turborepo-ffi/src/lib.rs | 23 +++ .../src/daemon/file_descriptor_set.bin | Bin 0 -> 2490 bytes crates/turborepo-scm/Cargo.toml | 1 + .../fixtures/01-git-hash-object/.gitignore | 2 + .../01-git-hash-object/child/child.json | 0 .../child/grandchild/grandchild.json | 0 .../fixtures/01-git-hash-object/root.json | 0 crates/turborepo-scm/src/hash_object.rs | 194 ++++++++++++++++++ crates/turborepo-scm/src/lib.rs | 4 + crates/turborepo-scm/src/ls_tree.rs | 135 ++++++++++++ crates/turborepo-scm/src/package_deps.rs | 168 +++++++++++++++ crates/turborepo-scm/src/status.rs | 156 ++++++++++++++ 21 files changed, 920 insertions(+), 61 deletions(-) create mode 100644 crates/turborepo-lib/src/daemon/file_descriptor_set.bin create mode 100644 crates/turborepo-scm/fixtures/01-git-hash-object/.gitignore create mode 100644 crates/turborepo-scm/fixtures/01-git-hash-object/child/child.json create mode 100644 crates/turborepo-scm/fixtures/01-git-hash-object/child/grandchild/grandchild.json create mode 100644 crates/turborepo-scm/fixtures/01-git-hash-object/root.json create mode 100644 crates/turborepo-scm/src/hash_object.rs create mode 100644 crates/turborepo-scm/src/ls_tree.rs create mode 100644 crates/turborepo-scm/src/package_deps.rs create mode 100644 crates/turborepo-scm/src/status.rs diff --git a/Cargo.lock b/Cargo.lock index 7dd9af35c7852..34c4d5b279a9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -747,9 +747,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ffdb39cb703212f3c11973452c2861b972f757b021158f3516ba10f2fa8b2c1" +checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" dependencies = [ "memchr", "once_cell", @@ -9028,6 +9028,7 @@ dependencies = [ name = "turbopath" version = "0.1.0" dependencies = [ + "bstr", "path-slash", "serde", "thiserror", @@ -9170,6 +9171,7 @@ dependencies = [ "anyhow", "dunce", "git2 0.16.1", + "nom", "tempfile", "thiserror", "turbopath", @@ -9181,8 +9183,8 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", - "rand 0.4.6", + "cfg-if 1.0.0", + "rand 0.8.5", "static_assertions", ] diff --git a/cli/internal/ffi/bindings.h b/cli/internal/ffi/bindings.h index 923145d3100ef..381e53c6668ff 100644 --- a/cli/internal/ffi/bindings.h +++ b/cli/internal/ffi/bindings.h @@ -18,6 +18,8 @@ struct Buffer previous_content(struct Buffer buffer); struct Buffer recursive_copy(struct Buffer buffer); +struct Buffer recursive_copy(struct Buffer buffer); + struct Buffer transitive_closure(struct Buffer buf); struct Buffer subgraph(struct Buffer buf); diff --git a/crates/turbopath/Cargo.toml b/crates/turbopath/Cargo.toml index b6fb4d764961d..df579a0bc71eb 100644 --- a/crates/turbopath/Cargo.toml +++ b/crates/turbopath/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bstr = "1.4.0" path-slash = "0.2.1" # TODO: Make this a crate feature serde = { workspace = true } diff --git a/crates/turbopath/src/absolute_system_path_buf.rs b/crates/turbopath/src/absolute_system_path_buf.rs index 3cc18b348ef41..d119ba9e6dd74 100644 --- a/crates/turbopath/src/absolute_system_path_buf.rs +++ b/crates/turbopath/src/absolute_system_path_buf.rs @@ -62,6 +62,10 @@ impl AbsoluteSystemPathBuf { Ok(AbsoluteSystemPathBuf(system_path)) } + pub fn new_unchecked(raw: impl Into) -> Self { + Self(raw.into()) + } + /// Anchors `path` at `self`. /// /// # Arguments @@ -163,6 +167,14 @@ impl AbsoluteSystemPathBuf { AbsoluteSystemPathBuf(self.0.join(Path::new(segment))) } + pub fn join_unix_path_literal>( + &self, + unix_path: S, + ) -> Result { + let tail = Path::new(unix_path.as_ref()).into_system()?; + Ok(AbsoluteSystemPathBuf(self.0.join(tail))) + } + pub fn ensure_dir(&self) -> Result<(), io::Error> { if let Some(parent) = self.0.parent() { fs::create_dir_all(parent) @@ -171,6 +183,10 @@ impl AbsoluteSystemPathBuf { } } + pub fn create_dir(&self) -> Result<(), io::Error> { + fs::create_dir_all(self.0.as_path()) + } + pub fn remove(&self) -> Result<(), io::Error> { fs::remove_file(self.0.as_path()) } @@ -245,6 +261,11 @@ impl AbsoluteSystemPathBuf { pub fn open(&self) -> Result { Ok(fs::File::open(&self.0)?) } + + pub fn to_realpath(&self) -> Result { + let realpath = fs::canonicalize(&self.0)?; + Ok(Self(realpath)) + } } impl From for PathBuf { diff --git a/crates/turbopath/src/anchored_system_path_buf.rs b/crates/turbopath/src/anchored_system_path_buf.rs index ab309a134f055..a3e81b59407c4 100644 --- a/crates/turbopath/src/anchored_system_path_buf.rs +++ b/crates/turbopath/src/anchored_system_path_buf.rs @@ -2,7 +2,9 @@ use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; -use crate::{AbsoluteSystemPathBuf, IntoSystem, PathError, PathValidationError}; +use crate::{ + AbsoluteSystemPathBuf, IntoSystem, PathError, PathValidationError, RelativeUnixPathBuf, +}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize)] pub struct AnchoredSystemPathBuf(PathBuf); @@ -12,7 +14,8 @@ impl TryFrom<&Path> for AnchoredSystemPathBuf { fn try_from(path: &Path) -> Result { if path.is_absolute() { - return Err(PathValidationError::NotRelative(path.to_path_buf()).into()); + let bad_path = path.display().to_string(); + return Err(PathValidationError::NotRelative(bad_path).into()); } Ok(AnchoredSystemPathBuf(path.into_system()?)) @@ -33,6 +36,12 @@ impl AnchoredSystemPathBuf { Ok(AnchoredSystemPathBuf(stripped_path)) } + pub fn from_raw>(raw: P) -> Result { + let system_path = raw.as_ref(); + let system_path = system_path.into_system()?; + Ok(Self(system_path)) + } + pub fn as_path(&self) -> &Path { self.0.as_path() } @@ -42,6 +51,24 @@ impl AnchoredSystemPathBuf { .to_str() .ok_or_else(|| PathValidationError::InvalidUnicode(self.0.clone()).into()) } + + pub fn to_unix(&self) -> Result { + #[cfg(unix)] + { + use std::os::unix::ffi::OsStrExt; + let bytes = self.0.as_os_str().as_bytes(); + return RelativeUnixPathBuf::new(bytes); + } + #[cfg(not(unix))] + { + use crate::IntoUnix; + let unix_buf = self.0.as_path().into_unix()?; + let unix_str = unix_buf + .to_str() + .ok_or_else(|| PathValidationError::InvalidUnicode(unix_buf.clone()))?; + return RelativeUnixPathBuf::new(unix_str.as_bytes()); + } + } } impl From for PathBuf { diff --git a/crates/turbopath/src/lib.rs b/crates/turbopath/src/lib.rs index c5bca98cd60a1..17dd95e702b2c 100644 --- a/crates/turbopath/src/lib.rs +++ b/crates/turbopath/src/lib.rs @@ -8,7 +8,7 @@ mod relative_unix_path_buf; use std::{ io, - path::{Path, PathBuf}, + path::{Path, PathBuf, StripPrefixError}, }; pub use absolute_system_path_buf::AbsoluteSystemPathBuf; @@ -24,6 +24,10 @@ pub enum PathError { PathValidationError(#[from] PathValidationError), #[error("IO Error {0}")] IO(#[from] io::Error), + #[error("Path prefix error: {0}")] + PrefixError(#[from] StripPrefixError), + #[error("Invalid UTF8: {0}")] + Utf8Error(#[from] bstr::Utf8Error), } impl PathError { @@ -43,11 +47,18 @@ pub enum PathValidationError { #[error("Path is not absolute: {0}")] NotAbsolute(PathBuf), #[error("Path is not relative: {0}")] - NotRelative(PathBuf), + NotRelative(String), #[error("Path {0} is not parent of {1}")] NotParent(String, String), #[error("Path {0} is not a unix path")] NotUnix(String), + #[error("{0} is not a prefix for {1}")] + PrefixError(String, String), +} + +pub(crate) fn not_relative_error(bytes: &[u8]) -> PathValidationError { + let s = String::from_utf8_lossy(bytes).to_string(); + PathValidationError::NotRelative(s) } trait IntoSystem { diff --git a/crates/turbopath/src/relative_system_path_buf.rs b/crates/turbopath/src/relative_system_path_buf.rs index ef47cc6e24622..1d9fbcc9faef1 100644 --- a/crates/turbopath/src/relative_system_path_buf.rs +++ b/crates/turbopath/src/relative_system_path_buf.rs @@ -36,7 +36,8 @@ impl RelativeSystemPathBuf { pub fn new(unchecked_path: impl Into) -> Result { let unchecked_path = unchecked_path.into(); if unchecked_path.is_absolute() { - return Err(PathValidationError::NotRelative(unchecked_path)); + let bad_path = unchecked_path.display().to_string(); + return Err(PathValidationError::NotRelative(bad_path)); } let system_path = unchecked_path.into_system()?; diff --git a/crates/turbopath/src/relative_unix_path.rs b/crates/turbopath/src/relative_unix_path.rs index a8adfc244d016..c53d49963dd6c 100644 --- a/crates/turbopath/src/relative_unix_path.rs +++ b/crates/turbopath/src/relative_unix_path.rs @@ -1,25 +1,48 @@ -use std::path::Path; +use std::path::PathBuf; -use crate::{IntoSystem, PathError, PathValidationError, RelativeSystemPathBuf}; +use bstr::BStr; +use crate::{not_relative_error, PathError, RelativeSystemPathBuf}; + +#[repr(transparent)] pub struct RelativeUnixPath { - inner: Path, + inner: BStr, } impl RelativeUnixPath { - pub fn new>(value: &P) -> Result<&Self, PathError> { + pub fn new>(value: &P) -> Result<&Self, PathError> { let path = value.as_ref(); - if path.is_absolute() { - return Err(PathValidationError::NotRelative(path.to_owned()).into()); + if path[0] == b'/' { + return Err(not_relative_error(path).into()); } // copied from stdlib path.rs: relies on the representation of // RelativeUnixPath being just a Path, the same way Path relies on // just being an OsStr - Ok(unsafe { &*(path as *const Path as *const Self) }) + Ok(unsafe { &*(path as *const BStr as *const Self) }) } pub fn to_system_path(&self) -> Result { - let system_path = self.inner.into_system()?; - Ok(RelativeSystemPathBuf::new_unchecked(system_path)) + #[cfg(unix)] + { + // On unix, unix paths are already system paths. Copy the bytes + // but skip validation. + use std::{ffi::OsString, os::unix::prelude::OsStringExt}; + let path = PathBuf::from(OsString::from_vec(self.inner.to_vec())); + Ok(RelativeSystemPathBuf::new_unchecked(path)) + } + + #[cfg(windows)] + { + let system_path_bytes = self + .inner + .iter() + .map(|byte| if *byte == b'/' { b'\\' } else { *byte }) + .collect::>(); + // Is this safe to do? We think we have utf8 bytes or bytes that roundtrip + // through utf8 + let system_path_string = unsafe { String::from_utf8_unchecked(system_path_bytes) }; + let system_path_buf = PathBuf::from(system_path_string); + Ok(RelativeSystemPathBuf::new_unchecked(system_path_buf)) + } } } diff --git a/crates/turbopath/src/relative_unix_path_buf.rs b/crates/turbopath/src/relative_unix_path_buf.rs index 95306ee28b763..b2ed58d813d98 100644 --- a/crates/turbopath/src/relative_unix_path_buf.rs +++ b/crates/turbopath/src/relative_unix_path_buf.rs @@ -1,75 +1,163 @@ -use std::path::PathBuf; +use std::{fmt::Debug, io::Write}; -use serde::Serialize; +use bstr::{BString, ByteSlice}; -use crate::{IntoUnix, PathValidationError}; +use crate::{not_relative_error, PathError, PathValidationError}; -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize)] -pub struct RelativeUnixPathBuf(PathBuf); +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct RelativeUnixPathBuf(BString); impl RelativeUnixPathBuf { - /// Create a new RelativeUnixPathBuf from a PathBuf by calling `into_unix()` - /// - /// NOTE: `into_unix` *only* converts Windows paths to Unix paths *on* a - /// Windows system. Do not pass a Windows path on a Unix system and - /// assume it'll be converted. - /// - /// # Arguments - /// - /// * `path`: - /// - /// returns: Result - /// - /// # Examples - /// - /// ``` - /// ``` - pub fn new(path: impl Into) -> Result { - let path = path.into(); - if path.is_absolute() { - return Err(PathValidationError::NotRelative(path)); + pub fn new(path: impl Into>) -> Result { + let bytes: Vec = path.into(); + if !bytes.is_empty() && bytes[0] == b'/' { + return Err(not_relative_error(&bytes).into()); } + Ok(Self(BString::new(bytes))) + } + + pub fn as_str(&self) -> Result<&str, PathError> { + let s = self.0.to_str()?; + Ok(s) + } + + // write_escaped_bytes writes this path to the given writer in the form + // "", where escaped_path is the path with '"' and '\n' + // characters escaped with '\'. + pub fn write_escapted_bytes(&self, writer: &mut W) -> Result<(), PathError> { + writer.write_all(&[b'\"'])?; + let mut i: usize = 0; + while i < self.0.len() { + if let Some(mut index) = self.0[i..] + .iter() + .position(|byte| *byte == b'\"' || *byte == b'\n') + { + // renormalize the index into the byte vector + index += i; + writer.write_all(&self.0[i..index])?; + let byte = self.0[index]; + if byte == b'\"' { + writer.write_all(&[b'\\', b'\"'])?; + } else { + writer.write_all(&[b'\\', b'\n'])?; + } + i = index + 1; + } else { + writer.write_all(&self.0)?; + i = self.0.len(); + } + } + writer.write_all(&[b'\"'])?; + Ok(()) + } + + pub fn strip_prefix(&self, prefix: &RelativeUnixPathBuf) -> Result { + let prefix_len = prefix.0.len(); + if prefix_len == 0 { + return Ok(self.clone()); + } + if !self.0.starts_with(&prefix.0) { + return Err(PathError::PathValidationError( + PathValidationError::NotParent(prefix.0.to_string(), self.0.to_string()), + )); + } + if self.0[prefix_len] != b'/' { + let prefix_str = prefix.as_str().unwrap_or("invalid utf8").to_string(); + let this = self.as_str().unwrap_or("invalid utf8").to_string(); + return Err(PathError::PathValidationError( + PathValidationError::PrefixError(prefix_str, this), + )); + } + let tail_slice = &self.0[(prefix_len + 1)..]; + Self::new(tail_slice) + } - Ok(RelativeUnixPathBuf(path.into_unix()?)) + pub fn join(&self, tail: &RelativeUnixPathBuf) -> Self { + let buffer = Vec::with_capacity(self.0.len() + 1 + tail.0.len()); + let mut path = BString::new(buffer); + if self.0.len() > 0 { + path.extend_from_slice(&self.0); + path.push(b'/'); + } + path.extend_from_slice(&tail.0); + Self(path) } +} - pub fn to_str(&self) -> Result<&str, PathValidationError> { - self.0 - .to_str() - .ok_or_else(|| PathValidationError::InvalidUnicode(self.0.clone())) +impl Debug for RelativeUnixPathBuf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.as_str() { + Ok(s) => write!(f, "{}", s), + Err(_) => write!(f, "Non-utf8 {:?}", self.0), + } } } #[cfg(test)] mod tests { + use std::io::BufWriter; + #[cfg(windows)] use std::path::Path; use super::*; #[test] fn test_relative_unix_path_buf() { - let path = RelativeUnixPathBuf::new(PathBuf::from("foo/bar")).unwrap(); - assert_eq!(path.to_str().unwrap(), "foo/bar"); + let path = RelativeUnixPathBuf::new("foo/bar").unwrap(); + assert_eq!(path.as_str().unwrap(), "foo/bar"); } #[test] fn test_relative_unix_path_buf_with_extension() { - let path = RelativeUnixPathBuf::new(PathBuf::from("foo/bar.txt")).unwrap(); - assert_eq!(path.to_str().unwrap(), "foo/bar.txt"); + let path = RelativeUnixPathBuf::new("foo/bar.txt").unwrap(); + assert_eq!(path.as_str().unwrap(), "foo/bar.txt"); } #[test] - fn test_relative_unix_path_buf_errors() { - #[cfg(not(windows))] - assert!(RelativeUnixPathBuf::new(PathBuf::from("/foo/bar")).is_err()); - #[cfg(windows)] - assert!(RelativeUnixPathBuf::new(PathBuf::from("C:\\foo\\bar")).is_err()); + fn test_join() { + let head = RelativeUnixPathBuf::new("some/path").unwrap(); + let tail = RelativeUnixPathBuf::new("child/leaf").unwrap(); + let combined = head.join(&tail); + assert_eq!(combined.as_str().unwrap(), "some/path/child/leaf"); + } + + #[test] + fn test_strip_prefix() { + let combined = RelativeUnixPathBuf::new("some/path/child/leaf").unwrap(); + let head = RelativeUnixPathBuf::new("some/path").unwrap(); + let expected = RelativeUnixPathBuf::new("child/leaf").unwrap(); + let tail = combined.strip_prefix(&head).unwrap(); + assert_eq!(tail, expected); + } + + #[test] + fn test_strip_empty_prefix() { + let combined = RelativeUnixPathBuf::new("some/path").unwrap(); + let tail = combined + .strip_prefix(&RelativeUnixPathBuf::new("").unwrap()) + .unwrap(); + assert_eq!(tail, combined); } - #[cfg(windows)] #[test] - fn test_convert_from_windows_path() { - let path = RelativeUnixPathBuf::new(PathBuf::from("foo\\bar")).unwrap(); - assert_eq!(path.0.as_path(), Path::new("foo/bar")); + fn test_write_escaped() { + let input = "\"quote\"\nnewline\n".as_bytes(); + let expected = "\"\\\"quote\\\"\\\nnewline\\\n\"".as_bytes(); + let mut buffer = Vec::new(); + { + let mut writer = BufWriter::new(&mut buffer); + let path = RelativeUnixPathBuf::new(input).unwrap(); + path.write_escapted_bytes(&mut writer).unwrap(); + } + assert_eq!(buffer.as_slice(), expected); + } + + #[test] + fn test_relative_unix_path_buf_errors() { + assert!(RelativeUnixPathBuf::new("/foo/bar").is_err()); + // Note: this shouldn't be an error, this is a valid relative unix path + // #[cfg(windows)] + // assert!(RelativeUnixPathBuf::new(PathBuf::from("C:\\foo\\bar")). + // is_err()); } } diff --git a/crates/turborepo-ffi/src/lib.rs b/crates/turborepo-ffi/src/lib.rs index f761a427ca995..9e37736159b65 100644 --- a/crates/turborepo-ffi/src/lib.rs +++ b/crates/turborepo-ffi/src/lib.rs @@ -163,3 +163,26 @@ pub extern "C" fn recursive_copy(buffer: Buffer) -> Buffer { }; response.into() } + +#[no_mangle] +pub extern "C" fn recursive_copy(buffer: Buffer) -> Buffer { + let req: proto::RecursiveCopyRequest = match buffer.into_proto() { + Ok(req) => req, + Err(err) => { + let resp = proto::RecursiveCopyResponse { + error: Some(err.to_string()), + }; + return resp.into(); + } + }; + let response = match turborepo_fs::recursive_copy( + &AbsoluteSystemPathBuf::new_unchecked(req.src), + &AbsoluteSystemPathBuf::new_unchecked(req.dst), + ) { + Ok(()) => proto::RecursiveCopyResponse { error: None }, + Err(e) => proto::RecursiveCopyResponse { + error: Some(e.to_string()), + }, + }; + response.into() +} diff --git a/crates/turborepo-lib/src/daemon/file_descriptor_set.bin b/crates/turborepo-lib/src/daemon/file_descriptor_set.bin new file mode 100644 index 0000000000000000000000000000000000000000..64e2f19fbe7d92114a4c5e612a5f167172518ab8 GIT binary patch literal 2490 zcmbVNU2hsk6kTSQW%jZ*Fu@q(FIX(c;2787wo;XcDoyLgk{i`&6{!z(F>C+{SjW3G z?O*6q|4n~D|5(q=&ba=n6!mS-z4x5^$v}TSAwOAMUq-{u)pe9aVmU<$45DdpLjJem zbQ+z9?-ya5h$`iN4X@)#G_PjrnLY2^K(R>_hq3frOommaoSwG48U_Vg(n9f7G>^le zM1?owMKX*&%(ZwRsQ9l*hk^Gd=_iZW1fA2eF{B~lOY(>Pa2Cx|yDBW--RRtHrStMu z%=sNg@JEK$zl)N|yWd|glB-1$zrCI$NoWca4e}%9xEM{N%NQAKw4P6GC(ygvtU4j_V70eTw6=MF@ptY{oDnxiCbsW~8jqf5 zt^VHudO9cMre`M}Et z$Fhzy#3|zo+p>iG;=*RuDS5ng*B0KGQA+n&xJ$`iFip7*FOS(li^AdADW|B=Z}e z8zfa;{H!Z+gzdAp+U0=eyQ>C)=C2(Y1e*W+M>$3ZV=P!gjnR>_D)6$DbJQ4%Y(-Ke z$O^m2xIqXkdPRdESA1e8$Rk=$?29^1W zGNbB>Ra0g;*$PjiL9ANq$}Fc-5~+s;TA>w99u?zg#$b)_&SAhkcLNRZcGZZRvgikh7(80wHHh z=L7<+rTYa!&X(?%%W~SfUjWeBx?dpa)BOU0)<(Z_rJisOti!Y>4^aoap=$DEO&zwc zx&uH>9bFR;#5=ksAkaEu*F-|R(`qYm63&r)jpQ*BqK@=0f~0ttRTKpPa$Q|35CXei px?mv4b;~QNCXy}on0j*@0ML3jG}&U$D{D#FVsEv2>8i)E_YVRh-q8R6 literal 0 HcmV?d00001 diff --git a/crates/turborepo-scm/Cargo.toml b/crates/turborepo-scm/Cargo.toml index 9c40f507ce844..d57355a4f37d9 100644 --- a/crates/turborepo-scm/Cargo.toml +++ b/crates/turborepo-scm/Cargo.toml @@ -10,6 +10,7 @@ license = "MPL-2.0" anyhow = { workspace = true } dunce = { workspace = true } git2 = { version = "0.16.1", default-features = false } +nom = "7.1.3" thiserror = { workspace = true } turbopath = { workspace = true } diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/.gitignore b/crates/turborepo-scm/fixtures/01-git-hash-object/.gitignore new file mode 100644 index 0000000000000..d8e19507425ea --- /dev/null +++ b/crates/turborepo-scm/fixtures/01-git-hash-object/.gitignore @@ -0,0 +1,2 @@ +"quote" +new*line diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/child/child.json b/crates/turborepo-scm/fixtures/01-git-hash-object/child/child.json new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/child/grandchild/grandchild.json b/crates/turborepo-scm/fixtures/01-git-hash-object/child/grandchild/grandchild.json new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/root.json b/crates/turborepo-scm/fixtures/01-git-hash-object/root.json new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/crates/turborepo-scm/src/hash_object.rs b/crates/turborepo-scm/src/hash_object.rs new file mode 100644 index 0000000000000..34c77708b540e --- /dev/null +++ b/crates/turborepo-scm/src/hash_object.rs @@ -0,0 +1,194 @@ +use std::{ + backtrace::Backtrace, + io::{BufWriter, Read, Write}, + panic, + process::{Command, Stdio}, + thread, +}; + +use anyhow::{anyhow, Result}; +use nom::{Finish, IResult}; +use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + +use crate::{package_deps::GitHashes, Error}; + +pub(crate) fn hash_objects( + pkg_path: &AbsoluteSystemPathBuf, + to_hash: Vec, + pkg_prefix: &RelativeUnixPathBuf, + hashes: &mut GitHashes, +) -> Result<()> { + if to_hash.is_empty() { + return Ok(()); + } + let mut git = Command::new("git") + .args(["hash-object", "--stdin-paths"]) + .current_dir(pkg_path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .stdin(Stdio::piped()) + .spawn()?; + { + let stdout = git + .stdout + .as_mut() + .ok_or_else(|| anyhow!("failed to get stdout for git hash-object"))?; + // We take, rather than borrow, stdin so that we can drop it and force the + // underlying file descriptor to close, signalling the end of input. + let stdin: std::process::ChildStdin = git + .stdin + .take() + .ok_or_else(|| anyhow!("failed to get stdin for git hash-object"))?; + let mut stderr = git + .stderr + .take() + .ok_or_else(|| anyhow!("failed to get stderr for git hash-object"))?; + let result = read_object_hashes(stdout, stdin, &to_hash, pkg_prefix, hashes); + if result.is_err() { + let mut buf = String::new(); + let bytes_read = stderr.read_to_string(&mut buf)?; + if bytes_read > 0 { + // something failed with git, report that error + return Err(Error::Git(buf, Backtrace::capture()).into()); + } + } + result?; + } + git.wait()?; + Ok(()) +} + +fn read_object_hashes( + mut reader: R, + writer: W, + to_hash: &Vec, + pkg_prefix: &RelativeUnixPathBuf, + hashes: &mut GitHashes, +) -> Result<()> { + thread::scope(move |scope| -> Result<()> { + let write_thread = scope.spawn(move || -> Result<()> { + let mut writer = BufWriter::new(writer); + for path in to_hash { + path.write_escapted_bytes(&mut writer)?; + writer.write_all(&[b'\n'])?; + writer.flush()?; + } + // writer is dropped here, closing stdin + Ok(()) + }); + let mut i: usize = 0; + let mut buffer: [u8; 41] = [0; 41]; + loop { + if i == to_hash.len() { + break; + } + reader.read_exact(&mut buffer)?; + { + let hash = parse_hash_object(&buffer)?; + let hash = String::from_utf8(hash.to_vec())?; + let filename = &(to_hash[i]); + let path = filename.strip_prefix(pkg_prefix)?; + hashes.insert(path, hash); + } + i += 1; + } + match write_thread.join() { + // the error case is if the thread panic'd. In that case, we propagate + // the panic, since we aren't going to handle it. + Err(e) => panic::resume_unwind(e), + Ok(result) => result, + } + })?; + Ok(()) +} + +fn parse_hash_object(i: &[u8]) -> Result<&[u8]> { + match nom_parse_hash_object(i).finish() { + Ok((_, hash)) => Ok(hash), + Err(e) => Err(anyhow!( + "failed to parse git-hash-object {}", + std::str::from_utf8(e.input)? + )), + } +} + +fn nom_parse_hash_object(i: &[u8]) -> IResult<&[u8], &[u8]> { + let (i, hash) = nom::bytes::complete::take(40usize)(i)?; + let (i, _) = nom::bytes::complete::tag(&[b'\n'])(i)?; + Ok((i, hash)) +} + +#[cfg(test)] +mod test { + use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + + use super::hash_objects; + use crate::package_deps::{find_git_root, GitHashes}; + + #[test] + fn test_read_object_hashes() { + // Note that cwd can be different based on where the test suite is running from + // or if the test is launched in debug mode from VSCode + let cwd = std::env::current_dir().unwrap(); + let cwd = AbsoluteSystemPathBuf::new(cwd).unwrap(); + let git_root = find_git_root(&cwd).unwrap(); + let fixture_path = git_root + .join_unix_path_literal("crates/turborepo-scm/fixtures/01-git-hash-object") + .unwrap(); + + let fixture_child_path = fixture_path.join_literal("child"); + let git_root = find_git_root(&fixture_path).unwrap(); + + // paths for files here are relative to the package path. + let tests: Vec<(Vec<(&str, &str)>, &AbsoluteSystemPathBuf)> = vec![ + (vec![], &fixture_path), + ( + vec![ + ("../root.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ("child.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ( + "grandchild/grandchild.json", + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", + ), + ], + &fixture_child_path, + ), + ]; + + for (to_hash, pkg_path) in tests { + let file_hashes: Vec<(RelativeUnixPathBuf, String)> = to_hash + .into_iter() + .map(|(raw, hash)| (RelativeUnixPathBuf::new(raw).unwrap(), String::from(hash))) + .collect(); + + let git_to_pkg_path = git_root.anchor(pkg_path).unwrap(); + let pkg_prefix = git_to_pkg_path.to_unix().unwrap(); + + let expected_hashes = GitHashes::from_iter(file_hashes.into_iter()); + let mut hashes = GitHashes::new(); + let to_hash = expected_hashes.keys().map(|k| pkg_prefix.join(k)).collect(); + hash_objects(&pkg_path, to_hash, &pkg_prefix, &mut hashes).unwrap(); + assert_eq!(hashes, expected_hashes); + } + + // paths for files here are relative to the package path. + let error_tests: Vec<(Vec<&str>, &AbsoluteSystemPathBuf)> = vec![ + // skipping test for outside of git repo, we now error earlier in the process + (vec!["nonexistent.json"], &fixture_path), + ]; + + for (to_hash, pkg_path) in error_tests { + let git_to_pkg_path = git_root.anchor(pkg_path).unwrap(); + let pkg_prefix = git_to_pkg_path.to_unix().unwrap(); + + let to_hash = to_hash + .into_iter() + .map(|k| pkg_prefix.join(&RelativeUnixPathBuf::new(k).unwrap())) + .collect(); + + let mut hashes = GitHashes::new(); + let result = hash_objects(&pkg_path, to_hash, &pkg_prefix, &mut hashes); + assert_eq!(result.is_err(), true); + } + } +} diff --git a/crates/turborepo-scm/src/lib.rs b/crates/turborepo-scm/src/lib.rs index 86abdfc38d4e5..bb96c50bf839b 100644 --- a/crates/turborepo-scm/src/lib.rs +++ b/crates/turborepo-scm/src/lib.rs @@ -8,6 +8,10 @@ use thiserror::Error; use turbopath::PathError; pub mod git; +mod hash_object; +mod ls_tree; +pub mod package_deps; +mod status; #[derive(Debug, Error)] pub enum Error { diff --git a/crates/turborepo-scm/src/ls_tree.rs b/crates/turborepo-scm/src/ls_tree.rs new file mode 100644 index 0000000000000..282156e9fa593 --- /dev/null +++ b/crates/turborepo-scm/src/ls_tree.rs @@ -0,0 +1,135 @@ +use std::{ + io::{BufRead, BufReader, Read}, + process::{Command, Stdio}, +}; + +use anyhow::{anyhow, Result}; +use nom::Finish; +use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + +use crate::package_deps::GitHashes; + +pub fn git_ls_tree(root_path: &AbsoluteSystemPathBuf) -> Result { + let mut hashes = GitHashes::new(); + let mut git = Command::new("git") + .args(["ls-tree", "-r", "-z", "HEAD"]) + .current_dir(root_path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + { + let stdout = git + .stdout + .as_mut() + .ok_or_else(|| anyhow!("failed to get stdout for git ls-tree"))?; + read_ls_tree(stdout, &mut hashes)?; + } + git.wait()?; + Ok(hashes) +} + +fn read_ls_tree(reader: R, hashes: &mut GitHashes) -> Result<()> { + let mut reader = BufReader::new(reader); + let mut buffer = Vec::new(); + loop { + buffer.clear(); + { + let bytes_read = reader.read_until(b'\0', &mut buffer)?; + if bytes_read == 0 { + break; + } + { + let entry = parse_ls_tree(&buffer)?; + let hash = String::from_utf8(entry.hash.to_vec())?; + let path = RelativeUnixPathBuf::new(entry.filename)?; + hashes.insert(path, hash); + } + } + } + Ok(()) +} + +struct LsTreeEntry<'a> { + filename: &'a [u8], + hash: &'a [u8], +} + +fn parse_ls_tree(i: &[u8]) -> Result> { + match nom_parse_ls_tree(i).finish() { + Ok((_, entry)) => Ok(entry), + Err(e) => Err(anyhow!("nom: {:?}: {}", e, std::str::from_utf8(e.input)?)), + } +} + +fn nom_parse_ls_tree(i: &[u8]) -> nom::IResult<&[u8], LsTreeEntry<'_>> { + let (i, _) = nom::bytes::complete::is_not(" ")(i)?; + let (i, _) = nom::character::complete::space1(i)?; + let (i, _) = nom::bytes::complete::is_not(" ")(i)?; + let (i, _) = nom::character::complete::space1(i)?; + let (i, hash) = nom::bytes::complete::take(40usize)(i)?; + let (i, _) = nom::bytes::complete::take(1usize)(i)?; + let (i, filename) = nom::bytes::complete::is_not(" \0")(i)?; + Ok((i, LsTreeEntry { filename, hash })) +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use turbopath::RelativeUnixPathBuf; + + use crate::{ls_tree::read_ls_tree, package_deps::GitHashes}; + + fn to_hash_map(pairs: &[(&str, &str)]) -> GitHashes { + HashMap::from_iter(pairs.into_iter().map(|(path, hash)| { + ( + RelativeUnixPathBuf::new(path.as_bytes()).unwrap(), + hash.to_string(), + ) + })) + } + + #[test] + fn test_ls_tree() { + let tests: &[(&str, &[(&str, &str)])] = &[ + ( + "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\tpackage.json\0", + &[("package.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")], + ), + ( + // missing nul byte + "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\tpackage.json", + &[("package.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")], + ), + ( + "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t\t\000100644 blob \ + e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t\"\000100644 blob \ + 5b999efa470b056e329b4c23a73904e0794bdc2f\t\n\000100644 blob \ + f44f57fff95196c5f7139dfa0b96875f1e9650a9\t.gitignore\000100644 blob \ + 33dbaf21275ca2a5f460249d941cbc27d5da3121\tREADME.md\000040000 tree \ + 7360f2d292aec95907cebdcbb412a6bf2bd10f8a\tapps\000100644 blob \ + 9ec2879b24ce2c817296eebe2cb3846f8e4751ea\tpackage.json\000040000 tree \ + 5759aadaea2cde55468a61e7104eb0a9d86c1d30\tpackages\000100644 blob \ + 33d0621ee2f4da4a2f6f6bdd51a42618d181e337\tturbo.json\0", + &[ + ("\t", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ("\"", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ("\n", "5b999efa470b056e329b4c23a73904e0794bdc2f"), + (".gitignore", "f44f57fff95196c5f7139dfa0b96875f1e9650a9"), + ("README.md", "33dbaf21275ca2a5f460249d941cbc27d5da3121"), + ("apps", "7360f2d292aec95907cebdcbb412a6bf2bd10f8a"), + ("package.json", "9ec2879b24ce2c817296eebe2cb3846f8e4751ea"), + ("packages", "5759aadaea2cde55468a61e7104eb0a9d86c1d30"), + ("turbo.json", "33d0621ee2f4da4a2f6f6bdd51a42618d181e337"), + ], + ), + ]; + for (input, expected) in tests { + let input_bytes = input.as_bytes(); + let mut hashes = GitHashes::new(); + let expected = to_hash_map(expected); + read_ls_tree(input_bytes, &mut hashes).unwrap(); + assert_eq!(hashes, expected); + } + } +} diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs new file mode 100644 index 0000000000000..9bb66721f26b3 --- /dev/null +++ b/crates/turborepo-scm/src/package_deps.rs @@ -0,0 +1,168 @@ +use std::{collections::HashMap, process::Command}; + +use anyhow::{anyhow, Result}; +use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf, RelativeUnixPathBuf}; + +use crate::{hash_object::hash_objects, ls_tree::git_ls_tree, status::append_git_status}; + +pub type GitHashes = HashMap; + +pub fn get_package_deps( + turbo_root: &AbsoluteSystemPathBuf, + package_path: &AnchoredSystemPathBuf, + inputs: &[&str], +) -> Result { + // TODO: memoize git root -> turbo root calculation once we aren't crossing ffi + let git_root = find_git_root(turbo_root)?; + let full_pkg_path = turbo_root.resolve(package_path); + let git_to_pkg_path = git_root.anchor(&full_pkg_path)?; + let pkg_prefix = git_to_pkg_path.to_unix()?; + let result = if inputs.len() == 0 { + let mut hashes = git_ls_tree(&full_pkg_path)?; + // Note: to_hash is *git repo relative* + let to_hash = append_git_status(&full_pkg_path, &pkg_prefix, inputs, &mut hashes)?; + hash_objects(&full_pkg_path, to_hash, &pkg_prefix, &mut hashes)?; + hashes + } else { + let pkg_prefix_str = pkg_prefix.as_str()?; + let mut inputs = inputs.to_vec(); + inputs.push("package.json"); + inputs.push("turbo.json"); + let mut prefixed_input_patterns = vec![]; + let mut prefixed_exclude_patterns = vec![]; + for input in inputs { + if input.starts_with("!") { + let glob = input + .get(1..) + .ok_or_else(|| anyhow!("invalid glob: {}", input))?; + let pkg_glob = format!("{}/{}", pkg_prefix_str, glob); + prefixed_exclude_patterns.push(pkg_glob); + } else { + let pkg_glob = format!("{}/{}", pkg_prefix_str, input); + prefixed_input_patterns.push(pkg_glob); + } + } + let files_to_hash = unimplemented!(); + }; + Ok(result) +} + +pub(crate) fn find_git_root(turbo_root: &AbsoluteSystemPathBuf) -> Result { + let rev_parse = Command::new("git") + .args(["rev-parse", "--show-cdup"]) + .current_dir(turbo_root) + .output()?; + let root = String::from_utf8(rev_parse.stdout)?; + Ok(turbo_root.join_literal(root.trim_end()).to_realpath()?) +} + +#[cfg(test)] +mod tests { + use std::process::Command; + + use super::*; + + fn tmp_dir() -> Result<(tempfile::TempDir, AbsoluteSystemPathBuf)> { + let tmp_dir = tempfile::tempdir()?; + let dir = AbsoluteSystemPathBuf::new(tmp_dir.path().to_path_buf())?.to_realpath()?; + Ok((tmp_dir, dir)) + } + + fn require_git_cmd(repo_root: &AbsoluteSystemPathBuf, args: &[&str]) { + let mut cmd = Command::new("git"); + cmd.args(args).current_dir(repo_root); + assert_eq!(cmd.output().unwrap().status.success(), true); + } + + fn setup_repository(repo_root: &AbsoluteSystemPathBuf) { + let cmds: &[&[&str]] = &[ + &["init", "."], + &["config", "--local", "user.name", "test"], + &["config", "--local", "user.email", "test@example.com"], + ]; + for cmd in cmds { + require_git_cmd(repo_root, cmd); + } + } + + fn commit_all(repo_root: &AbsoluteSystemPathBuf) { + let cmds: &[&[&str]] = &[&["add", "."], &["commit", "-m", "foo"]]; + for cmd in cmds { + require_git_cmd(repo_root, cmd); + } + } + + #[test] + fn test_get_package_deps() -> Result<()> { + // Directory structure: + // / + // new-root-file <- new file not added to git + // my-pkg/ + // committed-file + // deleted-file + // uncommitted-file <- new file not added to git + // dir/ + // nested-file + let (_repo_root_tmp, repo_root) = tmp_dir()?; + let my_pkg_dir = repo_root.join_literal("my-pkg"); + my_pkg_dir.create_dir()?; + + // create file 1 + let committed_file_path = my_pkg_dir.join_literal("committed-file"); + committed_file_path.create_with_contents("committed bytes")?; + + // create file 2 + let deleted_file_path = my_pkg_dir.join_literal("deleted-file"); + deleted_file_path.create_with_contents("delete-me")?; + + // create file 3 + let nested_file_path = my_pkg_dir.join_literal("dir/nested-file"); + nested_file_path.ensure_dir()?; + nested_file_path.create_with_contents("nested")?; + + // create a package.json + let pkg_json_path = my_pkg_dir.join_literal("package.json"); + pkg_json_path.create_with_contents("{}")?; + + setup_repository(&repo_root); + commit_all(&repo_root); + + // remove a file + deleted_file_path.remove()?; + + // create another untracked file in git + let uncommitted_file_path = my_pkg_dir.join_literal("uncommitted-file"); + uncommitted_file_path.create_with_contents("uncommitted bytes")?; + + // create an untracked file in git up a level + let root_file_path = repo_root.join_literal("new-root-file"); + root_file_path.create_with_contents("new-root bytes")?; + + let package_path = AnchoredSystemPathBuf::from_raw("my-pkg")?; + + let expected = to_hash_map(&[ + ("committed-file", "3a29e62ea9ba15c4a4009d1f605d391cdd262033"), + ( + "uncommitted-file", + "4e56ad89387e6379e4e91ddfe9872cf6a72c9976", + ), + ("package.json", "9e26dfeeb6e641a33dae4961196235bdb965b21b"), + ( + "dir/nested-file", + "bfe53d766e64d78f80050b73cd1c88095bc70abb", + ), + ]); + let hashes = get_package_file_hashes_from_git_index(&repo_root, &package_path)?; + assert_eq!(hashes, expected); + Ok(()) + } + + fn to_hash_map(pairs: &[(&str, &str)]) -> GitHashes { + HashMap::from_iter(pairs.into_iter().map(|(path, hash)| { + ( + RelativeUnixPathBuf::new(path.as_bytes()).unwrap(), + hash.to_string(), + ) + })) + } +} diff --git a/crates/turborepo-scm/src/status.rs b/crates/turborepo-scm/src/status.rs new file mode 100644 index 0000000000000..7d65e8d2aa49e --- /dev/null +++ b/crates/turborepo-scm/src/status.rs @@ -0,0 +1,156 @@ +use std::{ + backtrace::Backtrace, + io::{BufRead, BufReader, Read}, + process::{Command, Stdio}, +}; + +use anyhow::{anyhow, Result}; +use nom::Finish; +use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + +use crate::{package_deps::GitHashes, Error}; + +pub(crate) fn append_git_status( + root_path: &AbsoluteSystemPathBuf, + pkg_prefix: &RelativeUnixPathBuf, + patterns: &[&str], + hashes: &mut GitHashes, +) -> Result> { + let mut args = vec!["status", "--untracked-files", "--no-renames", "-z", "--"]; + if patterns.len() == 0 { + args.push("."); + } else { + let mut patterns = Vec::from(patterns); + args.append(&mut patterns); + } + let mut git = Command::new("git") + .args(args.as_slice()) + .current_dir(root_path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + let to_hash = { + let stdout = git + .stdout + .as_mut() + .ok_or_else(|| anyhow!("failed to get stdout for git status"))?; + let mut stderr = git + .stderr + .take() + .ok_or_else(|| anyhow!("failed to get stderr for git status"))?; + let result = read_status(stdout, pkg_prefix, hashes); + if result.is_err() { + let mut buf = String::new(); + let bytes_read = stderr.read_to_string(&mut buf)?; + if bytes_read > 0 { + // something failed with git, report that error + return Err(Error::Git(buf, Backtrace::capture()).into()); + } + } + result? + }; + git.wait()?; + Ok(to_hash) +} + +fn read_status( + reader: R, + pkg_prefix: &RelativeUnixPathBuf, + hashes: &mut GitHashes, +) -> Result> { + let mut to_hash = Vec::new(); + let mut reader = BufReader::new(reader); + let mut buffer = Vec::new(); + loop { + buffer.clear(); + { + let bytes_read = reader.read_until(b'\0', &mut buffer)?; + if bytes_read == 0 { + break; + } + { + let entry = parse_status(&buffer)?; + let path = RelativeUnixPathBuf::new(entry.filename)?; + if entry.is_delete { + let path = path.strip_prefix(pkg_prefix)?; + hashes.remove(&path); + } else { + to_hash.push(path); + } + } + } + } + Ok(to_hash) +} + +struct StatusEntry<'a> { + filename: &'a [u8], + is_delete: bool, +} + +fn parse_status(i: &[u8]) -> Result> { + match nom_parse_status(i).finish() { + Ok((_, tup)) => Ok(tup), + Err(e) => Err(anyhow!("nom: {:?} {}", e, std::str::from_utf8(e.input)?)), + } +} + +fn nom_parse_status(i: &[u8]) -> nom::IResult<&[u8], StatusEntry<'_>> { + let (i, x) = nom::bytes::complete::take(1usize)(i)?; + let (i, y) = nom::bytes::complete::take(1usize)(i)?; + let (i, _) = nom::character::complete::space1(i)?; + let (i, filename) = nom::bytes::complete::is_not(" \0")(i)?; + Ok(( + i, + StatusEntry { + filename, + is_delete: x[0] == b'D' || y[0] == b'D', + }, + )) +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use turbopath::RelativeUnixPathBuf; + + use super::read_status; + use crate::package_deps::GitHashes; + + #[test] + fn test_status() { + let tests: &[(&str, &str, (&str, bool))] = &[ + ("AD my-pkg/package.json\0", "my-pkg", ("package.json", true)), + ( + // no trailing NUL + "AD some-pkg/package.json", + "some-pkg", + ("package.json", true), + ), + ("M package.json\0", "", ("package.json", false)), + ("A some-pkg/some-file\0", "some-pkg", ("some-file", false)), + ]; + for (input, prefix, (expected_filename, expect_delete)) in tests { + let prefix = RelativeUnixPathBuf::new(prefix.as_bytes()).unwrap(); + let mut hashes = to_hash_map(&[(expected_filename, "some-hash")]); + let to_hash = read_status(input.as_bytes(), &prefix, &mut hashes).unwrap(); + if *expect_delete { + assert_eq!(hashes.len(), 0, "input: {}", input); + } else { + assert_eq!(to_hash.len(), 1, "input: {}", input); + let expected = prefix.join(&RelativeUnixPathBuf::new(*expected_filename).unwrap()); + assert_eq!(to_hash[0], expected); + } + } + } + + fn to_hash_map(pairs: &[(&str, &str)]) -> GitHashes { + HashMap::from_iter(pairs.into_iter().map(|(path, hash)| { + ( + RelativeUnixPathBuf::new(path.as_bytes()).unwrap(), + hash.to_string(), + ) + })) + } +} From e89a1b3e00428aa3ffd444e4b58cc09367ee5d0d Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Sat, 6 May 2023 10:13:01 -0700 Subject: [PATCH 02/10] FFI wiring --- cli/internal/ffi/bindings.h | 2 +- cli/internal/ffi/ffi.go | 23 ++ cli/internal/ffi/proto/messages.pb.go | 344 +++++++++++++++--- cli/internal/hashing/package_deps_hash.go | 100 ----- cli/internal/hashing/package_deps_hash_go.go | 112 ++++++ .../hashing/package_deps_hash_rust.go | 22 ++ crates/turborepo-ffi/messages.proto | 16 + crates/turborepo-ffi/src/lib.rs | 87 ++++- crates/turborepo-scm/src/package_deps.rs | 38 +- crates/turborepo-scm/src/status.rs | 17 +- 10 files changed, 561 insertions(+), 200 deletions(-) create mode 100644 cli/internal/hashing/package_deps_hash_go.go create mode 100644 cli/internal/hashing/package_deps_hash_rust.go diff --git a/cli/internal/ffi/bindings.h b/cli/internal/ffi/bindings.h index 381e53c6668ff..61010332dceb1 100644 --- a/cli/internal/ffi/bindings.h +++ b/cli/internal/ffi/bindings.h @@ -18,7 +18,7 @@ struct Buffer previous_content(struct Buffer buffer); struct Buffer recursive_copy(struct Buffer buffer); -struct Buffer recursive_copy(struct Buffer buffer); +struct Buffer get_package_file_hashes_from_git_index(struct Buffer buffer); struct Buffer transitive_closure(struct Buffer buf); diff --git a/cli/internal/ffi/ffi.go b/cli/internal/ffi/ffi.go index b59846fa73bb0..a997ab3fbe88c 100644 --- a/cli/internal/ffi/ffi.go +++ b/cli/internal/ffi/ffi.go @@ -313,3 +313,26 @@ func GlobalChange(packageManager string, prevContents []byte, currContents []byt return resp.GetGlobalChange() } + +// GetPackageFileHashesFromGitIndex proxies to rust to use git to hash the files in a package. +// It does not support additional files, it just hashes the non-ignored files in the package. +func GetPackageFileHashesFromGitIndex(rootPath string, packagePath string) (map[string]string, error) { + req := ffi_proto.GetPackageFileHashesFromGitIndexRequest{ + TurboRoot: rootPath, + PackagePath: packagePath, + } + reqBuf := Marshal(&req) + resBuf := C.get_package_file_hashes_from_git_index(reqBuf) + reqBuf.Free() + + resp := ffi_proto.GetPackageFileHashesFromGitIndexResponse{} + if err := Unmarshal(resBuf, resp.ProtoReflect().Interface()); err != nil { + panic(err) + } + + if err := resp.GetError(); err != "" { + return nil, errors.New(err) + } + hashes := resp.GetHashes() + return hashes.Hashes, nil +} diff --git a/cli/internal/ffi/proto/messages.pb.go b/cli/internal/ffi/proto/messages.pb.go index 553b205c07904..2232633f114bd 100644 --- a/cli/internal/ffi/proto/messages.pb.go +++ b/cli/internal/ffi/proto/messages.pb.go @@ -1662,6 +1662,190 @@ func (x *RecursiveCopyResponse) GetError() string { return "" } +type GetPackageFileHashesFromGitIndexRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + TurboRoot string `protobuf:"bytes,1,opt,name=turbo_root,json=turboRoot,proto3" json:"turbo_root,omitempty"` + PackagePath string `protobuf:"bytes,2,opt,name=package_path,json=packagePath,proto3" json:"package_path,omitempty"` +} + +func (x *GetPackageFileHashesFromGitIndexRequest) Reset() { + *x = GetPackageFileHashesFromGitIndexRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_turborepo_ffi_messages_proto_msgTypes[26] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetPackageFileHashesFromGitIndexRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetPackageFileHashesFromGitIndexRequest) ProtoMessage() {} + +func (x *GetPackageFileHashesFromGitIndexRequest) ProtoReflect() protoreflect.Message { + mi := &file_turborepo_ffi_messages_proto_msgTypes[26] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetPackageFileHashesFromGitIndexRequest.ProtoReflect.Descriptor instead. +func (*GetPackageFileHashesFromGitIndexRequest) Descriptor() ([]byte, []int) { + return file_turborepo_ffi_messages_proto_rawDescGZIP(), []int{26} +} + +func (x *GetPackageFileHashesFromGitIndexRequest) GetTurboRoot() string { + if x != nil { + return x.TurboRoot + } + return "" +} + +func (x *GetPackageFileHashesFromGitIndexRequest) GetPackagePath() string { + if x != nil { + return x.PackagePath + } + return "" +} + +type FileHashes struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Hashes map[string]string `protobuf:"bytes,1,rep,name=hashes,proto3" json:"hashes,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` +} + +func (x *FileHashes) Reset() { + *x = FileHashes{} + if protoimpl.UnsafeEnabled { + mi := &file_turborepo_ffi_messages_proto_msgTypes[27] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FileHashes) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FileHashes) ProtoMessage() {} + +func (x *FileHashes) ProtoReflect() protoreflect.Message { + mi := &file_turborepo_ffi_messages_proto_msgTypes[27] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FileHashes.ProtoReflect.Descriptor instead. +func (*FileHashes) Descriptor() ([]byte, []int) { + return file_turborepo_ffi_messages_proto_rawDescGZIP(), []int{27} +} + +func (x *FileHashes) GetHashes() map[string]string { + if x != nil { + return x.Hashes + } + return nil +} + +type GetPackageFileHashesFromGitIndexResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to Response: + // *GetPackageFileHashesFromGitIndexResponse_Hashes + // *GetPackageFileHashesFromGitIndexResponse_Error + Response isGetPackageFileHashesFromGitIndexResponse_Response `protobuf_oneof:"response"` +} + +func (x *GetPackageFileHashesFromGitIndexResponse) Reset() { + *x = GetPackageFileHashesFromGitIndexResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_turborepo_ffi_messages_proto_msgTypes[28] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetPackageFileHashesFromGitIndexResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetPackageFileHashesFromGitIndexResponse) ProtoMessage() {} + +func (x *GetPackageFileHashesFromGitIndexResponse) ProtoReflect() protoreflect.Message { + mi := &file_turborepo_ffi_messages_proto_msgTypes[28] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetPackageFileHashesFromGitIndexResponse.ProtoReflect.Descriptor instead. +func (*GetPackageFileHashesFromGitIndexResponse) Descriptor() ([]byte, []int) { + return file_turborepo_ffi_messages_proto_rawDescGZIP(), []int{28} +} + +func (m *GetPackageFileHashesFromGitIndexResponse) GetResponse() isGetPackageFileHashesFromGitIndexResponse_Response { + if m != nil { + return m.Response + } + return nil +} + +func (x *GetPackageFileHashesFromGitIndexResponse) GetHashes() *FileHashes { + if x, ok := x.GetResponse().(*GetPackageFileHashesFromGitIndexResponse_Hashes); ok { + return x.Hashes + } + return nil +} + +func (x *GetPackageFileHashesFromGitIndexResponse) GetError() string { + if x, ok := x.GetResponse().(*GetPackageFileHashesFromGitIndexResponse_Error); ok { + return x.Error + } + return "" +} + +type isGetPackageFileHashesFromGitIndexResponse_Response interface { + isGetPackageFileHashesFromGitIndexResponse_Response() +} + +type GetPackageFileHashesFromGitIndexResponse_Hashes struct { + Hashes *FileHashes `protobuf:"bytes,1,opt,name=hashes,proto3,oneof"` +} + +type GetPackageFileHashesFromGitIndexResponse_Error struct { + Error string `protobuf:"bytes,2,opt,name=error,proto3,oneof"` +} + +func (*GetPackageFileHashesFromGitIndexResponse_Hashes) isGetPackageFileHashesFromGitIndexResponse_Response() { +} + +func (*GetPackageFileHashesFromGitIndexResponse_Error) isGetPackageFileHashesFromGitIndexResponse_Response() { +} + var File_turborepo_ffi_messages_proto protoreflect.FileDescriptor var file_turborepo_ffi_messages_proto_rawDesc = []byte{ @@ -1843,10 +2027,32 @@ var file_turborepo_ffi_messages_proto_rawDesc = []byte{ 0x76, 0x65, 0x43, 0x6f, 0x70, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x19, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x88, 0x01, 0x01, 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x65, 0x72, - 0x72, 0x6f, 0x72, 0x2a, 0x24, 0x0a, 0x0e, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x4d, 0x61, - 0x6e, 0x61, 0x67, 0x65, 0x72, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x50, 0x4d, 0x10, 0x00, 0x12, 0x09, - 0x0a, 0x05, 0x42, 0x45, 0x52, 0x52, 0x59, 0x10, 0x01, 0x42, 0x0b, 0x5a, 0x09, 0x66, 0x66, 0x69, - 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x72, 0x6f, 0x72, 0x22, 0x6b, 0x0a, 0x27, 0x47, 0x65, 0x74, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, + 0x65, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x46, 0x72, 0x6f, 0x6d, 0x47, + 0x69, 0x74, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, + 0x0a, 0x0a, 0x74, 0x75, 0x72, 0x62, 0x6f, 0x5f, 0x72, 0x6f, 0x6f, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x09, 0x74, 0x75, 0x72, 0x62, 0x6f, 0x52, 0x6f, 0x6f, 0x74, 0x12, 0x21, 0x0a, + 0x0c, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0b, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x50, 0x61, 0x74, 0x68, + 0x22, 0x78, 0x0a, 0x0a, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x12, 0x2f, + 0x0a, 0x06, 0x68, 0x61, 0x73, 0x68, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, + 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x2e, 0x48, 0x61, 0x73, 0x68, + 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x68, 0x61, 0x73, 0x68, 0x65, 0x73, 0x1a, + 0x39, 0x0a, 0x0b, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, + 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, + 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x75, 0x0a, 0x28, 0x47, 0x65, + 0x74, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, + 0x65, 0x73, 0x46, 0x72, 0x6f, 0x6d, 0x47, 0x69, 0x74, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x25, 0x0a, 0x06, 0x68, 0x61, 0x73, 0x68, 0x65, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0b, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, + 0x68, 0x65, 0x73, 0x48, 0x00, 0x52, 0x06, 0x68, 0x61, 0x73, 0x68, 0x65, 0x73, 0x12, 0x16, 0x0a, + 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x05, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0x0a, 0x0a, 0x08, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x2a, 0x24, 0x0a, 0x0e, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x6e, 0x61, + 0x67, 0x65, 0x72, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x50, 0x4d, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, + 0x42, 0x45, 0x52, 0x52, 0x59, 0x10, 0x01, 0x42, 0x0b, 0x5a, 0x09, 0x66, 0x66, 0x69, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1862,62 +2068,68 @@ func file_turborepo_ffi_messages_proto_rawDescGZIP() []byte { } var file_turborepo_ffi_messages_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_turborepo_ffi_messages_proto_msgTypes = make([]protoimpl.MessageInfo, 29) +var file_turborepo_ffi_messages_proto_msgTypes = make([]protoimpl.MessageInfo, 33) var file_turborepo_ffi_messages_proto_goTypes = []interface{}{ - (PackageManager)(0), // 0: PackageManager - (*TurboDataDirResp)(nil), // 1: TurboDataDirResp - (*GlobReq)(nil), // 2: GlobReq - (*GlobResp)(nil), // 3: GlobResp - (*GlobRespList)(nil), // 4: GlobRespList - (*ChangedFilesReq)(nil), // 5: ChangedFilesReq - (*ChangedFilesResp)(nil), // 6: ChangedFilesResp - (*ChangedFilesList)(nil), // 7: ChangedFilesList - (*PreviousContentReq)(nil), // 8: PreviousContentReq - (*PreviousContentResp)(nil), // 9: PreviousContentResp - (*PackageDependency)(nil), // 10: PackageDependency - (*PackageDependencyList)(nil), // 11: PackageDependencyList - (*WorkspaceDependencies)(nil), // 12: WorkspaceDependencies - (*TransitiveDepsRequest)(nil), // 13: TransitiveDepsRequest - (*TransitiveDepsResponse)(nil), // 14: TransitiveDepsResponse - (*AdditionalBerryData)(nil), // 15: AdditionalBerryData - (*LockfilePackage)(nil), // 16: LockfilePackage - (*LockfilePackageList)(nil), // 17: LockfilePackageList - (*SubgraphRequest)(nil), // 18: SubgraphRequest - (*SubgraphResponse)(nil), // 19: SubgraphResponse - (*PatchesRequest)(nil), // 20: PatchesRequest - (*PatchesResponse)(nil), // 21: PatchesResponse - (*Patches)(nil), // 22: Patches - (*GlobalChangeRequest)(nil), // 23: GlobalChangeRequest - (*GlobalChangeResponse)(nil), // 24: GlobalChangeResponse - (*RecursiveCopyRequest)(nil), // 25: RecursiveCopyRequest - (*RecursiveCopyResponse)(nil), // 26: RecursiveCopyResponse - nil, // 27: WorkspaceDependencies.DependenciesEntry - nil, // 28: TransitiveDepsRequest.WorkspacesEntry - nil, // 29: AdditionalBerryData.ResolutionsEntry + (PackageManager)(0), // 0: PackageManager + (*TurboDataDirResp)(nil), // 1: TurboDataDirResp + (*GlobReq)(nil), // 2: GlobReq + (*GlobResp)(nil), // 3: GlobResp + (*GlobRespList)(nil), // 4: GlobRespList + (*ChangedFilesReq)(nil), // 5: ChangedFilesReq + (*ChangedFilesResp)(nil), // 6: ChangedFilesResp + (*ChangedFilesList)(nil), // 7: ChangedFilesList + (*PreviousContentReq)(nil), // 8: PreviousContentReq + (*PreviousContentResp)(nil), // 9: PreviousContentResp + (*PackageDependency)(nil), // 10: PackageDependency + (*PackageDependencyList)(nil), // 11: PackageDependencyList + (*WorkspaceDependencies)(nil), // 12: WorkspaceDependencies + (*TransitiveDepsRequest)(nil), // 13: TransitiveDepsRequest + (*TransitiveDepsResponse)(nil), // 14: TransitiveDepsResponse + (*AdditionalBerryData)(nil), // 15: AdditionalBerryData + (*LockfilePackage)(nil), // 16: LockfilePackage + (*LockfilePackageList)(nil), // 17: LockfilePackageList + (*SubgraphRequest)(nil), // 18: SubgraphRequest + (*SubgraphResponse)(nil), // 19: SubgraphResponse + (*PatchesRequest)(nil), // 20: PatchesRequest + (*PatchesResponse)(nil), // 21: PatchesResponse + (*Patches)(nil), // 22: Patches + (*GlobalChangeRequest)(nil), // 23: GlobalChangeRequest + (*GlobalChangeResponse)(nil), // 24: GlobalChangeResponse + (*RecursiveCopyRequest)(nil), // 25: RecursiveCopyRequest + (*RecursiveCopyResponse)(nil), // 26: RecursiveCopyResponse + (*GetPackageFileHashesFromGitIndexRequest)(nil), // 27: GetPackageFileHashesFromGitIndexRequest + (*FileHashes)(nil), // 28: FileHashes + (*GetPackageFileHashesFromGitIndexResponse)(nil), // 29: GetPackageFileHashesFromGitIndexResponse + nil, // 30: WorkspaceDependencies.DependenciesEntry + nil, // 31: TransitiveDepsRequest.WorkspacesEntry + nil, // 32: AdditionalBerryData.ResolutionsEntry + nil, // 33: FileHashes.HashesEntry } var file_turborepo_ffi_messages_proto_depIdxs = []int32{ 4, // 0: GlobResp.files:type_name -> GlobRespList 7, // 1: ChangedFilesResp.files:type_name -> ChangedFilesList 10, // 2: PackageDependencyList.list:type_name -> PackageDependency - 27, // 3: WorkspaceDependencies.dependencies:type_name -> WorkspaceDependencies.DependenciesEntry + 30, // 3: WorkspaceDependencies.dependencies:type_name -> WorkspaceDependencies.DependenciesEntry 0, // 4: TransitiveDepsRequest.package_manager:type_name -> PackageManager - 28, // 5: TransitiveDepsRequest.workspaces:type_name -> TransitiveDepsRequest.WorkspacesEntry + 31, // 5: TransitiveDepsRequest.workspaces:type_name -> TransitiveDepsRequest.WorkspacesEntry 15, // 6: TransitiveDepsRequest.resolutions:type_name -> AdditionalBerryData 12, // 7: TransitiveDepsResponse.dependencies:type_name -> WorkspaceDependencies - 29, // 8: AdditionalBerryData.resolutions:type_name -> AdditionalBerryData.ResolutionsEntry + 32, // 8: AdditionalBerryData.resolutions:type_name -> AdditionalBerryData.ResolutionsEntry 16, // 9: LockfilePackageList.list:type_name -> LockfilePackage 0, // 10: SubgraphRequest.package_manager:type_name -> PackageManager 15, // 11: SubgraphRequest.resolutions:type_name -> AdditionalBerryData 0, // 12: PatchesRequest.package_manager:type_name -> PackageManager 22, // 13: PatchesResponse.patches:type_name -> Patches 0, // 14: GlobalChangeRequest.package_manager:type_name -> PackageManager - 17, // 15: WorkspaceDependencies.DependenciesEntry.value:type_name -> LockfilePackageList - 11, // 16: TransitiveDepsRequest.WorkspacesEntry.value:type_name -> PackageDependencyList - 17, // [17:17] is the sub-list for method output_type - 17, // [17:17] is the sub-list for method input_type - 17, // [17:17] is the sub-list for extension type_name - 17, // [17:17] is the sub-list for extension extendee - 0, // [0:17] is the sub-list for field type_name + 33, // 15: FileHashes.hashes:type_name -> FileHashes.HashesEntry + 28, // 16: GetPackageFileHashesFromGitIndexResponse.hashes:type_name -> FileHashes + 17, // 17: WorkspaceDependencies.DependenciesEntry.value:type_name -> LockfilePackageList + 11, // 18: TransitiveDepsRequest.WorkspacesEntry.value:type_name -> PackageDependencyList + 19, // [19:19] is the sub-list for method output_type + 19, // [19:19] is the sub-list for method input_type + 19, // [19:19] is the sub-list for extension type_name + 19, // [19:19] is the sub-list for extension extendee + 0, // [0:19] is the sub-list for field type_name } func init() { file_turborepo_ffi_messages_proto_init() } @@ -2238,6 +2450,42 @@ func file_turborepo_ffi_messages_proto_init() { return nil } } + file_turborepo_ffi_messages_proto_msgTypes[26].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetPackageFileHashesFromGitIndexRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_turborepo_ffi_messages_proto_msgTypes[27].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FileHashes); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_turborepo_ffi_messages_proto_msgTypes[28].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetPackageFileHashesFromGitIndexResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } file_turborepo_ffi_messages_proto_msgTypes[2].OneofWrappers = []interface{}{ (*GlobResp_Files)(nil), @@ -2267,13 +2515,17 @@ func file_turborepo_ffi_messages_proto_init() { (*PatchesResponse_Error)(nil), } file_turborepo_ffi_messages_proto_msgTypes[25].OneofWrappers = []interface{}{} + file_turborepo_ffi_messages_proto_msgTypes[28].OneofWrappers = []interface{}{ + (*GetPackageFileHashesFromGitIndexResponse_Hashes)(nil), + (*GetPackageFileHashesFromGitIndexResponse_Error)(nil), + } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_turborepo_ffi_messages_proto_rawDesc, NumEnums: 1, - NumMessages: 29, + NumMessages: 33, NumExtensions: 0, NumServices: 0, }, diff --git a/cli/internal/hashing/package_deps_hash.go b/cli/internal/hashing/package_deps_hash.go index c51c056b9dcf7..dfa972585729d 100644 --- a/cli/internal/hashing/package_deps_hash.go +++ b/cli/internal/hashing/package_deps_hash.go @@ -28,48 +28,6 @@ type PackageDepsOptions struct { InputPatterns []string } -func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { - var result map[turbopath.AnchoredUnixPath]string - absolutePackagePath := packagePath.RestoreAnchor(rootPath) - - // Get the state of the git index. - gitLsTreeOutput, err := gitLsTree(absolutePackagePath) - if err != nil { - return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err) - } - result = gitLsTreeOutput - - // Update the with the state of the working directory. - // The paths returned from this call are anchored at the package directory - gitStatusOutput, err := gitStatus(absolutePackagePath) - if err != nil { - return nil, fmt.Errorf("Could not get git hashes from git status: %v", err) - } - - // Review status output to identify the delta. - var filesToHash []turbopath.AnchoredSystemPath - for filePath, status := range gitStatusOutput { - if status.isDelete() { - delete(result, filePath) - } else { - filesToHash = append(filesToHash, filePath.ToSystemPath()) - } - } - - // Get the hashes for any modified files in the working directory. - hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash) - if err != nil { - return nil, err - } - - // Zip up file paths and hashes together - for filePath, hash := range hashes { - result[filePath] = hash - } - - return result, nil -} - func safeCompileIgnoreFile(filepath turbopath.AbsoluteSystemPath) (*gitignore.GitIgnore, error) { if filepath.FileExists() { return gitignore.CompileIgnoreFile(filepath.ToString()) @@ -496,61 +454,3 @@ type statusCode struct { func (s statusCode) isDelete() bool { return s.x == "D" || s.y == "D" } - -// gitStatus returns a map of paths to their `git` status code. This can be used to identify what should -// be done with files that do not currently match what is in the index. -// -// Note: `git status -z`'s relative path results are relative to the repository's location. -// We need to calculate where the repository's location is in order to determine what the full path is -// before we can return those paths relative to the calling directory, normalizing to the behavior of -// `ls-files` and `ls-tree`. -func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) { - cmd := exec.Command( - "git", // Using `git` from $PATH, - "status", // tell me about the status of the working tree, - "--untracked-files", // including information about untracked files, - "--no-renames", // do not detect renames, - "-z", // with each file path relative to the repository root and \000-terminated, - "--", // and any additional argument you see is a path, promise. - ) - cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree. - cmd.Dir = rootPath.ToString() // Include files only from this directory. - - entries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader) - if err != nil { - return nil, err - } - - output := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries)) - convertedRootPath := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString()) - - traversePath, err := memoizedGetTraversePath(convertedRootPath) - if err != nil { - return nil, err - } - - for _, entry := range entries { - statusEntry := gitoutput.StatusEntry(entry) - // Anchored at repository. - pathFromStatus := turbopath.AnchoredUnixPathFromUpstream(statusEntry.GetField(gitoutput.Path)) - var outputPath turbopath.AnchoredUnixPath - - if len(traversePath) > 0 { - repositoryPath := convertedRootPath.Join(traversePath.ToSystemPath()) - fileFullPath := pathFromStatus.ToSystemPath().RestoreAnchor(repositoryPath) - - relativePath, err := fileFullPath.RelativeTo(convertedRootPath) - if err != nil { - return nil, err - } - - outputPath = relativePath.ToUnixPath() - } else { - outputPath = pathFromStatus - } - - output[outputPath] = statusCode{x: statusEntry.GetField(gitoutput.StatusX), y: statusEntry.GetField(gitoutput.StatusY)} - } - - return output, nil -} diff --git a/cli/internal/hashing/package_deps_hash_go.go b/cli/internal/hashing/package_deps_hash_go.go new file mode 100644 index 0000000000000..46e5db6a65774 --- /dev/null +++ b/cli/internal/hashing/package_deps_hash_go.go @@ -0,0 +1,112 @@ +//go:build go || !rust +// +build go !rust + +package hashing + +import ( + "fmt" + "os/exec" + + "github.com/vercel/turbo/cli/internal/encoding/gitoutput" + "github.com/vercel/turbo/cli/internal/turbopath" +) + +func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { + var result map[turbopath.AnchoredUnixPath]string + absolutePackagePath := packagePath.RestoreAnchor(rootPath) + + // Get the state of the git index. + gitLsTreeOutput, err := gitLsTree(absolutePackagePath) + if err != nil { + return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err) + } + result = gitLsTreeOutput + + // Update the with the state of the working directory. + // The paths returned from this call are anchored at the package directory + gitStatusOutput, err := gitStatus(absolutePackagePath) + if err != nil { + return nil, fmt.Errorf("Could not get git hashes from git status: %v", err) + } + + // Review status output to identify the delta. + var filesToHash []turbopath.AnchoredSystemPath + for filePath, status := range gitStatusOutput { + if status.isDelete() { + delete(result, filePath) + } else { + filesToHash = append(filesToHash, filePath.ToSystemPath()) + } + } + + // Get the hashes for any modified files in the working directory. + hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash) + if err != nil { + return nil, err + } + + // Zip up file paths and hashes together + for filePath, hash := range hashes { + result[filePath] = hash + } + + return result, nil +} + +// gitStatus returns a map of paths to their `git` status code. This can be used to identify what should +// be done with files that do not currently match what is in the index. +// +// Note: `git status -z`'s relative path results are relative to the repository's location. +// We need to calculate where the repository's location is in order to determine what the full path is +// before we can return those paths relative to the calling directory, normalizing to the behavior of +// `ls-files` and `ls-tree`. +func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) { + cmd := exec.Command( + "git", // Using `git` from $PATH, + "status", // tell me about the status of the working tree, + "--untracked-files", // including information about untracked files, + "--no-renames", // do not detect renames, + "-z", // with each file path relative to the repository root and \000-terminated, + "--", // and any additional argument you see is a path, promise. + ) + cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree. + cmd.Dir = rootPath.ToString() // Include files only from this directory. + + entries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader) + if err != nil { + return nil, err + } + + output := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries)) + convertedRootPath := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString()) + + traversePath, err := memoizedGetTraversePath(convertedRootPath) + if err != nil { + return nil, err + } + + for _, entry := range entries { + statusEntry := gitoutput.StatusEntry(entry) + // Anchored at repository. + pathFromStatus := turbopath.AnchoredUnixPathFromUpstream(statusEntry.GetField(gitoutput.Path)) + var outputPath turbopath.AnchoredUnixPath + + if len(traversePath) > 0 { + repositoryPath := convertedRootPath.Join(traversePath.ToSystemPath()) + fileFullPath := pathFromStatus.ToSystemPath().RestoreAnchor(repositoryPath) + + relativePath, err := fileFullPath.RelativeTo(convertedRootPath) + if err != nil { + return nil, err + } + + outputPath = relativePath.ToUnixPath() + } else { + outputPath = pathFromStatus + } + + output[outputPath] = statusCode{x: statusEntry.GetField(gitoutput.StatusX), y: statusEntry.GetField(gitoutput.StatusY)} + } + + return output, nil +} diff --git a/cli/internal/hashing/package_deps_hash_rust.go b/cli/internal/hashing/package_deps_hash_rust.go new file mode 100644 index 0000000000000..4f5aa1dd13ae9 --- /dev/null +++ b/cli/internal/hashing/package_deps_hash_rust.go @@ -0,0 +1,22 @@ +//go:build rust +// +build rust + +package hashing + +import ( + "github.com/vercel/turbo/cli/internal/ffi" + "github.com/vercel/turbo/cli/internal/turbopath" +) + +func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { + rawHashes, err := ffi.GetPackageFileHashesFromGitIndex(rootPath.ToString(), packagePath.ToString()) + if err != nil { + return nil, err + } + + hashes := make(map[turbopath.AnchoredUnixPath]string, len(rawHashes)) + for rawPath, hash := range rawHashes { + hashes[turbopath.AnchoredUnixPathFromUpstream(rawPath)] = hash + } + return hashes, nil +} diff --git a/crates/turborepo-ffi/messages.proto b/crates/turborepo-ffi/messages.proto index ce7b189a1f3a2..d4408d21daf5f 100644 --- a/crates/turborepo-ffi/messages.proto +++ b/crates/turborepo-ffi/messages.proto @@ -149,3 +149,19 @@ message RecursiveCopyRequest { message RecursiveCopyResponse { optional string error = 1; } + +message GetPackageFileHashesFromGitIndexRequest { + string turbo_root = 1; + string package_path = 2; +} + +message FileHashes { + map hashes = 1; +} + +message GetPackageFileHashesFromGitIndexResponse { + oneof response { + FileHashes hashes = 1; + string error = 2; + } +} diff --git a/crates/turborepo-ffi/src/lib.rs b/crates/turborepo-ffi/src/lib.rs index 9e37736159b65..52e10ff72fd02 100644 --- a/crates/turborepo-ffi/src/lib.rs +++ b/crates/turborepo-ffi/src/lib.rs @@ -4,10 +4,10 @@ //! and in ffi.go before modifying this file. mod lockfile; -use std::{mem::ManuallyDrop, path::PathBuf}; +use std::{collections::HashMap, mem::ManuallyDrop, path::PathBuf}; pub use lockfile::{patches, subgraph, transitive_closure}; -use turbopath::AbsoluteSystemPathBuf; +use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf}; mod proto { include!(concat!(env!("OUT_DIR"), "/_.rs")); @@ -165,24 +165,85 @@ pub extern "C" fn recursive_copy(buffer: Buffer) -> Buffer { } #[no_mangle] -pub extern "C" fn recursive_copy(buffer: Buffer) -> Buffer { - let req: proto::RecursiveCopyRequest = match buffer.into_proto() { +pub extern "C" fn get_package_file_hashes_from_git_index(buffer: Buffer) -> Buffer { + let req: proto::GetPackageFileHashesFromGitIndexRequest = match buffer.into_proto() { Ok(req) => req, Err(err) => { - let resp = proto::RecursiveCopyResponse { - error: Some(err.to_string()), + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), }; return resp.into(); } }; - let response = match turborepo_fs::recursive_copy( - &AbsoluteSystemPathBuf::new_unchecked(req.src), - &AbsoluteSystemPathBuf::new_unchecked(req.dst), + + let turbo_root = match AbsoluteSystemPathBuf::new(req.turbo_root) { + Ok(turbo_root) => turbo_root, + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), + }; + return resp.into(); + } + }; + let package_path = match AnchoredSystemPathBuf::from_raw(req.package_path) { + Ok(package_path) => package_path, + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), + }; + return resp.into(); + } + }; + let response = match turborepo_scm::package_deps::get_package_file_hashes_from_git_index( + &turbo_root, + &package_path, ) { - Ok(()) => proto::RecursiveCopyResponse { error: None }, - Err(e) => proto::RecursiveCopyResponse { - error: Some(e.to_string()), - }, + Ok(hashes) => { + let mut to_return = HashMap::new(); + for (filename, hash) in hashes { + let filename = match filename.as_str() { + Ok(s) => s.to_owned(), + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some(proto::get_package_file_hashes_from_git_index_response::Response::Error(err.to_string())) + }; + return resp.into(); + } + }; + to_return.insert(filename, hash); + } + let file_hashes = proto::FileHashes { hashes: to_return }; + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Hashes( + file_hashes, + ), + ), + }; + resp + } + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), + }; + return resp.into(); + } }; response.into() } diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs index 9bb66721f26b3..9c7a2298d8b83 100644 --- a/crates/turborepo-scm/src/package_deps.rs +++ b/crates/turborepo-scm/src/package_deps.rs @@ -1,50 +1,26 @@ use std::{collections::HashMap, process::Command}; -use anyhow::{anyhow, Result}; +use anyhow::Result; use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf, RelativeUnixPathBuf}; use crate::{hash_object::hash_objects, ls_tree::git_ls_tree, status::append_git_status}; pub type GitHashes = HashMap; -pub fn get_package_deps( +pub fn get_package_file_hashes_from_git_index( turbo_root: &AbsoluteSystemPathBuf, package_path: &AnchoredSystemPathBuf, - inputs: &[&str], ) -> Result { // TODO: memoize git root -> turbo root calculation once we aren't crossing ffi let git_root = find_git_root(turbo_root)?; let full_pkg_path = turbo_root.resolve(package_path); let git_to_pkg_path = git_root.anchor(&full_pkg_path)?; let pkg_prefix = git_to_pkg_path.to_unix()?; - let result = if inputs.len() == 0 { - let mut hashes = git_ls_tree(&full_pkg_path)?; - // Note: to_hash is *git repo relative* - let to_hash = append_git_status(&full_pkg_path, &pkg_prefix, inputs, &mut hashes)?; - hash_objects(&full_pkg_path, to_hash, &pkg_prefix, &mut hashes)?; - hashes - } else { - let pkg_prefix_str = pkg_prefix.as_str()?; - let mut inputs = inputs.to_vec(); - inputs.push("package.json"); - inputs.push("turbo.json"); - let mut prefixed_input_patterns = vec![]; - let mut prefixed_exclude_patterns = vec![]; - for input in inputs { - if input.starts_with("!") { - let glob = input - .get(1..) - .ok_or_else(|| anyhow!("invalid glob: {}", input))?; - let pkg_glob = format!("{}/{}", pkg_prefix_str, glob); - prefixed_exclude_patterns.push(pkg_glob); - } else { - let pkg_glob = format!("{}/{}", pkg_prefix_str, input); - prefixed_input_patterns.push(pkg_glob); - } - } - let files_to_hash = unimplemented!(); - }; - Ok(result) + let mut hashes = git_ls_tree(&full_pkg_path)?; + // Note: to_hash is *git repo relative* + let to_hash = append_git_status(&full_pkg_path, &pkg_prefix, &mut hashes)?; + hash_objects(&full_pkg_path, to_hash, &pkg_prefix, &mut hashes)?; + Ok(hashes) } pub(crate) fn find_git_root(turbo_root: &AbsoluteSystemPathBuf) -> Result { diff --git a/crates/turborepo-scm/src/status.rs b/crates/turborepo-scm/src/status.rs index 7d65e8d2aa49e..12b0fb3ff3781 100644 --- a/crates/turborepo-scm/src/status.rs +++ b/crates/turborepo-scm/src/status.rs @@ -13,18 +13,17 @@ use crate::{package_deps::GitHashes, Error}; pub(crate) fn append_git_status( root_path: &AbsoluteSystemPathBuf, pkg_prefix: &RelativeUnixPathBuf, - patterns: &[&str], hashes: &mut GitHashes, ) -> Result> { - let mut args = vec!["status", "--untracked-files", "--no-renames", "-z", "--"]; - if patterns.len() == 0 { - args.push("."); - } else { - let mut patterns = Vec::from(patterns); - args.append(&mut patterns); - } let mut git = Command::new("git") - .args(args.as_slice()) + .args([ + "status", + "--untracked-files", + "--no-renames", + "-z", + "--", + ".", + ]) .current_dir(root_path) .stdout(Stdio::piped()) .stderr(Stdio::piped()) From bc21aa4800307ae7bf2fa9f059beaf62d11e61c7 Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Mon, 8 May 2023 13:30:41 -0700 Subject: [PATCH 03/10] Feedback --- .../turbopath/src/absolute_system_path_buf.rs | 6 +---- crates/turbopath/src/lib.rs | 18 +++++++++---- crates/turbopath/src/relative_unix_path.rs | 12 ++++----- .../turbopath/src/relative_unix_path_buf.rs | 26 ++++++++++++------- crates/turborepo-scm/src/package_deps.rs | 2 +- 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/crates/turbopath/src/absolute_system_path_buf.rs b/crates/turbopath/src/absolute_system_path_buf.rs index d119ba9e6dd74..f0d6813f8b1af 100644 --- a/crates/turbopath/src/absolute_system_path_buf.rs +++ b/crates/turbopath/src/absolute_system_path_buf.rs @@ -62,10 +62,6 @@ impl AbsoluteSystemPathBuf { Ok(AbsoluteSystemPathBuf(system_path)) } - pub fn new_unchecked(raw: impl Into) -> Self { - Self(raw.into()) - } - /// Anchors `path` at `self`. /// /// # Arguments @@ -183,7 +179,7 @@ impl AbsoluteSystemPathBuf { } } - pub fn create_dir(&self) -> Result<(), io::Error> { + pub fn create_dir_all(&self) -> Result<(), io::Error> { fs::create_dir_all(self.0.as_path()) } diff --git a/crates/turbopath/src/lib.rs b/crates/turbopath/src/lib.rs index 17dd95e702b2c..bd7273d524146 100644 --- a/crates/turbopath/src/lib.rs +++ b/crates/turbopath/src/lib.rs @@ -26,8 +26,14 @@ pub enum PathError { IO(#[from] io::Error), #[error("Path prefix error: {0}")] PrefixError(#[from] StripPrefixError), - #[error("Invalid UTF8: {0}")] - Utf8Error(#[from] bstr::Utf8Error), + #[error("Invalid UTF8: {0:?}")] + Utf8Error(Vec), +} + +impl From for PathError { + fn from(value: std::string::FromUtf8Error) -> Self { + PathError::Utf8Error(value.into_bytes()) + } } impl PathError { @@ -56,9 +62,11 @@ pub enum PathValidationError { PrefixError(String, String), } -pub(crate) fn not_relative_error(bytes: &[u8]) -> PathValidationError { - let s = String::from_utf8_lossy(bytes).to_string(); - PathValidationError::NotRelative(s) +impl PathValidationError { + pub(crate) fn not_relative_error(bytes: &[u8]) -> PathValidationError { + let s = String::from_utf8_lossy(bytes).to_string(); + PathValidationError::NotRelative(s) + } } trait IntoSystem { diff --git a/crates/turbopath/src/relative_unix_path.rs b/crates/turbopath/src/relative_unix_path.rs index c53d49963dd6c..019609acc42be 100644 --- a/crates/turbopath/src/relative_unix_path.rs +++ b/crates/turbopath/src/relative_unix_path.rs @@ -2,7 +2,7 @@ use std::path::PathBuf; use bstr::BStr; -use crate::{not_relative_error, PathError, RelativeSystemPathBuf}; +use crate::{PathError, PathValidationError, RelativeSystemPathBuf}; #[repr(transparent)] pub struct RelativeUnixPath { @@ -12,11 +12,11 @@ pub struct RelativeUnixPath { impl RelativeUnixPath { pub fn new>(value: &P) -> Result<&Self, PathError> { let path = value.as_ref(); - if path[0] == b'/' { - return Err(not_relative_error(path).into()); + if path.first() == Some(&b'/') { + return Err(PathValidationError::not_relative_error(path).into()); } // copied from stdlib path.rs: relies on the representation of - // RelativeUnixPath being just a Path, the same way Path relies on + // RelativeUnixPath being just a BStr, the same way Path relies on // just being an OsStr Ok(unsafe { &*(path as *const BStr as *const Self) }) } @@ -38,9 +38,7 @@ impl RelativeUnixPath { .iter() .map(|byte| if *byte == b'/' { b'\\' } else { *byte }) .collect::>(); - // Is this safe to do? We think we have utf8 bytes or bytes that roundtrip - // through utf8 - let system_path_string = unsafe { String::from_utf8_unchecked(system_path_bytes) }; + let system_path_string = String::from_utf8(system_path_bytes)?; let system_path_buf = PathBuf::from(system_path_string); Ok(RelativeSystemPathBuf::new_unchecked(system_path_buf)) } diff --git a/crates/turbopath/src/relative_unix_path_buf.rs b/crates/turbopath/src/relative_unix_path_buf.rs index b2ed58d813d98..1a8fee8e6ed93 100644 --- a/crates/turbopath/src/relative_unix_path_buf.rs +++ b/crates/turbopath/src/relative_unix_path_buf.rs @@ -2,7 +2,7 @@ use std::{fmt::Debug, io::Write}; use bstr::{BString, ByteSlice}; -use crate::{not_relative_error, PathError, PathValidationError}; +use crate::{PathError, PathValidationError}; #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] pub struct RelativeUnixPathBuf(BString); @@ -10,14 +10,17 @@ pub struct RelativeUnixPathBuf(BString); impl RelativeUnixPathBuf { pub fn new(path: impl Into>) -> Result { let bytes: Vec = path.into(); - if !bytes.is_empty() && bytes[0] == b'/' { - return Err(not_relative_error(&bytes).into()); + if bytes.first() == Some(&b'/') { + return Err(PathValidationError::not_relative_error(&bytes).into()); } Ok(Self(BString::new(bytes))) } pub fn as_str(&self) -> Result<&str, PathError> { - let s = self.0.to_str()?; + let s = self + .0 + .to_str() + .or_else(|_| Err(PathError::Utf8Error(self.0.as_bytes().to_owned())))?; Ok(s) } @@ -26,22 +29,27 @@ impl RelativeUnixPathBuf { // characters escaped with '\'. pub fn write_escapted_bytes(&self, writer: &mut W) -> Result<(), PathError> { writer.write_all(&[b'\"'])?; + // i is our pointer into self.0, and to_escape_index is a pointer to the next + // byte to be escaped. Each time we find a byte to be escaped, we write + // out everything from i to to_escape_index, then the escape byte, '\\', + // then the byte-to-be-escaped. Finally we set i to 1 + to_escape_index + // to move our pointer past the byte we just escaped. let mut i: usize = 0; while i < self.0.len() { - if let Some(mut index) = self.0[i..] + if let Some(mut to_escape_index) = self.0[i..] .iter() .position(|byte| *byte == b'\"' || *byte == b'\n') { // renormalize the index into the byte vector - index += i; - writer.write_all(&self.0[i..index])?; - let byte = self.0[index]; + to_escape_index += i; + writer.write_all(&self.0[i..to_escape_index])?; + let byte = self.0[to_escape_index]; if byte == b'\"' { writer.write_all(&[b'\\', b'\"'])?; } else { writer.write_all(&[b'\\', b'\n'])?; } - i = index + 1; + i = to_escape_index + 1; } else { writer.write_all(&self.0)?; i = self.0.len(); diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs index 9c7a2298d8b83..ac2dea0a74d4b 100644 --- a/crates/turborepo-scm/src/package_deps.rs +++ b/crates/turborepo-scm/src/package_deps.rs @@ -81,7 +81,7 @@ mod tests { // nested-file let (_repo_root_tmp, repo_root) = tmp_dir()?; let my_pkg_dir = repo_root.join_literal("my-pkg"); - my_pkg_dir.create_dir()?; + my_pkg_dir.create_dir_all()?; // create file 1 let committed_file_path = my_pkg_dir.join_literal("committed-file"); From a457d1d1bd1c7c7fe3c8a494cc08c6c0e71bb33b Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Tue, 9 May 2023 09:15:31 -0700 Subject: [PATCH 04/10] Update cli/internal/ffi/ffi.go Co-authored-by: Chris Olszewski --- cli/internal/ffi/ffi.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/internal/ffi/ffi.go b/cli/internal/ffi/ffi.go index a997ab3fbe88c..0a05db77a7251 100644 --- a/cli/internal/ffi/ffi.go +++ b/cli/internal/ffi/ffi.go @@ -334,5 +334,5 @@ func GetPackageFileHashesFromGitIndex(rootPath string, packagePath string) (map[ return nil, errors.New(err) } hashes := resp.GetHashes() - return hashes.Hashes, nil + return hashes.GetHashes(), nil } From d6c7ee9f35680b9314c86fec4a9bc6b559fb5b61 Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Tue, 9 May 2023 13:00:06 -0700 Subject: [PATCH 05/10] More response to feedback --- Cargo.lock | 1 - .../turbopath/src/relative_unix_path_buf.rs | 32 +++++++++++---- crates/turborepo-scm/Cargo.toml | 1 - crates/turborepo-scm/src/hash_object.rs | 41 +++++++++---------- crates/turborepo-scm/src/lib.rs | 13 +++++- crates/turborepo-scm/src/ls_tree.rs | 31 ++++++++++---- crates/turborepo-scm/src/package_deps.rs | 24 ++++++----- crates/turborepo-scm/src/status.rs | 19 +++++---- 8 files changed, 102 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34c4d5b279a9d..252853e4e1e9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9168,7 +9168,6 @@ dependencies = [ name = "turborepo-scm" version = "0.1.0" dependencies = [ - "anyhow", "dunce", "git2 0.16.1", "nom", diff --git a/crates/turbopath/src/relative_unix_path_buf.rs b/crates/turbopath/src/relative_unix_path_buf.rs index 1a8fee8e6ed93..87d679dd7a3cd 100644 --- a/crates/turbopath/src/relative_unix_path_buf.rs +++ b/crates/turbopath/src/relative_unix_path_buf.rs @@ -27,7 +27,7 @@ impl RelativeUnixPathBuf { // write_escaped_bytes writes this path to the given writer in the form // "", where escaped_path is the path with '"' and '\n' // characters escaped with '\'. - pub fn write_escapted_bytes(&self, writer: &mut W) -> Result<(), PathError> { + pub fn write_escaped_bytes(&self, writer: &mut W) -> Result<(), PathError> { writer.write_all(&[b'\"'])?; // i is our pointer into self.0, and to_escape_index is a pointer to the next // byte to be escaped. Each time we find a byte to be escaped, we write @@ -44,11 +44,7 @@ impl RelativeUnixPathBuf { to_escape_index += i; writer.write_all(&self.0[i..to_escape_index])?; let byte = self.0[to_escape_index]; - if byte == b'\"' { - writer.write_all(&[b'\\', b'\"'])?; - } else { - writer.write_all(&[b'\\', b'\n'])?; - } + writer.write_all(&[b'\\', byte])?; i = to_escape_index + 1; } else { writer.write_all(&self.0)?; @@ -69,13 +65,22 @@ impl RelativeUnixPathBuf { PathValidationError::NotParent(prefix.0.to_string(), self.0.to_string()), )); } + + // Handle the case where we are stripping the entire contents of this path + if self.0.len() == prefix.0.len() { + return Self::new(""); + } + + // We now know that this path starts with the prefix, and that this path's + // length is greater than the prefix's length if self.0[prefix_len] != b'/' { - let prefix_str = prefix.as_str().unwrap_or("invalid utf8").to_string(); - let this = self.as_str().unwrap_or("invalid utf8").to_string(); + let prefix_str = prefix.0.to_str_lossy().into_owned(); + let this = self.0.to_str_lossy().into_owned(); return Err(PathError::PathValidationError( PathValidationError::PrefixError(prefix_str, this), )); } + let tail_slice = &self.0[(prefix_len + 1)..]; Self::new(tail_slice) } @@ -138,6 +143,15 @@ mod tests { assert_eq!(tail, expected); } + #[test] + fn test_strip_entire_contents() { + let combined = RelativeUnixPathBuf::new("some/path").unwrap(); + let head = combined.clone(); + let expected = RelativeUnixPathBuf::new("").unwrap(); + let tail = combined.strip_prefix(&head).unwrap(); + assert_eq!(tail, expected); + } + #[test] fn test_strip_empty_prefix() { let combined = RelativeUnixPathBuf::new("some/path").unwrap(); @@ -155,7 +169,7 @@ mod tests { { let mut writer = BufWriter::new(&mut buffer); let path = RelativeUnixPathBuf::new(input).unwrap(); - path.write_escapted_bytes(&mut writer).unwrap(); + path.write_escaped_bytes(&mut writer).unwrap(); } assert_eq!(buffer.as_slice(), expected); } diff --git a/crates/turborepo-scm/Cargo.toml b/crates/turborepo-scm/Cargo.toml index d57355a4f37d9..466e6e11f5025 100644 --- a/crates/turborepo-scm/Cargo.toml +++ b/crates/turborepo-scm/Cargo.toml @@ -7,7 +7,6 @@ license = "MPL-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -anyhow = { workspace = true } dunce = { workspace = true } git2 = { version = "0.16.1", default-features = false } nom = "7.1.3" diff --git a/crates/turborepo-scm/src/hash_object.rs b/crates/turborepo-scm/src/hash_object.rs index 34c77708b540e..0272137c0a2a9 100644 --- a/crates/turborepo-scm/src/hash_object.rs +++ b/crates/turborepo-scm/src/hash_object.rs @@ -1,12 +1,10 @@ use std::{ - backtrace::Backtrace, io::{BufWriter, Read, Write}, panic, process::{Command, Stdio}, thread, }; -use anyhow::{anyhow, Result}; use nom::{Finish, IResult}; use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; @@ -17,7 +15,7 @@ pub(crate) fn hash_objects( to_hash: Vec, pkg_prefix: &RelativeUnixPathBuf, hashes: &mut GitHashes, -) -> Result<()> { +) -> Result<(), Error> { if to_hash.is_empty() { return Ok(()); } @@ -32,24 +30,24 @@ pub(crate) fn hash_objects( let stdout = git .stdout .as_mut() - .ok_or_else(|| anyhow!("failed to get stdout for git hash-object"))?; + .ok_or_else(|| Error::git_error("failed to get stdout for git hash-object"))?; // We take, rather than borrow, stdin so that we can drop it and force the // underlying file descriptor to close, signalling the end of input. let stdin: std::process::ChildStdin = git .stdin .take() - .ok_or_else(|| anyhow!("failed to get stdin for git hash-object"))?; + .ok_or_else(|| Error::git_error("failed to get stdin for git hash-object"))?; let mut stderr = git .stderr .take() - .ok_or_else(|| anyhow!("failed to get stderr for git hash-object"))?; + .ok_or_else(|| Error::git_error("failed to get stderr for git hash-object"))?; let result = read_object_hashes(stdout, stdin, &to_hash, pkg_prefix, hashes); if result.is_err() { let mut buf = String::new(); let bytes_read = stderr.read_to_string(&mut buf)?; if bytes_read > 0 { // something failed with git, report that error - return Err(Error::Git(buf, Backtrace::capture()).into()); + return Err(Error::git_error(buf)); } } result?; @@ -58,27 +56,30 @@ pub(crate) fn hash_objects( Ok(()) } +const HASH_LEN: usize = 40; + fn read_object_hashes( mut reader: R, writer: W, to_hash: &Vec, pkg_prefix: &RelativeUnixPathBuf, hashes: &mut GitHashes, -) -> Result<()> { - thread::scope(move |scope| -> Result<()> { - let write_thread = scope.spawn(move || -> Result<()> { +) -> Result<(), Error> { + thread::scope(move |scope| -> Result<(), Error> { + let write_thread = scope.spawn(move || -> Result<(), Error> { let mut writer = BufWriter::new(writer); for path in to_hash { - path.write_escapted_bytes(&mut writer)?; + path.write_escaped_bytes(&mut writer)?; writer.write_all(&[b'\n'])?; writer.flush()?; } // writer is dropped here, closing stdin Ok(()) }); - let mut i: usize = 0; - let mut buffer: [u8; 41] = [0; 41]; - loop { + //let mut i: usize = 0; + // Buffer size is HASH_LEN + 1 to account for the trailing \n + let mut buffer: [u8; HASH_LEN + 1] = [0; HASH_LEN + 1]; + for (i, filename) in to_hash.iter().enumerate() { if i == to_hash.len() { break; } @@ -86,11 +87,9 @@ fn read_object_hashes( { let hash = parse_hash_object(&buffer)?; let hash = String::from_utf8(hash.to_vec())?; - let filename = &(to_hash[i]); let path = filename.strip_prefix(pkg_prefix)?; hashes.insert(path, hash); } - i += 1; } match write_thread.join() { // the error case is if the thread panic'd. In that case, we propagate @@ -102,18 +101,18 @@ fn read_object_hashes( Ok(()) } -fn parse_hash_object(i: &[u8]) -> Result<&[u8]> { +fn parse_hash_object(i: &[u8]) -> Result<&[u8], Error> { match nom_parse_hash_object(i).finish() { Ok((_, hash)) => Ok(hash), - Err(e) => Err(anyhow!( + Err(e) => Err(Error::git_error(format!( "failed to parse git-hash-object {}", - std::str::from_utf8(e.input)? - )), + String::from_utf8_lossy(e.input) + ))), } } fn nom_parse_hash_object(i: &[u8]) -> IResult<&[u8], &[u8]> { - let (i, hash) = nom::bytes::complete::take(40usize)(i)?; + let (i, hash) = nom::bytes::complete::take(HASH_LEN)(i)?; let (i, _) = nom::bytes::complete::tag(&[b'\n'])(i)?; Ok((i, hash)) } diff --git a/crates/turborepo-scm/src/lib.rs b/crates/turborepo-scm/src/lib.rs index bb96c50bf839b..d0b11553269df 100644 --- a/crates/turborepo-scm/src/lib.rs +++ b/crates/turborepo-scm/src/lib.rs @@ -2,7 +2,7 @@ #![feature(provide_any)] #![feature(assert_matches)] -use std::backtrace; +use std::backtrace::{self, Backtrace}; use thiserror::Error; use turbopath::PathError; @@ -23,4 +23,15 @@ pub enum Error { Io(#[from] std::io::Error, #[backtrace] backtrace::Backtrace), #[error("path error: {0}")] Path(#[from] PathError, #[backtrace] backtrace::Backtrace), + #[error("encoding error: {0}")] + Encoding( + #[from] std::string::FromUtf8Error, + #[backtrace] backtrace::Backtrace, + ), +} + +impl Error { + pub(crate) fn git_error(s: impl Into) -> Self { + Error::Git(s.into(), Backtrace::capture()) + } } diff --git a/crates/turborepo-scm/src/ls_tree.rs b/crates/turborepo-scm/src/ls_tree.rs index 282156e9fa593..51fb9626e3e3a 100644 --- a/crates/turborepo-scm/src/ls_tree.rs +++ b/crates/turborepo-scm/src/ls_tree.rs @@ -3,13 +3,12 @@ use std::{ process::{Command, Stdio}, }; -use anyhow::{anyhow, Result}; use nom::Finish; use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; -use crate::package_deps::GitHashes; +use crate::{package_deps::GitHashes, Error}; -pub fn git_ls_tree(root_path: &AbsoluteSystemPathBuf) -> Result { +pub fn git_ls_tree(root_path: &AbsoluteSystemPathBuf) -> Result { let mut hashes = GitHashes::new(); let mut git = Command::new("git") .args(["ls-tree", "-r", "-z", "HEAD"]) @@ -21,14 +20,27 @@ pub fn git_ls_tree(root_path: &AbsoluteSystemPathBuf) -> Result { let stdout = git .stdout .as_mut() - .ok_or_else(|| anyhow!("failed to get stdout for git ls-tree"))?; - read_ls_tree(stdout, &mut hashes)?; + .ok_or_else(|| Error::git_error("failed to get stdout for git ls-tree"))?; + let mut stderr = git + .stderr + .take() + .ok_or_else(|| Error::git_error("failed to get stderr for git ls-tree"))?; + let result = read_ls_tree(stdout, &mut hashes); + if result.is_err() { + let mut buf = String::new(); + let bytes_read = stderr.read_to_string(&mut buf)?; + if bytes_read > 0 { + // something failed with git, report that error + return Err(Error::git_error(buf)); + } + } + result?; } git.wait()?; Ok(hashes) } -fn read_ls_tree(reader: R, hashes: &mut GitHashes) -> Result<()> { +fn read_ls_tree(reader: R, hashes: &mut GitHashes) -> Result<(), Error> { let mut reader = BufReader::new(reader); let mut buffer = Vec::new(); loop { @@ -54,10 +66,13 @@ struct LsTreeEntry<'a> { hash: &'a [u8], } -fn parse_ls_tree(i: &[u8]) -> Result> { +fn parse_ls_tree(i: &[u8]) -> Result, Error> { match nom_parse_ls_tree(i).finish() { Ok((_, entry)) => Ok(entry), - Err(e) => Err(anyhow!("nom: {:?}: {}", e, std::str::from_utf8(e.input)?)), + Err(e) => Err(Error::git_error(format!( + "failed to parse git-ls-tree: {}", + String::from_utf8_lossy(e.input) + ))), } } diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs index ac2dea0a74d4b..27002cfef8eee 100644 --- a/crates/turborepo-scm/src/package_deps.rs +++ b/crates/turborepo-scm/src/package_deps.rs @@ -1,16 +1,15 @@ use std::{collections::HashMap, process::Command}; -use anyhow::Result; use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf, RelativeUnixPathBuf}; -use crate::{hash_object::hash_objects, ls_tree::git_ls_tree, status::append_git_status}; +use crate::{hash_object::hash_objects, ls_tree::git_ls_tree, status::append_git_status, Error}; pub type GitHashes = HashMap; pub fn get_package_file_hashes_from_git_index( turbo_root: &AbsoluteSystemPathBuf, package_path: &AnchoredSystemPathBuf, -) -> Result { +) -> Result { // TODO: memoize git root -> turbo root calculation once we aren't crossing ffi let git_root = find_git_root(turbo_root)?; let full_pkg_path = turbo_root.resolve(package_path); @@ -23,7 +22,9 @@ pub fn get_package_file_hashes_from_git_index( Ok(hashes) } -pub(crate) fn find_git_root(turbo_root: &AbsoluteSystemPathBuf) -> Result { +pub(crate) fn find_git_root( + turbo_root: &AbsoluteSystemPathBuf, +) -> Result { let rev_parse = Command::new("git") .args(["rev-parse", "--show-cdup"]) .current_dir(turbo_root) @@ -38,10 +39,13 @@ mod tests { use super::*; - fn tmp_dir() -> Result<(tempfile::TempDir, AbsoluteSystemPathBuf)> { - let tmp_dir = tempfile::tempdir()?; - let dir = AbsoluteSystemPathBuf::new(tmp_dir.path().to_path_buf())?.to_realpath()?; - Ok((tmp_dir, dir)) + fn tmp_dir() -> (tempfile::TempDir, AbsoluteSystemPathBuf) { + let tmp_dir = tempfile::tempdir().unwrap(); + let dir = AbsoluteSystemPathBuf::new(tmp_dir.path().to_path_buf()) + .unwrap() + .to_realpath() + .unwrap(); + (tmp_dir, dir) } fn require_git_cmd(repo_root: &AbsoluteSystemPathBuf, args: &[&str]) { @@ -69,7 +73,7 @@ mod tests { } #[test] - fn test_get_package_deps() -> Result<()> { + fn test_get_package_deps() -> Result<(), Error> { // Directory structure: // / // new-root-file <- new file not added to git @@ -79,7 +83,7 @@ mod tests { // uncommitted-file <- new file not added to git // dir/ // nested-file - let (_repo_root_tmp, repo_root) = tmp_dir()?; + let (_repo_root_tmp, repo_root) = tmp_dir(); let my_pkg_dir = repo_root.join_literal("my-pkg"); my_pkg_dir.create_dir_all()?; diff --git a/crates/turborepo-scm/src/status.rs b/crates/turborepo-scm/src/status.rs index 12b0fb3ff3781..f7110c90e0cea 100644 --- a/crates/turborepo-scm/src/status.rs +++ b/crates/turborepo-scm/src/status.rs @@ -1,10 +1,8 @@ use std::{ - backtrace::Backtrace, io::{BufRead, BufReader, Read}, process::{Command, Stdio}, }; -use anyhow::{anyhow, Result}; use nom::Finish; use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; @@ -14,7 +12,7 @@ pub(crate) fn append_git_status( root_path: &AbsoluteSystemPathBuf, pkg_prefix: &RelativeUnixPathBuf, hashes: &mut GitHashes, -) -> Result> { +) -> Result, Error> { let mut git = Command::new("git") .args([ "status", @@ -32,18 +30,18 @@ pub(crate) fn append_git_status( let stdout = git .stdout .as_mut() - .ok_or_else(|| anyhow!("failed to get stdout for git status"))?; + .ok_or_else(|| Error::git_error("failed to get stdout for git status"))?; let mut stderr = git .stderr .take() - .ok_or_else(|| anyhow!("failed to get stderr for git status"))?; + .ok_or_else(|| Error::git_error("failed to get stderr for git status"))?; let result = read_status(stdout, pkg_prefix, hashes); if result.is_err() { let mut buf = String::new(); let bytes_read = stderr.read_to_string(&mut buf)?; if bytes_read > 0 { // something failed with git, report that error - return Err(Error::Git(buf, Backtrace::capture()).into()); + return Err(Error::git_error(buf)); } } result? @@ -56,7 +54,7 @@ fn read_status( reader: R, pkg_prefix: &RelativeUnixPathBuf, hashes: &mut GitHashes, -) -> Result> { +) -> Result, Error> { let mut to_hash = Vec::new(); let mut reader = BufReader::new(reader); let mut buffer = Vec::new(); @@ -87,10 +85,13 @@ struct StatusEntry<'a> { is_delete: bool, } -fn parse_status(i: &[u8]) -> Result> { +fn parse_status(i: &[u8]) -> Result, Error> { match nom_parse_status(i).finish() { Ok((_, tup)) => Ok(tup), - Err(e) => Err(anyhow!("nom: {:?} {}", e, std::str::from_utf8(e.input)?)), + Err(e) => Err(Error::git_error(format!( + "failed to parse git-status: {}", + String::from_utf8_lossy(e.input) + ))), } } From 3f84fa9c1c357b0cdc67c993e81219f4ac5edcfd Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Tue, 9 May 2023 14:14:15 -0700 Subject: [PATCH 06/10] More idiomatic implementation of write_escaped_bytes --- .../turbopath/src/relative_unix_path_buf.rs | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/crates/turbopath/src/relative_unix_path_buf.rs b/crates/turbopath/src/relative_unix_path_buf.rs index 87d679dd7a3cd..a3d8124595e7d 100644 --- a/crates/turbopath/src/relative_unix_path_buf.rs +++ b/crates/turbopath/src/relative_unix_path_buf.rs @@ -35,21 +35,18 @@ impl RelativeUnixPathBuf { // then the byte-to-be-escaped. Finally we set i to 1 + to_escape_index // to move our pointer past the byte we just escaped. let mut i: usize = 0; - while i < self.0.len() { - if let Some(mut to_escape_index) = self.0[i..] - .iter() - .position(|byte| *byte == b'\"' || *byte == b'\n') - { - // renormalize the index into the byte vector - to_escape_index += i; - writer.write_all(&self.0[i..to_escape_index])?; - let byte = self.0[to_escape_index]; - writer.write_all(&[b'\\', byte])?; - i = to_escape_index + 1; - } else { - writer.write_all(&self.0)?; - i = self.0.len(); - } + for (to_escaped_index, byte) in self + .0 + .iter() + .enumerate() + .filter(|(_, byte)| **byte == b'\"' || **byte == b'\n') + { + writer.write_all(&self.0[i..to_escaped_index])?; + writer.write_all(&[b'\\', *byte])?; + i = to_escaped_index + 1; + } + if i < self.0.len() { + writer.write_all(&self.0[i..])?; } writer.write_all(&[b'\"'])?; Ok(()) From 6c651b3a881f853e9115a4aeb9737490fa470c3b Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Wed, 10 May 2023 11:28:43 -0700 Subject: [PATCH 07/10] Update crates/turborepo-scm/src/hash_object.rs Co-authored-by: Nicholas Yang --- crates/turborepo-scm/src/hash_object.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/turborepo-scm/src/hash_object.rs b/crates/turborepo-scm/src/hash_object.rs index 0272137c0a2a9..c49def43c2a9c 100644 --- a/crates/turborepo-scm/src/hash_object.rs +++ b/crates/turborepo-scm/src/hash_object.rs @@ -42,15 +42,15 @@ pub(crate) fn hash_objects( .take() .ok_or_else(|| Error::git_error("failed to get stderr for git hash-object"))?; let result = read_object_hashes(stdout, stdin, &to_hash, pkg_prefix, hashes); - if result.is_err() { + if let Err(err) = result { let mut buf = String::new(); let bytes_read = stderr.read_to_string(&mut buf)?; if bytes_read > 0 { // something failed with git, report that error return Err(Error::git_error(buf)); } + return Err(err) } - result?; } git.wait()?; Ok(()) From d6fb6fd7d24376ebd8f2fa9699405eac76b8f61f Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Wed, 10 May 2023 11:31:26 -0700 Subject: [PATCH 08/10] Ignore file_description_set.bin --- crates/turborepo-lib/.gitignore | 1 + .../src/daemon/file_descriptor_set.bin | Bin 2490 -> 0 bytes 2 files changed, 1 insertion(+) create mode 100644 crates/turborepo-lib/.gitignore delete mode 100644 crates/turborepo-lib/src/daemon/file_descriptor_set.bin diff --git a/crates/turborepo-lib/.gitignore b/crates/turborepo-lib/.gitignore new file mode 100644 index 0000000000000..1a7387072f811 --- /dev/null +++ b/crates/turborepo-lib/.gitignore @@ -0,0 +1 @@ +file_descriptor_set.bin diff --git a/crates/turborepo-lib/src/daemon/file_descriptor_set.bin b/crates/turborepo-lib/src/daemon/file_descriptor_set.bin deleted file mode 100644 index 64e2f19fbe7d92114a4c5e612a5f167172518ab8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2490 zcmbVNU2hsk6kTSQW%jZ*Fu@q(FIX(c;2787wo;XcDoyLgk{i`&6{!z(F>C+{SjW3G z?O*6q|4n~D|5(q=&ba=n6!mS-z4x5^$v}TSAwOAMUq-{u)pe9aVmU<$45DdpLjJem zbQ+z9?-ya5h$`iN4X@)#G_PjrnLY2^K(R>_hq3frOommaoSwG48U_Vg(n9f7G>^le zM1?owMKX*&%(ZwRsQ9l*hk^Gd=_iZW1fA2eF{B~lOY(>Pa2Cx|yDBW--RRtHrStMu z%=sNg@JEK$zl)N|yWd|glB-1$zrCI$NoWca4e}%9xEM{N%NQAKw4P6GC(ygvtU4j_V70eTw6=MF@ptY{oDnxiCbsW~8jqf5 zt^VHudO9cMre`M}Et z$Fhzy#3|zo+p>iG;=*RuDS5ng*B0KGQA+n&xJ$`iFip7*FOS(li^AdADW|B=Z}e z8zfa;{H!Z+gzdAp+U0=eyQ>C)=C2(Y1e*W+M>$3ZV=P!gjnR>_D)6$DbJQ4%Y(-Ke z$O^m2xIqXkdPRdESA1e8$Rk=$?29^1W zGNbB>Ra0g;*$PjiL9ANq$}Fc-5~+s;TA>w99u?zg#$b)_&SAhkcLNRZcGZZRvgikh7(80wHHh z=L7<+rTYa!&X(?%%W~SfUjWeBx?dpa)BOU0)<(Z_rJisOti!Y>4^aoap=$DEO&zwc zx&uH>9bFR;#5=ksAkaEu*F-|R(`qYm63&r)jpQ*BqK@=0f~0ttRTKpPa$Q|35CXei px?mv4b;~QNCXy}on0j*@0ML3jG}&U$D{D#FVsEv2>8i)E_YVRh-q8R6 From 70481af9718dfa5664e3685d4589c5b5b38ffada Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Wed, 10 May 2023 12:49:47 -0700 Subject: [PATCH 09/10] Semicolon --- crates/turborepo-scm/src/hash_object.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/turborepo-scm/src/hash_object.rs b/crates/turborepo-scm/src/hash_object.rs index c49def43c2a9c..3a4e9b89a7150 100644 --- a/crates/turborepo-scm/src/hash_object.rs +++ b/crates/turborepo-scm/src/hash_object.rs @@ -49,7 +49,7 @@ pub(crate) fn hash_objects( // something failed with git, report that error return Err(Error::git_error(buf)); } - return Err(err) + return Err(err); } } git.wait()?; From ae70481068950c0bd14bd2292e02deebe2362903 Mon Sep 17 00:00:00 2001 From: Greg Soltis Date: Wed, 10 May 2023 15:06:12 -0700 Subject: [PATCH 10/10] Update crates/turborepo-scm/src/hash_object.rs Co-authored-by: Chris Olszewski --- crates/turborepo-scm/src/hash_object.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/turborepo-scm/src/hash_object.rs b/crates/turborepo-scm/src/hash_object.rs index 3a4e9b89a7150..f0797a731c38d 100644 --- a/crates/turborepo-scm/src/hash_object.rs +++ b/crates/turborepo-scm/src/hash_object.rs @@ -76,7 +76,6 @@ fn read_object_hashes( // writer is dropped here, closing stdin Ok(()) }); - //let mut i: usize = 0; // Buffer size is HASH_LEN + 1 to account for the trailing \n let mut buffer: [u8; HASH_LEN + 1] = [0; HASH_LEN + 1]; for (i, filename) in to_hash.iter().enumerate() {