From 0530836163f6d1be36694789fcc70aeb0fee77ff Mon Sep 17 00:00:00 2001 From: Alberto Valverde Date: Mon, 14 Oct 2024 19:33:15 +0200 Subject: [PATCH] [inferno-vc] Resurrect #139 and more granular locks in cached client (#141) --- inferno-vc/CHANGELOG.md | 9 ++ inferno-vc/inferno-vc.cabal | 3 +- .../Inferno/VersionControl/Client/Cached.hs | 101 +++++++++--------- inferno-vc/src/Inferno/VersionControl/Log.hs | 28 ++++- .../VersionControl/Operations/Filesystem.hs | 2 +- .../src/Inferno/VersionControl/Server.hs | 17 ++- .../src/Inferno/VersionControl/Testing.hs | 4 +- 7 files changed, 105 insertions(+), 59 deletions(-) diff --git a/inferno-vc/CHANGELOG.md b/inferno-vc/CHANGELOG.md index f572decc..0233572c 100644 --- a/inferno-vc/CHANGELOG.md +++ b/inferno-vc/CHANGELOG.md @@ -1,6 +1,15 @@ # Revision History for inferno-vc *Note*: we use https://pvp.haskell.org/ (MAJOR.MAJOR.MINOR.PATCH) +## 0.3.8.0 -- 2024-10-14 +* Made fetchObjectClosureHashes return the scriptId used to call it since it + also belongs in the closure. +* Added logging to cached client to see hits and misses +* Added logging to server to see what scriptIds are being used to request + fetchObjects and fetchObjectClosureHashes +* Made locks on cache more granular and only fetch a single upstream object per + request + ## 0.3.7.1 -- 2024-09-23 * Fix overflowing threadDelay on armv7l diff --git a/inferno-vc/inferno-vc.cabal b/inferno-vc/inferno-vc.cabal index 38f8ac4a..20e38bd8 100644 --- a/inferno-vc/inferno-vc.cabal +++ b/inferno-vc/inferno-vc.cabal @@ -1,6 +1,6 @@ cabal-version: >=1.10 name: inferno-vc -version: 0.3.7.1 +version: 0.3.8.0 synopsis: Version control server for Inferno description: A version control server for Inferno scripts category: DSL,Scripting @@ -69,6 +69,7 @@ library , QuickCheck , stm , unbounded-delays + , random-shuffle default-language: Haskell2010 default-extensions: diff --git a/inferno-vc/src/Inferno/VersionControl/Client/Cached.hs b/inferno-vc/src/Inferno/VersionControl/Client/Cached.hs index 7f049a8c..e3874307 100644 --- a/inferno-vc/src/Inferno/VersionControl/Client/Cached.hs +++ b/inferno-vc/src/Inferno/VersionControl/Client/Cached.hs @@ -27,8 +27,7 @@ import Data.Aeson (FromJSON, ToJSON, eitherDecodeStrict, encode) import qualified Data.ByteString as B import qualified Data.ByteString.Base64.URL as Base64 import qualified Data.ByteString.Char8 as Char8 -import qualified Data.ByteString.Lazy as BL -import Data.Either (partitionEithers) +import qualified Data.ByteString.Lazy.Char8 as BL import Data.Generics.Product (HasType, getTyped) import Data.Generics.Sum (AsType (..)) import Data.List (foldl') @@ -36,6 +35,7 @@ import qualified Data.Map as Map import qualified Data.Set as Set import GHC.Generics (Generic) import qualified Inferno.VersionControl.Client as VCClient +import Inferno.VersionControl.Log (VCCacheTrace (..)) import Inferno.VersionControl.Operations.Error (VCStoreError (..)) import Inferno.VersionControl.Server (VCServerError) import Inferno.VersionControl.Types @@ -44,32 +44,36 @@ import Inferno.VersionControl.Types VCObjectHash (..), vcObjectHashToByteString, ) +import Plow.Logging (IOTracer (..), traceWith) import Servant.Client (ClientEnv, ClientError) import Servant.Typed.Error (TypedClientM, runTypedClientM) import System.AtomicWrite.Writer.LazyByteString (atomicWriteFile) import System.Directory (createDirectoryIfMissing, doesFileExist) import System.FilePath.Posix (()) +import System.Random.Shuffle (shuffleM) data VCCacheEnv = VCCacheEnv { cachePath :: FilePath, - cacheInFlight :: TVar (Set.Set VCObjectHash) + cacheObjInFlight :: TVar (Set.Set VCObjectHash), + cacheDepInFlight :: TVar (Set.Set VCObjectHash), + tracer :: IOTracer VCCacheTrace } deriving (Generic) -- | Makes sure only one thread at a time fetches the closure for certain -- VCObjectHashes -withInFlight :: (MonadMask m, MonadIO m) => VCCacheEnv -> [VCObjectHash] -> m a -> m a -withInFlight VCCacheEnv {cacheInFlight} hashes = bracket_ acquire release +withInFlight :: (MonadMask m, MonadIO m) => TVar (Set.Set VCObjectHash) -> [VCObjectHash] -> m a -> m a +withInFlight hashSetRef hashes = bracket_ acquire release where acquire = liftIO $ atomically $ do - inFlight <- readTVar cacheInFlight + inFlight <- readTVar hashSetRef if any (`Set.member` inFlight) hashes then retry else do - writeTVar cacheInFlight $! foldl' (flip Set.insert) inFlight hashes + writeTVar hashSetRef $! foldl' (flip Set.insert) inFlight hashes release = liftIO $ atomically $ do - inFlight <- readTVar cacheInFlight - writeTVar cacheInFlight $! foldl' (flip Set.delete) inFlight hashes + inFlight <- readTVar hashSetRef + writeTVar hashSetRef $! foldl' (flip Set.delete) inFlight hashes data CachedVCClientError = ClientVCStoreError VCServerError @@ -77,11 +81,12 @@ data CachedVCClientError | LocalVCStoreError VCStoreError deriving (Show, Generic) -initVCCachedClient :: FilePath -> IO VCCacheEnv -initVCCachedClient cachePath = do - createDirectoryIfMissing True $ cachePath "deps" - cacheInFlight <- newTVarIO mempty - pure VCCacheEnv {cachePath, cacheInFlight} +initVCCachedClient :: FilePath -> IOTracer VCCacheTrace -> IO VCCacheEnv +initVCCachedClient cachePath tracer = do + createDirectoryIfMissing True $ cachePath "deps-v2" + cacheObjInFlight <- newTVarIO mempty + cacheDepInFlight <- newTVarIO mempty + pure VCCacheEnv {cachePath, cacheObjInFlight, cacheDepInFlight, tracer} fetchVCObjectClosure :: ( MonadError err m, @@ -103,40 +108,36 @@ fetchVCObjectClosure :: VCObjectHash -> m (Map.Map VCObjectHash (VCMeta a g VCObject)) fetchVCObjectClosure fetchVCObjects remoteFetchVCObjectClosureHashes objHash = do - env@VCCacheEnv {cachePath} <- asks getTyped + VCCacheEnv {cachePath, cacheObjInFlight, cacheDepInFlight, tracer} <- asks getTyped deps <- - withInFlight env [objHash] $ - liftIO (doesFileExist $ cachePath "deps" show objHash) >>= \case + withInFlight cacheDepInFlight [objHash] $ + liftIO (doesFileExist $ cachePath "deps-v2" show objHash) >>= \case False -> do + traceWith tracer $ VCCacheDepsMiss objHash deps <- liftServantClient $ remoteFetchVCObjectClosureHashes objHash - liftIO - $ atomicWriteFile - (cachePath "deps" show objHash) - $ BL.concat [BL.fromStrict (vcObjectHashToByteString h) <> "\n" | h <- deps] + liftIO $ + atomicWriteFile (cachePath "deps-v2" show objHash) $ + BL.unlines $ + map (BL.fromStrict . vcObjectHashToByteString) deps pure deps - True -> fetchVCObjectClosureHashes objHash - withInFlight env deps $ do - (nonLocalHashes, localHashes) <- - partitionEithers - <$> forM - (objHash : deps) - ( \depHash -> do - liftIO (doesFileExist $ cachePath show depHash) >>= \case - True -> pure $ Right depHash - False -> pure $ Left depHash - ) - localObjs <- - Map.fromList - <$> forM - localHashes - ( \h -> - (h,) <$> fetchVCObjectUnsafe h - ) - - nonLocalObjs <- liftServantClient $ fetchVCObjects nonLocalHashes - forM_ (Map.toList nonLocalObjs) $ \(h, o) -> - liftIO $ atomicWriteFile (cachePath show h) $ encode o - pure $ localObjs `Map.union` nonLocalObjs + True -> do + traceWith tracer $ VCCacheDepsHit objHash + fetchVCObjectClosureHashes objHash + -- shuffle deps to improve concurrent performance + shuffledDeps <- liftIO $ shuffleM deps + fmap mconcat $ + forM shuffledDeps $ \depHash -> + withInFlight cacheObjInFlight [depHash] $ + liftIO (doesFileExist $ cachePath show depHash) >>= \case + True -> do + traceWith tracer (VCCacheHit depHash) + Map.singleton depHash <$> fetchVCObjectUnsafe depHash + False -> do + traceWith tracer (VCCacheMiss depHash) + objs <- liftServantClient $ fetchVCObjects [depHash] + forM_ (Map.toList objs) $ \(h, o) -> + liftIO $ atomicWriteFile (cachePath show h) $ encode o + pure objs fetchVCObjectClosureHashes :: ( MonadError err m, @@ -149,7 +150,7 @@ fetchVCObjectClosureHashes :: m [VCObjectHash] fetchVCObjectClosureHashes h = do VCCacheEnv {cachePath} <- asks getTyped - let fp = cachePath "deps" show h + let fp = cachePath "deps-v2" show h readVCObjectHashTxt fp readVCObjectHashTxt :: @@ -162,8 +163,11 @@ readVCObjectHashTxt :: readVCObjectHashTxt fp = do deps <- filter (not . B.null) . Char8.lines <$> liftIO (B.readFile fp) forM deps $ \dep -> do - decoded <- either (const $ throwing _Typed $ InvalidHash $ Char8.unpack dep) pure $ Base64.decode dep - maybe (throwing _Typed $ InvalidHash $ Char8.unpack dep) (pure . VCObjectHash) $ digestFromByteString decoded + decoded <- + either (const $ throwing _Typed $ InvalidHash $ Char8.unpack dep) pure $ + Base64.decode dep + maybe (throwing _Typed $ InvalidHash $ Char8.unpack dep) (pure . VCObjectHash) $ + digestFromByteString decoded fetchVCObjectUnsafe :: ( MonadReader r m, @@ -178,7 +182,8 @@ fetchVCObjectUnsafe :: fetchVCObjectUnsafe h = do VCCacheEnv {cachePath} <- asks getTyped let fp = cachePath show h - either (throwing _Typed . CouldNotDecodeObject h) pure =<< liftIO (eitherDecodeStrict <$> Char8.readFile fp) + either (throwing _Typed . CouldNotDecodeObject h) pure + =<< liftIO (eitherDecodeStrict <$> Char8.readFile fp) liftServantClient :: ( MonadError e m, diff --git a/inferno-vc/src/Inferno/VersionControl/Log.hs b/inferno-vc/src/Inferno/VersionControl/Log.hs index 5f76a878..b7409bb9 100644 --- a/inferno-vc/src/Inferno/VersionControl/Log.hs +++ b/inferno-vc/src/Inferno/VersionControl/Log.hs @@ -1,9 +1,16 @@ {-# LANGUAGE OverloadedStrings #-} -module Inferno.VersionControl.Log where +module Inferno.VersionControl.Log + ( VCServerTrace (..), + VCCacheTrace (..), + vcServerTraceToText, + vcCacheTraceToText, + ) +where -import Data.Text (Text, pack) +import Data.Text (Text, intercalate, pack) import Inferno.VersionControl.Operations.Error (VCStoreError, vcStoreErrorToString) +import Inferno.VersionControl.Types (VCObjectHash) data VCServerTrace = ThrownVCStoreError VCStoreError @@ -14,6 +21,8 @@ data VCServerTrace | ReadJSON FilePath | ReadTxt FilePath | DeleteFile FilePath + | VCFetchObjects [VCObjectHash] + | VCFetchObjectClosureHashes VCObjectHash vcServerTraceToText :: VCServerTrace -> Text vcServerTraceToText = \case @@ -25,3 +34,18 @@ vcServerTraceToText = \case ThrownVCStoreError e -> pack (vcStoreErrorToString e) ThrownVCOtherError e -> "Other server error: " <> e DeleteFile fp -> "Deleting file: " <> pack fp + VCFetchObjects objs -> "FetchObjects " <> intercalate ", " (map (pack . show) objs) + VCFetchObjectClosureHashes obj -> "FetchObjectClosureHashes " <> pack (show obj) + +data VCCacheTrace + = VCCacheHit VCObjectHash + | VCCacheMiss VCObjectHash + | VCCacheDepsHit VCObjectHash + | VCCacheDepsMiss VCObjectHash + +vcCacheTraceToText :: VCCacheTrace -> Text +vcCacheTraceToText = \case + VCCacheHit h -> "VC Cache hit " <> pack (show h) + VCCacheMiss h -> "VC Cache miss " <> pack (show h) + VCCacheDepsHit h -> "VC Cache deps hit " <> pack (show h) + VCCacheDepsMiss h -> "VC Cache deps miss " <> pack (show h) diff --git a/inferno-vc/src/Inferno/VersionControl/Operations/Filesystem.hs b/inferno-vc/src/Inferno/VersionControl/Operations/Filesystem.hs index ff60a353..3968b0d5 100644 --- a/inferno-vc/src/Inferno/VersionControl/Operations/Filesystem.hs +++ b/inferno-vc/src/Inferno/VersionControl/Operations/Filesystem.hs @@ -341,7 +341,7 @@ instance fetchVCObjectClosureHashes h = do VCStorePath storePath <- asks getTyped let fp = storePath "deps" show h - readVCObjectHashTxt fp + (h :) <$> readVCObjectHashTxt fp deleteAutosavedVCObjectsOlderThan t = do -- We know that all autosaves must be heads: diff --git a/inferno-vc/src/Inferno/VersionControl/Server.hs b/inferno-vc/src/Inferno/VersionControl/Server.hs index e8414ed8..fa0abbd4 100644 --- a/inferno-vc/src/Inferno/VersionControl/Server.hs +++ b/inferno-vc/src/Inferno/VersionControl/Server.hs @@ -37,7 +37,7 @@ import Data.Time.Clock.POSIX (getPOSIXTime) import GHC.Generics (Generic) import Inferno.Types.Syntax (Expr) import Inferno.Types.Type (TCScheme) -import Inferno.VersionControl.Log (VCServerTrace (ThrownVCOtherError, ThrownVCStoreError), vcServerTraceToText) +import Inferno.VersionControl.Log (VCServerTrace (..), vcServerTraceToText) import qualified Inferno.VersionControl.Operations as Ops import qualified Inferno.VersionControl.Operations.Error as Ops import Inferno.VersionControl.Server.Types (readServerConfig) @@ -100,14 +100,21 @@ vcServer :: Ord (Ops.Group m) ) => (forall x. m x -> Handler (Union (WithError VCServerError x))) -> + IOTracer VCServerTrace -> Server (VersionControlAPI (Ops.Author m) (Ops.Group m)) -vcServer toHandler = +vcServer toHandler tracer = toHandler . fetchFunctionH :<|> toHandler . Ops.fetchFunctionsForGroups :<|> toHandler . Ops.fetchVCObject :<|> toHandler . Ops.fetchVCObjectHistory - :<|> toHandler . fetchVCObjects - :<|> toHandler . Ops.fetchVCObjectClosureHashes + :<|> ( \objs -> + traceWith tracer (VCFetchObjects objs) + >> toHandler (fetchVCObjects objs) + ) + :<|> ( \obj -> + traceWith tracer (VCFetchObjectClosureHashes obj) + >> toHandler (Ops.fetchVCObjectClosureHashes obj) + ) :<|> toHandler . pushFunctionH :<|> toHandler . Ops.deleteAutosavedVCObject :<|> toHandler . Ops.deleteVCObjects @@ -187,7 +194,7 @@ runServerConfig middleware withEnv runOp serverConfig = do gzip def $ middleware env $ serve (Proxy :: Proxy (VersionControlAPI a g)) $ - vcServer (liftIO . liftTypedError . flip runOp env) + vcServer (liftIO . liftTypedError . flip runOp env) serverTracer withLinkedAsync_ :: IO a -> IO b -> IO b withLinkedAsync_ f g = withAsync f $ \h -> link h >> g diff --git a/inferno-vc/src/Inferno/VersionControl/Testing.hs b/inferno-vc/src/Inferno/VersionControl/Testing.hs index eb018e13..18edc990 100644 --- a/inferno-vc/src/Inferno/VersionControl/Testing.hs +++ b/inferno-vc/src/Inferno/VersionControl/Testing.hs @@ -143,9 +143,9 @@ vcServerSpec url = do metas <- runOperation vcClientEnv (fetchFunctionsForGroups (Set.singleton g)) map obj metas `shouldBe` [h4] - -- The closure of h4 should be empty as it has no dependencies: + -- The closure of h4 should only contain h4 as it has no dependencies: metas' <- runOperation vcClientEnv (fetchVCObjectClosureHashes h4) - metas' `shouldBe` [] + metas' `shouldBe` [h4] -- After cloning h4 to h5, fetchFunctionsForGroups should return h4 and h5: o5 <- createObjForGroup g VCObjectPublic $ CloneOf h4