Skip to content

Commit

Permalink
Fix slow lockfile generation (#1261)
Browse files Browse the repository at this point in the history
* Memoize and combine queries to speed up lockfile generation
* Add comments and remove pointless parTraverse
* Re-add swallowed error and remove accidental formatting in foreign mod
* Lockfile gen speed review feedback - naming and comments
* Replace singular metadata query
  • Loading branch information
finnhodgkin committed Aug 23, 2024
1 parent 5efaad1 commit 72b2e61
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 48 deletions.
116 changes: 99 additions & 17 deletions src/Spago/Command/Fetch.purs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ import Data.Array.NonEmpty as NEA
import Data.Codec.JSON as CJ
import Data.Codec.JSON.Common as CJ.Common
import Data.Either as Either
import Data.Filterable (filterMap)
import Data.HTTP.Method as Method
import Data.Int as Int
import Data.List as List
import Data.Map as Map
import Data.Newtype (wrap)
import Data.Set as Set
Expand Down Expand Up @@ -279,16 +281,40 @@ type LockfileBuilderResult =
, packages :: Map PackageName Lock.LockEntry
}

lookupInCache :: forall a k v. Ord k => k -> Ref.Ref (Map k v) -> Spago a (Maybe v)
lookupInCache key cacheRef = liftEffect $ Ref.read cacheRef >>= Map.lookup key >>> pure

updateCache :: forall a k v. Ord k => k -> v -> Ref.Ref (Map k v) -> Spago a Unit
updateCache key value cacheRef = liftEffect $ Ref.modify_ (Map.insert key value) cacheRef

writeNewLockfile :: forall a. String -> PackageTransitiveDeps -> Spago (FetchEnv a) PackageTransitiveDeps
writeNewLockfile reason allTransitiveDeps = do
logInfo $ reason <> ", generating it..."
{ workspace } <- ask

-- All these Refs are needed to memoise Db and file reads
packageDependenciesCache <- liftEffect $ Ref.new Map.empty
gitRefCache <- liftEffect $ Ref.new Map.empty
metadataRefCache <- liftEffect $ Ref.new Map.empty
let
processPackage :: LockfileBuilderResult -> Tuple PackageName (Tuple PackageName Package) -> Spago (FetchEnv a) LockfileBuilderResult
processPackage result (Tuple workspacePackageName (Tuple dependencyName dependencyPackage)) = do
(packageDependencies :: Array PackageName) <- (Array.fromFoldable <<< Map.keys <<< fromMaybe Map.empty)
<$> getPackageDependencies dependencyName dependencyPackage
memoisedGetPackageDependencies :: PackageName -> Package -> Spago (FetchEnv a) (Maybe (Map PackageName Range))
memoisedGetPackageDependencies packageName package = do
lookupInCache packageName packageDependenciesCache >>=
case _ of
Just cached -> do
pure cached
Nothing -> do
-- Not cached. Compute it, write to ref, return it
res <- getPackageDependencies packageName package
updateCache packageName res packageDependenciesCache
pure res

processPackage :: Map PackageName _ -> LockfileBuilderResult -> Tuple PackageName (Tuple PackageName Package) -> Spago (FetchEnv a) LockfileBuilderResult
processPackage registryIntegrityMap result (Tuple workspacePackageName (Tuple dependencyName dependencyPackage)) = do
let
getDeps = (Array.fromFoldable <<< Map.keys <<< fromMaybe Map.empty)
<$> memoisedGetPackageDependencies dependencyName dependencyPackage

updatePackage r package = (updateWorkspacePackage r)
{ packages = Map.insert dependencyName package r.packages }
updateWorkspacePackage r = r
Expand All @@ -303,28 +329,84 @@ writeNewLockfile reason allTransitiveDeps = do

case dependencyPackage of
WorkspacePackage _pkg -> pure $ updateWorkspacePackage result

GitPackage gitPackage -> do
let packageLocation = Config.getPackageLocation dependencyName dependencyPackage
Git.getRef (Just packageLocation) >>= case _ of
Left err -> die err -- TODO maybe not die here?
Right rev -> pure $ updatePackage result $ FromGit { rev, dependencies: packageDependencies, url: gitPackage.git, subdir: gitPackage.subdir }
lookupInCache packageLocation gitRefCache >>= case _ of
Nothing ->
-- Get the ref and update the cache
Git.getRef (Just packageLocation) >>= case _ of
Left err -> die err -- TODO maybe not die here?
Right rev -> do
dependencies <- getDeps
let
lockEntry =
FromGit { rev, dependencies, url: gitPackage.git, subdir: gitPackage.subdir }
updateCache packageLocation lockEntry gitRefCache
pure $ updatePackage result lockEntry
Just entry -> pure $ updatePackage result entry

RegistryVersion version -> do
metadata <- Registry.getMetadata dependencyName
registryVersion <- case (metadata >>= (\(Metadata meta) -> Either.note "Didn't find version in the metadata file" $ Map.lookup version meta.published)) of
Left err -> die $ "Couldn't read metadata, reason:\n " <> err
Right { hash: integrity } ->
pure { version, integrity, dependencies: packageDependencies }
pure $ updatePackage result $ FromRegistry registryVersion
lookupInCache dependencyName metadataRefCache >>= case _ of
Nothing -> do
registryVersion <- FromRegistry <$> case Map.lookup dependencyName registryIntegrityMap of
-- This shouldn't be Nothing because it's already handled when building the integrity map below
Nothing -> die $ "Couldn't read metadata"
Just integrity -> do
dependencies <- getDeps
pure { version, integrity, dependencies }
updateCache dependencyName registryVersion metadataRefCache
pure $ updatePackage result registryVersion
Just entry -> do
pure $ updatePackage result entry

LocalPackage { path } -> do
pure $ updatePackage result $ FromPath { path, dependencies: packageDependencies }
dependencies <- getDeps
pure $ updatePackage result $ FromPath { path, dependencies }

let
toArray :: forall k v. Map k v -> Array (Tuple k v)
toArray = Map.toUnfoldable
allDependencies = foldMap sequence $ toArray $ map toArray allTransitiveDeps

-- Fetch the Registry metadata in one go for all required packages
let
uniqueRegistryPackageNames = Array.nub $ filterMap
( \(Tuple _ (Tuple dependencyName dependencyPackage)) -> case dependencyPackage of
RegistryVersion _ -> Just dependencyName
_ -> Nothing
)
allDependencies
metadataMap <- Registry.getMetadataForPackages uniqueRegistryPackageNames >>= case _ of
Left err -> die $ "Couldn't read metadata, reason:\n " <> err
Right ms -> pure ms

(registryVersions :: Map PackageName Sha256) <- Map.fromFoldable <<< Array.catMaybes <$>
( traverse
( \(Tuple _ (Tuple dependencyName dependencyPackage)) -> case dependencyPackage of
RegistryVersion version -> do
let metadata = Map.lookup dependencyName metadataMap
case (metadata >>= (\(Metadata meta) -> Map.lookup version meta.published)) of
Nothing | isNothing metadata ->
die $ "Couldn't read metadata for " <> PackageName.print dependencyName
Nothing ->
die $ "Couldn't read metadata for " <> PackageName.print dependencyName
<> ": didn't find version in the metadata file"
Just { hash: integrity } ->
pure $ Just $ dependencyName /\ integrity
_ -> pure Nothing
)
$ allDependencies
)

({ packages, workspacePackages } :: LockfileBuilderResult) <-
Array.foldM processPackage
{ workspacePackages: Map.fromFoldable $ map Config.workspacePackageToLockfilePackage (Config.getWorkspacePackages workspace.packageSet), packages: Map.empty }
(foldMap sequence $ toArray $ map toArray allTransitiveDeps)
-- NOTE! We used to have `Array.foldM` here, but it was significantly slower
-- (~10ms vs 6s on a very large project)
List.foldM (processPackage registryVersions)
{ workspacePackages: Map.fromFoldable $ map Config.workspacePackageToLockfilePackage (Config.getWorkspacePackages workspace.packageSet)
, packages: Map.empty
}
$ List.fromFoldable allDependencies

let
lockfile =
Expand Down
13 changes: 6 additions & 7 deletions src/Spago/Db.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,12 @@ export const removeManifestImpl = (db, name, version) => {
db.prepare("DELETE FROM package_manifests WHERE name = ? AND version = ?").run(name, version);
}

export const getMetadataImpl = (db, name) => {
const row = db
.prepare("SELECT * FROM package_metadata WHERE name = ? LIMIT 1")
.get(name);
return row;
}

export const insertMetadataImpl = (db, name, metadata, last_fetched) => {
db.prepare("INSERT OR REPLACE INTO package_metadata (name, metadata, last_fetched) VALUES (@name, @metadata, @last_fetched)").run({ name, metadata, last_fetched });
}

export const getMetadataForPackagesImpl = (db, names) => {
// There can be a lot of package names here, potentially hitting the max number of sqlite parameters, so we use json to bypass this
const query = db.prepare("SELECT * FROM package_metadata WHERE name IN (SELECT value FROM json_each(?));");
return query.all(JSON.stringify(names));
};
36 changes: 25 additions & 11 deletions src/Spago/Db.purs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module Spago.Db
, getLastPull
, getManifest
, getMetadata
, getMetadataForPackages
, insertManifest
, insertMetadata
, insertPackageSet
Expand All @@ -26,6 +27,7 @@ import Data.Codec.JSON.Record as CJ.Record
import Data.DateTime (Date, DateTime(..))
import Data.DateTime as DateTime
import Data.Either as Either
import Data.Filterable (filterMap)
import Data.Formatter.DateTime as DateTime.Format
import Data.Map as Map
import Data.Nullable (Nullable)
Expand Down Expand Up @@ -100,18 +102,28 @@ insertManifest :: Db -> PackageName -> Version -> Manifest -> Effect Unit
insertManifest db packageName version manifest = Uncurried.runEffectFn4 insertManifestImpl db (PackageName.print packageName) (Version.print version) (printJson Manifest.codec manifest)

getMetadata :: Db -> PackageName -> Effect (Maybe Metadata)
getMetadata db packageName = do
maybeMetadataEntry <- Nullable.toMaybe <$> Uncurried.runEffectFn2 getMetadataImpl db (PackageName.print packageName)
getMetadata db packageName =
getMetadataForPackages db [ packageName ]
<#> Map.lookup packageName

getMetadataForPackages :: Db -> Array PackageName -> Effect (Map PackageName Metadata)
getMetadataForPackages db packageNames = do
metadataEntries <- Uncurried.runEffectFn2 getMetadataForPackagesImpl db (PackageName.print <$> packageNames)
now <- Now.nowDateTime
pure $ do
metadataEntry <- maybeMetadataEntry
lastFetched <- Either.hush $ DateTime.Format.unformat Internal.Format.iso8601DateTime metadataEntry.last_fetched
-- if the metadata is older than 15 minutes, we consider it stale
case DateTime.diff now lastFetched of
Minutes n | n <= 15.0 -> do
metadata <- Either.hush $ parseJson Metadata.codec metadataEntry.metadata
pure metadata
_ -> Nothing
pure
$ metadataEntries
#
( filterMap \metadataEntry -> do
packageName <- hush $ PackageName.parse metadataEntry.name
lastFetched <- Either.hush $ DateTime.Format.unformat Internal.Format.iso8601DateTime metadataEntry.last_fetched
-- if the metadata is older than 15 minutes, we consider it stale
case DateTime.diff now lastFetched of
Minutes n | n <= 15.0 -> do
metadata <- Either.hush $ parseJson Metadata.codec metadataEntry.metadata
pure $ packageName /\ metadata
_ -> Nothing
)
# Map.fromFoldable

insertMetadata :: Db -> PackageName -> Metadata -> Effect Unit
insertMetadata db packageName metadata@(Metadata { unpublished }) = do
Expand Down Expand Up @@ -246,4 +258,6 @@ foreign import removeManifestImpl :: EffectFn3 Db String String Unit

foreign import getMetadataImpl :: EffectFn2 Db String (Nullable MetadataEntryJs)

foreign import getMetadataForPackagesImpl :: EffectFn2 Db (Array String) (Array MetadataEntryJs)

foreign import insertMetadataImpl :: EffectFn4 Db String String String Unit
52 changes: 39 additions & 13 deletions src/Spago/Registry.purs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module Spago.Registry
, findPackageSet
, getManifestFromIndex
, getMetadata
, getMetadataForPackages
, getRegistryFns
, listMetadataFiles
, listPackageSets
Expand Down Expand Up @@ -62,6 +63,7 @@ type RegistryEnv a = Record (RegistryEnvRow a)
type RegistryFunctions =
{ getManifestFromIndex :: PackageName -> Version -> Spago (LogEnv ()) (Maybe Manifest)
, getMetadata :: PackageName -> Spago (LogEnv ()) (Either String Metadata)
, getMetadataForPackages :: Array PackageName -> Spago (LogEnv ()) (Either String (Map PackageName Metadata))
, findPackageSet :: Maybe Version -> Spago (PreRegistryEnv ()) Version
, listPackageSets :: Spago (PreRegistryEnv ()) (Array Db.PackageSet)
, listMetadataFiles :: Spago (LogEnv ()) (Array String)
Expand All @@ -74,6 +76,12 @@ getMetadata packageName = do
{ getMetadata: fn } <- runSpago { logOptions, db, git, purs, offline } getRegistry
runSpago { logOptions } (fn packageName)

getMetadataForPackages :: Array PackageName -> Spago (RegistryEnv _) _
getMetadataForPackages packageNames = do
{ getRegistry, logOptions, db, git, purs, offline } <- ask
{ getMetadataForPackages: fn } <- runSpago { logOptions, db, git, purs, offline } getRegistry
runSpago { logOptions } (fn packageNames)

getManifestFromIndex :: PackageName -> Version -> Spago (RegistryEnv _) _
getManifestFromIndex packageName version = do
{ getRegistry, logOptions, db, git, purs, offline } <- ask
Expand Down Expand Up @@ -123,6 +131,7 @@ getRegistryFns registryBox registryLock = do
registryFns =
{ getManifestFromIndex: getManifestFromIndexImpl db
, getMetadata: getMetadataImpl db
, getMetadataForPackages: getMetadataForPackagesImpl db
, listMetadataFiles: FS.ls (Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory ])
, listPackageSets: listPackageSetsImpl
, findPackageSet: findPackageSetImpl
Expand Down Expand Up @@ -200,20 +209,37 @@ getRegistryFns registryBox registryLock = do
-- Metadata can change over time (unpublished packages, and new packages), so we need
-- to read it from file every time we have a fresh Registry
getMetadataImpl :: Db -> PackageName -> Spago (LogEnv ()) (Either String Metadata)
getMetadataImpl db name = do
getMetadataImpl db name =
getMetadataForPackagesImpl db [ name ]
<#> case _ of
Left err -> Left err
Right metadataMap -> case Map.lookup name metadataMap of
Nothing -> Left $ "Failed to get metadata for package: " <> PackageName.print name
Just metadata -> Right metadata

-- Parallelised version of `getMetadataImpl`
getMetadataForPackagesImpl :: Db -> Array PackageName -> Spago (LogEnv ()) (Either String (Map PackageName Metadata))
getMetadataForPackagesImpl db names = do
-- we first try reading it from the DB
liftEffect (Db.getMetadata db name) >>= case _ of
Just metadata -> do
logDebug $ "Got metadata from DB: " <> PackageName.print name
pure (Right metadata)
_ -> do
-- if we don't have it we try reading it from file
metadataFromFile name >>= case _ of
Left e -> pure (Left e)
Right m -> do
-- and memoize it
liftEffect (Db.insertMetadata db name m)
pure (Right m)
liftEffect (Db.getMetadataForPackages db names) >>= \metadatas -> do
{ fail, success } <- partitionEithers <$> parTraverseSpago
( \name -> do
case Map.lookup name metadatas of
Nothing ->
-- if we don't have it we try reading it from file
metadataFromFile name >>= case _ of
Left e -> pure (Left e)
Right m -> do
-- and memoize it
liftEffect (Db.insertMetadata db name m)
pure (Right $ name /\ m)
Just m -> pure $ Right $ name /\ m
)
names
case Array.head fail of
Nothing -> pure $ Right $ Map.fromFoldable success
Just f -> pure $ Left $ f

where
metadataFromFile pkgName = do
let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print pkgName <> ".json" ]
Expand Down

0 comments on commit 72b2e61

Please sign in to comment.