diff --git a/Benchmarks/PrivateInformationRetrievalBenchmark/PrivateInformationRetrievalBenchmark.swift b/Benchmarks/PrivateInformationRetrievalBenchmark/PrivateInformationRetrievalBenchmark.swift index 735d4541..37cb8d0e 100644 --- a/Benchmarks/PrivateInformationRetrievalBenchmark/PrivateInformationRetrievalBenchmark.swift +++ b/Benchmarks/PrivateInformationRetrievalBenchmark/PrivateInformationRetrievalBenchmark.swift @@ -278,9 +278,30 @@ struct KeywordPirBenchmarkContext /tmp/database-v2.txtpb To ensure processing the update database yields the same configuration, we use the `.fixedSize` cuckoo table argument, specifying a bucket count. A larger bucket count will leave more room for new entries, without changing the configuration. However, a larger bucket count will also increase server runtime. -One way to choose the `bucketCount` is to start with `bucketCount : 1` and try larger `bucketCounts` until the processing works. -If the processing throws a `PirError.failedToConstructCuckooTable` or logs `Failed to construct Cuckoo table`, this is an indication the chosen bucket count was too small. -We create `/tmp/config-v1-fixed-size.json` with the following contents +There are a few ways to find a good `bucketCount`: +* Start with a small bucket count. + If the processing throws a `PirError.failedToConstructCuckooTable` or logs `Failed to construct Cuckoo table`, this is an indication the chosen bucket count was too small. + Choose larger `bucketCounts` until the processing works. + +* Add a callback to [ProcessKeywordDatabase.processShard](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/privateinformationretrieval/processkeyworddatabase/processshard(shard:with:)). + This callback can be used to report the bucketCount after the cuckoo table was created. 
+A sample callback is +```swift +func onEvent(event: ProcessKeywordDatabase.ProcessShardEvent) throws { + switch event { + case let .cuckooTableEvent(.createdTable(table)): + let summary = try table.summarize() + let bucketCount = summary.bucketCount + default: + () + } +} ``` + +For our example, we use `bucketCount: 256`. + +We create `/tmp/config-v1-fixed-size.json` with the following contents +```json { "algorithm" : "mulPir", "cuckooTableArguments" : { diff --git a/Sources/PIRProcessDatabase/ProcessDatabase.swift b/Sources/PIRProcessDatabase/ProcessDatabase.swift index 754a7510..039f0efc 100644 --- a/Sources/PIRProcessDatabase/ProcessDatabase.swift +++ b/Sources/PIRProcessDatabase/ProcessDatabase.swift @@ -347,8 +347,33 @@ struct ProcessDatabase: ParsableCommand { for (shardID, shard) in keywordDatabase.shards .sorted(by: { $0.0.localizedStandardCompare($1.0) == .orderedAscending }) { - ProcessDatabase.logger.info("Processing shard \(shardID)") - let processed = try ProcessKeywordDatabase.processShard(shard: shard, with: processArgs) + func logEvent(event: ProcessKeywordDatabase.ProcessShardEvent) throws { + switch event { + case let .cuckooTableEvent(.createdTable(table)): + let summary = try table.summarize() + ProcessDatabase.logger.info("Created cuckoo table \(summary)") + case let .cuckooTableEvent(.expandingTable(table)): + let summary = try table.summarize() + ProcessDatabase.logger.info("Expanding cuckoo table \(summary)") + case let .cuckooTableEvent(.finishedExpandingTable(table)): + let summary = try table.summarize() + ProcessDatabase.logger.info("Finished expanding cuckoo table \(summary)") + case let .cuckooTableEvent(.insertedKeywordValuePair(index, _)): + let reportingPercentage = 10 + let shardFraction = shard.rows.count / reportingPercentage + if (index + 1).isMultiple(of: shardFraction) { + let percentage = Float(100 * (index + 1)) / Float(shard.rows.count) + ProcessDatabase.logger + .info("Inserted \(index + 1) / 
\(shard.rows.count) keywords \(percentage)%") + } + } + } + + ProcessDatabase.logger.info("Processing shard \(shardID) with \(shard.rows.count) rows") + let processed = try ProcessKeywordDatabase.processShard( + shard: shard, + with: processArgs, + onEvent: logEvent) if config.trialsPerShard > 0 { guard let row = shard.rows.first else { throw PirError.emptyDatabase diff --git a/Sources/PrivateInformationRetrieval/CuckooTable.swift b/Sources/PrivateInformationRetrieval/CuckooTable.swift index 4ec3d66e..d24600a9 100644 --- a/Sources/PrivateInformationRetrieval/CuckooTable.swift +++ b/Sources/PrivateInformationRetrieval/CuckooTable.swift @@ -223,14 +223,20 @@ extension CuckooBucket: RangeReplaceableCollection { /// A Cuckoo table is a data structure that stores a set of keyword-value pairs, using cuckoo hashing to resolve /// conflicts. -@usableFromInline -struct CuckooTable { +public struct CuckooTable { typealias KeywordHash = UInt64 - struct CuckooTableInformation: Equatable { - let entryCount: Int - let bucketCount: Int - let emptyBucketCount: Int - let loadFactor: Float + /// Information about the cuckoo table. + public struct CuckooTableInformation: Equatable { + /// The number of entries stored in the table. + public let entryCount: Int + /// The number of cuckoo buckets in the table. + public let bucketCount: Int + /// The number of empty buckets. + public let emptyBucketCount: Int + /// The fraction of capacity that is occupied. + /// + /// A small load factor indicates there is unused capacity in the table. + public let loadFactor: Float } @usableFromInline @@ -245,8 +251,22 @@ struct CuckooTable { } } - @usableFromInline let config: CuckooTableConfig + /// Events happening in a ``CuckooTable``. + public enum Event { + /// The table was initialized. + case createdTable(CuckooTable) + /// The table is being expanded. + case expandingTable(CuckooTable) + /// The table is done expanding. 
+ case finishedExpandingTable(CuckooTable) + /// The `index`-th (zero-based) keyword-value pair was inserted. + case insertedKeywordValuePair(index: Int, KeywordValuePair) + } + + /// Configuration used to create the table. + public let config: CuckooTableConfig @usableFromInline var buckets: [CuckooBucket] + @usableFromInline let onEvent: (Event) throws -> Void @usableFromInline var rng: RandomNumberGenerator @usableFromInline var entryCount: Int { @@ -259,11 +279,13 @@ struct CuckooTable { init( config: CuckooTableConfig, database: some Collection<(KeywordValuePair.Keyword, KeywordValuePair.Value)>, + onEvent: @escaping (Event) throws -> Void = { _ in }, using rng: RandomNumberGenerator = SystemRandomNumberGenerator()) throws { try self.init( config: config, database: database.map { keyword, value in KeywordValuePair(keyword: keyword, value: value) }, + onEvent: onEvent, using: rng) } @@ -271,11 +293,13 @@ struct CuckooTable { init( config: CuckooTableConfig, database: some Collection, + onEvent: @escaping (Event) throws -> Void = { _ in }, using rng: RandomNumberGenerator = SystemRandomNumberGenerator()) throws { self.config = config let targetBucketCount: Int self.buckets = [] + self.onEvent = onEvent self.rng = rng switch config.bucketCount { case let .allowExpansion(_, targetLoadFactor: targetLoadFactor): @@ -290,12 +314,16 @@ struct CuckooTable { } self.buckets = Array(repeating: CuckooBucket(), count: targetBucketCount) - for keywordValuePair in database { + for (index, keywordValuePair) in database.enumerated() { try insert(keywordValuePair) + try onEvent(Event.insertedKeywordValuePair(index: index, keywordValuePair)) } + try onEvent(Event.createdTable(self)) } - func summarize() throws -> CuckooTableInformation { + /// Creates a summary of the cuckoo table. + /// - Throws: Error upon failure to summarize the table. 
+ public func summarize() throws -> CuckooTableInformation { let bucketEntryCounts = buckets.map(\.count) let emptyBucketCount: Int = bucketEntryCounts.map { entryCount in entryCount == 0 ? 1 : 0 }.sum() let entryCount: Int = bucketEntryCounts.sum() @@ -403,6 +431,7 @@ struct CuckooTable { mutating func expand() throws { switch config.bucketCount { case let .allowExpansion(expansionFactor: expansionFactor, _): + try onEvent(Event.expandingTable(self)) let oldTable = buckets let bucketCount = Int(ceil(Double(buckets.count) * expansionFactor)).nextMultiple( of: tableCount, @@ -417,6 +446,7 @@ struct CuckooTable { } } } + try onEvent(Event.finishedExpandingTable(self)) default: throw PirError .failedToConstructCuckooTable( diff --git a/Sources/PrivateInformationRetrieval/KeywordDatabase.swift b/Sources/PrivateInformationRetrieval/KeywordDatabase.swift index 92cddaaf..838f4701 100644 --- a/Sources/PrivateInformationRetrieval/KeywordDatabase.swift +++ b/Sources/PrivateInformationRetrieval/KeywordDatabase.swift @@ -365,15 +365,24 @@ public enum ProcessKeywordDatabase { } } + /// Events happening during shard processing. + public enum ProcessShardEvent { + /// A ``CuckooTable`` event. + case cuckooTableEvent(CuckooTable.Event) + } + /// Processes a database shard. /// - Parameters: /// - shard: Shard of a keyword database. /// - arguments: Processing arguments. + /// - onEvent: Function to call when a ``ProcessShardEvent`` happens. /// - Returns: The processed database. /// - Throws: Error upon failure to process the shard. 
@inlinable public static func processShard(shard: KeywordDatabaseShard, - with arguments: Arguments) throws + with arguments: Arguments, + onEvent: @escaping (ProcessShardEvent) throws -> Void = { _ in + }) throws -> ProcessedDatabaseWithParameters { let keywordConfig = arguments.databaseConfig.keywordPirConfig @@ -383,7 +392,7 @@ public enum ProcessKeywordDatabase { } return try KeywordPirServer>.process(database: shard, config: keywordConfig, - with: context) + with: context, onEvent: onEvent) } /// Validates the correctness of processing on a shard. diff --git a/Sources/PrivateInformationRetrieval/KeywordPirProtocol.swift b/Sources/PrivateInformationRetrieval/KeywordPirProtocol.swift index 318d26a9..0f38ce3e 100644 --- a/Sources/PrivateInformationRetrieval/KeywordPirProtocol.swift +++ b/Sources/PrivateInformationRetrieval/KeywordPirProtocol.swift @@ -148,16 +148,24 @@ public final class KeywordPirServer: KeywordPirProtoc /// - database: Collection of database entries. /// - config: Keyword PIR configuration. /// - context: Context for HE computation. + /// - onEvent: Function to call when a ``ProcessKeywordDatabase.ProcessShardEvent`` happens. /// - Returns: A processed database. /// - Throws: Error upon failure to process the database. 
@inlinable public static func process(database: some Collection, config: KeywordPirConfig, - with context: Context) + with context: Context, + onEvent: @escaping (ProcessKeywordDatabase.ProcessShardEvent) throws -> Void = { _ in }) throws -> ProcessedDatabaseWithParameters { + func onCuckooEvent(event: CuckooTable.Event) throws { + try onEvent( + ProcessKeywordDatabase.ProcessShardEvent + .cuckooTableEvent(event)) + } + let cuckooTableConfig = config.cuckooTableConfig - let cuckooTable = try CuckooTable(config: cuckooTableConfig, database: database) + let cuckooTable = try CuckooTable(config: cuckooTableConfig, database: database, onEvent: onCuckooEvent) let entryTable = try cuckooTable.serializeBuckets() let maxEntrySize: Int if config.useMaxSerializedBucketSize {