Add differential privacy to FixedPointBoundedL2VecSum (divviup#578).

Co-Authored-By: Olivia <ovi@posteo.de> Co-Authored-By: Maxim Urschumzew <u.maxim@live.de>
dpsa4fl · Aug 25, 2023 · e5bdff3 · e5bdff3
1 parent d45bf21
commit e5bdff3
Show file tree

Hide file tree

Showing 20 changed files with 1,469 additions and 162 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -21,6 +21,12 @@ fiat-crypto = { version = "0.1.20", optional = true }
 fixed = { version = "1.23", optional = true }
 getrandom = { version = "0.2.10", features = ["std"] }
 hmac = { version = "0.12.1", optional = true }
+num-bigint = { version = "0.4.3", optional = true, features = ["rand", "serde"] }
+num-integer = { version = "0.1.45", optional = true }
+num-iter = { version = "0.1.43", optional = true }
+num-rational = { version = "0.4.1", optional = true }
+num-traits = { version = "0.2.15", optional = true }
+rand = { version = "0.8", optional = true }
 rand_core = "0.6.4"
 rayon = { version = "1.7.0", optional = true }
 serde = { version = "1.0", features = ["derive"] }
@@ -50,7 +56,7 @@ zipf = "7.0.1"
 
 [features]
 default = ["crypto-dependencies"]
-experimental = ["bitvec", "fiat-crypto", "fixed"]
+experimental = ["bitvec", "fiat-crypto", "fixed", "num-bigint", "num-rational", "num-traits", "num-integer", "num-iter", "rand"]
 multithreaded = ["rayon"]
 prio2 = ["crypto-dependencies", "hmac", "sha2"]
 crypto-dependencies = ["aes", "ctr", "cmac"]
@@ -69,6 +75,11 @@ harness = false
 name = "cycle_counts"
 harness = false
 
+[[test]]
+name = "discrete_gauss"
+path = "tests/discrete_gauss.rs"
+required-features = ["experimental"]
+
 [package.metadata.docs.rs]
 all-features = true
 rustdoc-args = ["--cfg", "docsrs"]

diff --git a/benches/speed_tests.rs b/benches/speed_tests.rs
@@ -7,6 +7,14 @@ use criterion::{BatchSize, Throughput};
 use fixed::types::{I1F15, I1F31};
 #[cfg(feature = "experimental")]
 use fixed_macro::fixed;
+#[cfg(feature = "experimental")]
+use num_bigint::BigUint;
+#[cfg(feature = "experimental")]
+use num_rational::Ratio;
+#[cfg(feature = "experimental")]
+use num_traits::ToPrimitive;
+#[cfg(feature = "experimental")]
+use prio::dp::distributions::DiscreteGaussian;
 #[cfg(feature = "prio2")]
 use prio::vdaf::prio2::Prio2;
 use prio::{
@@ -49,6 +57,30 @@ fn prng(c: &mut Criterion) {
     group.finish();
 }
 
+/// Speed test for generating samples from the discrete gaussian distribution using different
+/// standard deviations, ranging from a small integer and a fraction up to `u128::MAX` squared.
+#[cfg(feature = "experimental")]
+pub fn dp_noise(c: &mut Criterion) {
+    let mut group = c.benchmark_group("dp_noise");
+    let mut rng = StdRng::seed_from_u64(RNG_SEED);
+
+    let test_stds = [
+        Ratio::<BigUint>::from_integer(BigUint::from(u128::MAX)).pow(2), // u128::MAX squared
+        Ratio::<BigUint>::from_integer(BigUint::from(u64::MAX)),
+        Ratio::<BigUint>::from_integer(BigUint::from(u32::MAX)),
+        Ratio::<BigUint>::from_integer(BigUint::from(5u8)),
+        Ratio::<BigUint>::new(BigUint::from(10000u32), BigUint::from(23u32)), // 10000/23
+    ];
+    for std in test_stds {
+        let sampler = DiscreteGaussian::new(std.clone()).unwrap();
+        group.bench_function(
+            BenchmarkId::new("discrete_gaussian", std.to_f64().unwrap_or(f64::INFINITY)),
+            |b| b.iter(|| sampler.sample(&mut rng)), // one sample per iteration
+        );
+    }
+    group.finish();
+}
+
 /// The asymptotic cost of polynomial multiplication is `O(n log n)` using FFT and `O(n^2)` using
 /// the naive method. This benchmark demonstrates that the latter has better concrete performance
 /// for small polynomials. The result is used to pick the `FFT_THRESHOLD` constant in
@@ -312,7 +344,7 @@ fn prio3(c: &mut Criterion) {
                 BenchmarkId::new("serial", dimension),
                 &dimension,
                 |b, dimension| {
-                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _, _>, _, 16> =
+                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
                         Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
                     let mut measurement = vec![fixed!(0: I1F15); *dimension];
                     measurement[0] = fixed!(0.5: I1F15);
@@ -329,7 +361,7 @@ fn prio3(c: &mut Criterion) {
                     BenchmarkId::new("parallel", dimension),
                     &dimension,
                     |b, dimension| {
-                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _, _>, _, 16> =
+                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
                             Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
                                 num_shares, *dimension,
                             )
@@ -350,7 +382,7 @@ fn prio3(c: &mut Criterion) {
                 BenchmarkId::new("series", dimension),
                 &dimension,
                 |b, dimension| {
-                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _, _>, _, 16> =
+                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
                         Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
                     let mut measurement = vec![fixed!(0: I1F15); *dimension];
                     measurement[0] = fixed!(0.5: I1F15);
@@ -379,7 +411,7 @@ fn prio3(c: &mut Criterion) {
                     BenchmarkId::new("parallel", dimension),
                     &dimension,
                     |b, dimension| {
-                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _, _>, _, 16> =
+                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
                             Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
                                 num_shares, *dimension,
                             )
@@ -413,7 +445,7 @@ fn prio3(c: &mut Criterion) {
                 BenchmarkId::new("serial", dimension),
                 &dimension,
                 |b, dimension| {
-                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _, _>, _, 16> =
+                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
                         Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
                     let mut measurement = vec![fixed!(0: I1F31); *dimension];
                     measurement[0] = fixed!(0.5: I1F31);
@@ -430,7 +462,7 @@ fn prio3(c: &mut Criterion) {
                     BenchmarkId::new("parallel", dimension),
                     &dimension,
                     |b, dimension| {
-                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _, _>, _, 16> =
+                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
                             Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
                                 num_shares, *dimension,
                             )
@@ -451,7 +483,7 @@ fn prio3(c: &mut Criterion) {
                 BenchmarkId::new("series", dimension),
                 &dimension,
                 |b, dimension| {
-                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _, _>, _, 16> =
+                    let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
                         Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
                     let mut measurement = vec![fixed!(0: I1F31); *dimension];
                     measurement[0] = fixed!(0.5: I1F31);
@@ -480,7 +512,7 @@ fn prio3(c: &mut Criterion) {
                     BenchmarkId::new("parallel", dimension),
                     &dimension,
                     |b, dimension| {
-                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _, _>, _, 16> =
+                        let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
                             Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
                                 num_shares, *dimension,
                             )
@@ -737,9 +769,9 @@ fn poplar1_generate_zipf_distributed_batch(
 }
 
 #[cfg(all(feature = "prio2", feature = "experimental"))]
-criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf);
+criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf, dp_noise);
 #[cfg(all(not(feature = "prio2"), feature = "experimental"))]
-criterion_group!(benches, poplar1, prio3, poly_mul, prng, idpf);
+criterion_group!(benches, poplar1, prio3, poly_mul, prng, idpf, dp_noise);
 #[cfg(all(feature = "prio2", not(feature = "experimental")))]
 criterion_group!(benches, prio3, prio2, prng, poly_mul);
 #[cfg(all(not(feature = "prio2"), not(feature = "experimental")))]

diff --git a/binaries/src/bin/vdaf_message_sizes.rs b/binaries/src/bin/vdaf_message_sizes.rs
@@ -1,5 +1,6 @@
 use fixed::{types::extra::U15, FixedI16};
 use fixed_macro::fixed;
+
 use prio::{
     codec::Encode,
     vdaf::{

diff --git a/src/dp.rs b/src/dp.rs
@@ -1,16 +1,77 @@
 // SPDX-License-Identifier: MPL-2.0
 
 //! Differential privacy (DP) primitives.
-use std::fmt::Debug;
+//!
+//! There are three main traits defined in this module:
+//!
+//!  - `DifferentialPrivacyBudget`: Implementors should be types of DP-budgets,
+//!    i.e., methods to measure the amount of privacy provided by DP-mechanisms.
+//!    Examples: zCDP, ApproximateDP (Epsilon-Delta), PureDP
+//!
+//!  - `DifferentialPrivacyDistribution`: Distribution from which noise is sampled.
+//!    Examples: DiscreteGaussian, DiscreteLaplace
+//!
+//!  - `DifferentialPrivacyStrategy`: This is a combination of choices for budget and distribution.
+//!    Examples: zCDP-DiscreteGaussian, EpsilonDelta-DiscreteGaussian
+//!
+use num_bigint::{BigInt, BigUint, TryFromBigIntError};
+use num_rational::{BigRational, Ratio};
 
-/// Positive rational number to represent DP and noise distribution parameters in protocol messages
-/// and manipulate them without rounding errors.
+/// Errors propagated by methods in this module.
+#[derive(Debug, thiserror::Error)]
+pub enum DpError {
+    /// Tried to use an invalid float as privacy parameter.
+    #[error(
+        "DP error: input value was not a valid privacy parameter. \
+             It should be a non-negative, finite float."
+    )]
+    InvalidFloat,
+
+    /// Tried to construct a rational number with zero denominator.
+    #[error("DP error: input denominator was zero.")]
+    ZeroDenominator,
+
+    /// Tried to convert a `BigInt` into an incompatible type, e.g. a negative value to `BigUint`.
+    #[error("DP error: {0}")]
+    BigIntConversion(#[from] TryFromBigIntError<BigInt>),
+}
+
+/// Positive arbitrary precision rational number to represent DP and noise distribution parameters in
+/// protocol messages and manipulate them without rounding errors.
 #[derive(Clone, Debug)]
-pub struct Rational {
-    /// Numerator.
-    pub numerator: u32,
-    /// Denominator.
-    pub denominator: u32,
+pub struct Rational(Ratio<BigUint>);
+
+impl Rational {
+    /// Construct a [`Rational`] from `n` / `d`. Returns [`DpError::ZeroDenominator`] if `d` is zero.
+    pub fn from_unsigned<T>(n: T, d: T) -> Result<Self, DpError>
+    where
+        T: Into<u128>,
+    {
+        // `BigUint` stays out of the public API (hence `Into<u128>`); widen `d` to test for zero.
+        let d = d.into();
+        if d == 0 {
+            Err(DpError::ZeroDenominator)
+        } else {
+            Ok(Rational(Ratio::<BigUint>::new(n.into().into(), d.into())))
+        }
+    }
+}
+
+impl TryFrom<f32> for Rational {
+    type Error = DpError;
+    /// Constructs a `Rational` from a given `f32` value.
+    ///
+    /// NaN and the infinities yield [`DpError::InvalidFloat`]; negative values yield
+    /// [`DpError::BigIntConversion`]. Other values are represented exactly, without rounding.
+    fn try_from(value: f32) -> Result<Self, DpError> {
+        match BigRational::from_float(value) {
+            Some(y) => Ok(Rational(Ratio::<BigUint>::new(
+                y.numer().clone().try_into()?,
+                y.denom().clone().try_into()?,
+            ))),
+            None => Err(DpError::InvalidFloat),
+        }
+    }
 }
 
 /// Marker trait for differential privacy budgets (regardless of the specific accounting method).
@@ -19,50 +80,46 @@ pub trait DifferentialPrivacyBudget {}
 /// Marker trait for differential privacy scalar noise distributions.
 pub trait DifferentialPrivacyDistribution {}
 
-/// Zero-concentrated differential privacy (zCDP) budget as defined in [[BS16]].
+/// Zero-concentrated differential privacy (zCDP) budget as defined in [[BS16]].
 ///
 /// [BS16]: https://arxiv.org/pdf/1605.02065.pdf
-pub struct ZeroConcentratedDifferentialPrivacyBudget {
-    /// Parameter `epsilon`, using the notation from [[CKS20]] where `rho = (epsilon**2)/2`
-    /// for a `rho`-zCDP budget.
+pub struct ZCdpBudget {
+    epsilon: Ratio<BigUint>,
+}
+
+impl ZCdpBudget {
+    /// Create a budget for parameter `epsilon`, using the notation from [[CKS20]] where `rho = (epsilon**2)/2`
+    /// for a `rho`-zCDP budget.
     ///
     /// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
-    pub epsilon: Rational,
+    pub fn new(epsilon: Rational) -> Self {
+        Self { epsilon: epsilon.0 }
+    }
 }
 
-impl DifferentialPrivacyBudget for ZeroConcentratedDifferentialPrivacyBudget {}
+impl DifferentialPrivacyBudget for ZCdpBudget {}
 
-/// Zero-mean Discrete Gaussian noise distribution.
-///
-/// The distribution is defined over the integers, represented by arbitrary-precision integers.
-#[derive(Clone, Debug)]
-pub struct DiscreteGaussian {
-    /// Standard deviation of the distribution.
-    pub sigma: Rational,
-}
+/// Strategy to make aggregate results differentially private, e.g. by adding noise from a specific
+/// type of distribution instantiated with a given DP budget.
+pub trait DifferentialPrivacyStrategy {
+    /// The type of the DP budget, i.e. the variant of differential privacy that can be obtained
+    /// by using this strategy.
+    type Budget: DifferentialPrivacyBudget;
 
-impl DifferentialPrivacyDistribution for DiscreteGaussian {}
+    /// The distribution type this strategy will use to generate the noise.
+    type Distribution: DifferentialPrivacyDistribution;
 
-/// Strategy to make aggregate shares differentially private, e.g. by adding noise from a specific
-/// type of distribution instantiated with a given DP budget
-pub trait DifferentialPrivacyStrategy {}
+    /// The type of the sensitivity used for the privacy analysis.
+    type Sensitivity;
 
-/// A zCDP budget used to create a Discrete Gaussian distribution
-pub struct ZCdpDiscreteGaussian {
-    budget: ZeroConcentratedDifferentialPrivacyBudget,
-}
+    /// Create a strategy from a differential privacy budget. The distribution created with
+    /// `create_distribution` should provide the amount of privacy specified here.
+    fn from_budget(b: Self::Budget) -> Self;
 
-impl DifferentialPrivacyStrategy for ZCdpDiscreteGaussian {}
-
-impl ZCdpDiscreteGaussian {
-    /// Creates a new Discrete Gaussian by following Theorem 4 from [[CKS20]]
-    ///
-    /// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
-    pub fn create_distribution(&self, sensitivity: Rational) -> DiscreteGaussian {
-        let sigma = Rational {
-            numerator: self.budget.epsilon.denominator * sensitivity.numerator,
-            denominator: self.budget.epsilon.numerator * sensitivity.denominator,
-        };
-        DiscreteGaussian { sigma }
-    }
+    /// Create a new distribution parametrized s.t. adding samples to the result of a function
+    /// with sensitivity `s` will yield differential privacy of the DP variant given in the
+    /// `Budget` type. Can error upon invalid parameters.
+    fn create_distribution(&self, s: Self::Sensitivity) -> Result<Self::Distribution, DpError>;
 }
+
+pub mod distributions;