Add support for DP noise during aggregation for Prio3SumVec, Prio3Histogram (#1072)

* Add discrete Laplace distribution, pure DP budget * Implement TypeWithNoise for SumVec * Implement TypeWithNoise for Histogram * Extract noise addition loop * Add reference tests of noise for FLPs * Add proof of epsilon-DP for new mechanism * Document sensitivity definitions of DP strategies * Input validation: error on epsilon=0 * Apply input validation to serde as well
divviup · Jul 19, 2024 · c5ffd83 · c5ffd83
1 parent 895378d
commit c5ffd83
Show file tree

Hide file tree

Showing 9 changed files with 1,521 additions and 50 deletions.
diff --git a/documentation/Pure DP Mechanism.lyx b/documentation/Pure DP Mechanism.lyx
diff --git a/documentation/Pure DP Mechanism.pdf b/documentation/Pure DP Mechanism.pdf
diff --git a/src/dp.rs b/src/dp.rs
@@ -36,6 +36,10 @@ pub enum DpError {
     /// Tried to convert BigInt into something incompatible.
     #[error("DP error: {0}")]
     BigIntConversion(#[from] TryFromBigIntError<BigInt>),
+
+    /// Invalid parameter value.
+    #[error("invalid parameter: {0}")]
+    InvalidParameter(String),
 }
 
 /// Positive arbitrary precision rational number to represent DP and noise distribution parameters in
@@ -95,13 +99,57 @@ impl ZCdpBudget {
     /// for a `rho`-ZCDP budget.
     ///
     /// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
+    // TODO(#1095): This should be fallible, and it should return an error if epsilon is zero.
     pub fn new(epsilon: Rational) -> Self {
         Self { epsilon: epsilon.0 }
     }
 }
 
 impl DifferentialPrivacyBudget for ZCdpBudget {}
 
+/// Pure differential privacy budget, i.e. &epsilon;-DP (equivalently, (&epsilon;, 0)-DP).
+#[derive(Clone, Debug, Eq, PartialEq, Serialize, Ord, PartialOrd)]
+pub struct PureDpBudget {
+    epsilon: Ratio<BigUint>, // `new` rejects a zero value
+}
+
+impl PureDpBudget {
+    /// Create a budget for parameter `epsilon`. Returns an error if `epsilon` is zero.
+    pub fn new(epsilon: Rational) -> Result<Self, DpError> {
+        if epsilon.0.numer() == &BigUint::ZERO {
+            return Err(DpError::InvalidParameter("epsilon cannot be zero".into()));
+        }
+        Ok(Self { epsilon: epsilon.0 })
+    }
+}
+
+impl DifferentialPrivacyBudget for PureDpBudget {}
+
+/// This module encapsulates a deserialization helper struct. It is needed so we can wrap its
+/// derived `Deserialize` implementation in a customized `Deserialize` implementation, which makes
+/// use of the budget's constructor to enforce input validation invariants.
+mod budget_serde {
+    use num_bigint::BigUint;
+    use num_rational::Ratio;
+    use serde::{de, Deserialize};
+
+    #[derive(Deserialize)]
+    pub struct PureDpBudget {
+        epsilon: Ratio<BigUint>, // same field name as `super::PureDpBudget`; not validated
+    }
+
+    impl<'de> Deserialize<'de> for super::PureDpBudget {
+        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where
+            D: serde::Deserializer<'de>,
+        {
+            let helper = PureDpBudget::deserialize(deserializer)?; // derived impl of the helper
+            super::PureDpBudget::new(super::Rational(helper.epsilon)) // validates epsilon
+                .map_err(|_| de::Error::custom("epsilon cannot be zero"))
+        }
+    }
+}
+
 /// Strategy to make aggregate results differentially private, e.g. by adding noise from a specific
 /// type of distribution instantiated with a given DP budget.
 pub trait DifferentialPrivacyStrategy {
@@ -126,3 +174,17 @@ pub trait DifferentialPrivacyStrategy {
 }
 
 pub mod distributions;
+
+#[cfg(test)]
+mod tests {
+    use serde_json::json;
+
+    use super::PureDpBudget;
+
+    #[test]
+    fn budget_deserialization() {
+        serde_json::from_value::<PureDpBudget>(json!({"epsilon": [[1], [1]]})).unwrap(); // accepted
+        serde_json::from_value::<PureDpBudget>(json!({"epsilon": [[0], [1]]})).unwrap_err(); // presumably a zero numerator (epsilon = 0): rejected
+        serde_json::from_value::<PureDpBudget>(json!({"epsilon": [[1], [0]]})).unwrap_err(); // presumably a zero denominator: rejected
+    }
+}
diff --git a/src/dp/distributions.rs b/src/dp/distributions.rs
@@ -60,7 +60,7 @@ use serde::{Deserialize, Serialize};
 
 use super::{
     DifferentialPrivacyBudget, DifferentialPrivacyDistribution, DifferentialPrivacyStrategy,
-    DpError, ZCdpBudget,
+    DpError, PureDpBudget, ZCdpBudget,
 };
 
 /// Sample from the Bernoulli(gamma) distribution, where $\gamma \leq 1$.
@@ -262,6 +262,8 @@ where
 }
 
 /// A DP strategy using the discrete gaussian distribution providing zero-concentrated DP.
+///
+/// This uses L2-sensitivity, with the substitution definition of neighboring datasets.
 pub type ZCdpDiscreteGaussian = DiscreteGaussianDpStrategy<ZCdpBudget>;
 
 impl DifferentialPrivacyStrategy for DiscreteGaussianDpStrategy<ZCdpBudget> {
@@ -287,6 +289,89 @@ impl DifferentialPrivacyStrategy for DiscreteGaussianDpStrategy<ZCdpBudget> {
     }
 }
 
+/// Samples `BigInt` numbers according to the discrete Laplace distribution, with the given scale
+/// parameter. The distribution is defined over the integers, represented by arbitrary-precision
+/// integers. The sampling procedure follows [[CKS20]].
+///
+/// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
+pub struct DiscreteLaplace {
+    /// The scale parameter of the distribution. `new` rejects zero and zero-denominator values.
+    scale: Ratio<BigUint>,
+}
+
+impl DiscreteLaplace {
+    /// Create a new sampler with the given scale parameter. Returns `DpError::ZeroDenominator` if
+    /// the scale's denominator is zero, or `DpError::InvalidParameter` if the scale is zero.
+    pub fn new(scale: Ratio<BigUint>) -> Result<Self, DpError> {
+        if scale.denom().is_zero() {
+            return Err(DpError::ZeroDenominator);
+        }
+        if scale.numer().is_zero() {
+            return Err(DpError::InvalidParameter(
+                "the scale of the discrete Laplace distribution must be nonzero".into(),
+            ));
+        }
+        Ok(Self { scale })
+    }
+}
+
+impl Distribution<BigInt> for DiscreteLaplace {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> BigInt {
+        sample_discrete_laplace(&self.scale, rng)
+    }
+}
+
+impl DifferentialPrivacyDistribution for DiscreteLaplace {}
+
+/// A DP strategy using the discrete Laplace distribution, generic over the budget type `B`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd)]
+pub struct DiscreteLaplaceDpStrategy<B>
+where
+    B: DifferentialPrivacyBudget,
+{
+    budget: B, // privacy budget from which the noise scale is derived
+}
+
+/// A DP strategy using the discrete Laplace distribution, providing pure DP.
+///
+/// This uses L1-sensitivity, with the substitution definition of neighboring datasets.
+pub type PureDpDiscreteLaplace = DiscreteLaplaceDpStrategy<PureDpBudget>;
+
+impl DifferentialPrivacyStrategy for PureDpDiscreteLaplace {
+    type Budget = PureDpBudget;
+    type Distribution = DiscreteLaplace;
+    type Sensitivity = Ratio<BigUint>;
+
+    fn from_budget(budget: Self::Budget) -> Self {
+        DiscreteLaplaceDpStrategy { budget }
+    }
+
+    /// Create a new sampler for the discrete Laplace distribution with a scale parameter calibrated
+    /// to provide `epsilon`-differential privacy when added to the result of an integer-valued
+    /// function with L1-sensitivity `sensitivity`.
+    ///
+    /// A mechanism is defined for 1-dimensional query results in [[GRS12]], and restated in Lemma
+    /// 29 from [[CKS20]]. However, most VDAF instances will produce query results of higher
+    /// dimensions. Proposition 1 of [[DMNS06]] gives a mechanism for multidimensional queries using
+    /// the continuous Laplace distribution. In both cases, the scale parameter of the respective
+    /// distribution is set to the sensitivity divided by epsilon, and independent samples from the
+    /// distribution are added to each component of the query result. Intuitively, adding discrete
+    /// Laplace noise using this scale parameter to each vector element of the query result should
+    /// provide epsilon-DP, since continuous Laplace noise can be used in the multi-dimensional case,
+    /// and discrete and continuous Laplace noise provide the same pure DP with the same parameters
+    /// in the one-dimensional case.
+    ///
+    /// [GRS12]: https://theory.stanford.edu/~tim/papers/priv.pdf
+    /// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
+    /// [DMNS06]: https://people.csail.mit.edu/asmith/PS/sensitivity-tcc-final.pdf
+    fn create_distribution(
+        &self,
+        sensitivity: Self::Sensitivity,
+    ) -> Result<Self::Distribution, DpError> {
+        DiscreteLaplace::new(sensitivity / &self.budget.epsilon) // scale = sensitivity / epsilon
+    }
+}
+
 #[cfg(test)]
 mod tests {
 

diff --git a/src/flp.rs b/src/flp.rs
@@ -553,6 +553,7 @@ where
     S: DifferentialPrivacyStrategy,
 {
     /// Add noise to the aggregate share to obtain differential privacy.
+    // TODO(#1073): Rename to add_noise_to_agg_share.
     fn add_noise_to_result(
         &self,
         dp_strategy: &S,

diff --git a/src/flp/types.rs b/src/flp/types.rs
@@ -11,6 +11,9 @@ use std::fmt::{self, Debug};
 use std::marker::PhantomData;
 use subtle::Choice;
 
+#[cfg(feature = "experimental")]
+mod dp;
+
 /// The counter data type. Each measurement is `0` or `1` and the aggregate result is the sum of the
 /// measurements (i.e., the total number of `1s`).
 #[derive(Clone, PartialEq, Eq)]