Skip to content

Commit

Permalink
Add support for DP noise during aggregation for Prio3SumVec, Prio3His…
Browse files Browse the repository at this point in the history
…togram (#1072)

* Add discrete Laplace distribution, pure DP budget

* Implement TypeWithNoise for SumVec

* Implement TypeWithNoise for Histogram

* Extract noise addition loop

* Add reference tests of noise for FLPs

* Add proof of epsilon-DP for new mechanism

* Document sensitivity definitions of DP strategies

* Input validation: error on epsilon=0

* Apply input validation to serde as well
  • Loading branch information
divergentdave authored Jul 19, 2024
1 parent 895378d commit c5ffd83
Show file tree
Hide file tree
Showing 9 changed files with 1,521 additions and 50 deletions.
1,019 changes: 1,019 additions & 0 deletions documentation/Pure DP Mechanism.lyx

Large diffs are not rendered by default.

Binary file added documentation/Pure DP Mechanism.pdf
Binary file not shown.
62 changes: 62 additions & 0 deletions src/dp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ pub enum DpError {
/// Tried to convert BigInt into something incompatible.
#[error("DP error: {0}")]
BigIntConversion(#[from] TryFromBigIntError<BigInt>),

/// Invalid parameter value.
#[error("invalid parameter: {0}")]
InvalidParameter(String),
}

/// Positive arbitrary precision rational number to represent DP and noise distribution parameters in
Expand Down Expand Up @@ -95,13 +99,57 @@ impl ZCdpBudget {
/// for a `rho`-ZCDP budget.
///
/// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
// TODO(#1095): This should be fallible, and it should return an error if epsilon is zero.
pub fn new(epsilon: Rational) -> Self {
Self { epsilon: epsilon.0 }
}
}

// Empty impl: no trait items are supplied here, so this only declares that `ZCdpBudget` is a
// `DifferentialPrivacyBudget`.
impl DifferentialPrivacyBudget for ZCdpBudget {}

/// Pure differential privacy budget. (&epsilon;-DP or (&epsilon;, 0)-DP)
///
/// Values are built with [`PureDpBudget::new`], which rejects an `epsilon` of zero. `Deserialize`
/// is not derived here; it is implemented by hand in the `budget_serde` module so that
/// deserialized values pass through the same constructor.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Ord, PartialOrd)]
pub struct PureDpBudget {
/// The privacy parameter `epsilon`, as an unsigned arbitrary-precision ratio.
epsilon: Ratio<BigUint>,
}

impl PureDpBudget {
/// Create a budget for parameter `epsilon`.
pub fn new(epsilon: Rational) -> Result<Self, DpError> {
if epsilon.0.numer() == &BigUint::ZERO {
return Err(DpError::InvalidParameter("epsilon cannot be zero".into()));
}
Ok(Self { epsilon: epsilon.0 })
}
}

// Empty impl: no trait items are supplied here, so this only declares that `PureDpBudget` is a
// `DifferentialPrivacyBudget`.
impl DifferentialPrivacyBudget for PureDpBudget {}

/// This module encapsulates a deserialization helper struct. It is needed so we can wrap its
/// derived `Deserialize` implementation in a customized `Deserialize` implementation, which makes
/// use of the budget's constructor to enforce input validation invariants.
mod budget_serde {
    use num_bigint::BigUint;
    use num_rational::Ratio;
    use serde::{de, Deserialize};

    /// Field-for-field mirror of [`super::PureDpBudget`] whose only job is to provide a derived
    /// `Deserialize` implementation. It deliberately keeps the same struct and field names so the
    /// serialized representation matches the derived `Serialize` of the real type.
    #[derive(Deserialize)]
    pub struct PureDpBudget {
        epsilon: Ratio<BigUint>,
    }

    impl<'de> Deserialize<'de> for super::PureDpBudget {
        /// Deserializes the helper struct, then routes the value through
        /// [`super::PureDpBudget::new`] so that invalid parameters are rejected.
        ///
        /// Any validation error from the constructor is forwarded as a custom deserialization
        /// error using the error's own `Display` text, so the message always reflects the
        /// check that actually failed.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let helper = PureDpBudget::deserialize(deserializer)?;
            super::PureDpBudget::new(super::Rational(helper.epsilon))
                .map_err(de::Error::custom)
        }
    }
}

/// Strategy to make aggregate results differentially private, e.g. by adding noise from a specific
/// type of distribution instantiated with a given DP budget.
pub trait DifferentialPrivacyStrategy {
Expand All @@ -126,3 +174,17 @@ pub trait DifferentialPrivacyStrategy {
}

pub mod distributions;

#[cfg(test)]
mod tests {
    use serde_json::{from_value, json};

    use super::PureDpBudget;

    /// Deserialization must accept a nonzero epsilon and reject both a zero first component and
    /// a zero second component of the serialized ratio.
    #[test]
    fn budget_deserialization() {
        // A nonzero ratio is accepted.
        let accepted = json!({"epsilon": [[1], [1]]});
        from_value::<PureDpBudget>(accepted).unwrap();

        // Each of these must fail to deserialize.
        let rejected = [
            json!({"epsilon": [[0], [1]]}),
            json!({"epsilon": [[1], [0]]}),
        ];
        for value in rejected {
            from_value::<PureDpBudget>(value).unwrap_err();
        }
    }
}
87 changes: 86 additions & 1 deletion src/dp/distributions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ use serde::{Deserialize, Serialize};

use super::{
DifferentialPrivacyBudget, DifferentialPrivacyDistribution, DifferentialPrivacyStrategy,
DpError, ZCdpBudget,
DpError, PureDpBudget, ZCdpBudget,
};

/// Sample from the Bernoulli(gamma) distribution, where $gamma \leq 1$.
Expand Down Expand Up @@ -262,6 +262,8 @@ where
}

/// A DP strategy using the discrete gaussian distribution providing zero-concentrated DP.
///
/// This uses L2-sensitivity, with the substitution definition of neighboring datasets.
pub type ZCdpDiscreteGaussian = DiscreteGaussianDpStrategy<ZCdpBudget>;

impl DifferentialPrivacyStrategy for DiscreteGaussianDpStrategy<ZCdpBudget> {
Expand All @@ -287,6 +289,89 @@ impl DifferentialPrivacyStrategy for DiscreteGaussianDpStrategy<ZCdpBudget> {
}
}

/// Samples `BigInt` numbers according to the discrete Laplace distribution, with the given scale
/// parameter. The distribution is defined over the integers, represented by arbitrary-precision
/// integers. The sampling procedure follows [[CKS20]].
///
/// Instances are created with [`DiscreteLaplace::new`], which validates the scale parameter.
///
/// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
#[derive(Clone, Debug)]
pub struct DiscreteLaplace {
    /// The scale parameter of the distribution.
    scale: Ratio<BigUint>,
}

impl DiscreteLaplace {
    /// Construct a discrete Laplace sampler with scale parameter `scale`.
    ///
    /// # Errors
    ///
    /// * [`DpError::ZeroDenominator`] if the denominator of `scale` is zero. This check takes
    ///   precedence over the next one.
    /// * [`DpError::InvalidParameter`] if the numerator of `scale` is zero.
    pub fn new(scale: Ratio<BigUint>) -> Result<Self, DpError> {
        let denominator_is_zero = scale.denom().is_zero();
        let numerator_is_zero = scale.numer().is_zero();

        match (denominator_is_zero, numerator_is_zero) {
            (true, _) => Err(DpError::ZeroDenominator),
            (false, true) => Err(DpError::InvalidParameter(
                "the scale of the discrete Laplace distribution must be nonzero".into(),
            )),
            (false, false) => Ok(Self { scale }),
        }
    }
}

impl Distribution<BigInt> for DiscreteLaplace {
    /// Draws one sample by passing this distribution's scale parameter and `rng` to
    /// `sample_discrete_laplace`.
    fn sample<R>(&self, rng: &mut R) -> BigInt
    where
        R: Rng + ?Sized,
    {
        let Self { scale } = self;
        sample_discrete_laplace(scale, rng)
    }
}

// Empty impl: no trait items are supplied here, so this only declares that `DiscreteLaplace` is a
// `DifferentialPrivacyDistribution`.
impl DifferentialPrivacyDistribution for DiscreteLaplace {}

/// A DP strategy using the discrete Laplace distribution.
///
/// The strategy is generic over the kind of privacy budget `B` it is instantiated with.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd)]
pub struct DiscreteLaplaceDpStrategy<B>
where
B: DifferentialPrivacyBudget,
{
/// The privacy budget this strategy was created from.
budget: B,
}

/// A DP strategy using the discrete Laplace distribution, providing pure DP.
///
/// This uses L1-sensitivity, with the substitution definition of neighboring datasets.
pub type PureDpDiscreteLaplace = DiscreteLaplaceDpStrategy<PureDpBudget>;

impl DifferentialPrivacyStrategy for PureDpDiscreteLaplace {
    type Budget = PureDpBudget;
    type Distribution = DiscreteLaplace;
    type Sensitivity = Ratio<BigUint>;

    /// Wrap the given pure-DP budget in a discrete Laplace strategy.
    fn from_budget(budget: Self::Budget) -> Self {
        Self { budget }
    }

    /// Build a discrete Laplace sampler whose scale parameter is calibrated to provide
    /// `epsilon`-differential privacy when its samples are added to the result of an
    /// integer-valued function with L1-sensitivity `sensitivity`.
    ///
    /// The scale parameter is `sensitivity / epsilon`.
    ///
    /// [[GRS12]] defines a mechanism for 1-dimensional query results, which is restated as Lemma
    /// 29 of [[CKS20]]. Most VDAF instances, however, produce query results of higher dimension.
    /// Proposition 1 of [[DMNS06]] gives a mechanism for multidimensional queries based on the
    /// continuous Laplace distribution. Both mechanisms set the scale parameter of their
    /// distribution to the sensitivity divided by epsilon, and add an independent sample to each
    /// component of the query result. Intuitively, adding discrete Laplace noise with this scale
    /// parameter to every vector element of the query result should provide epsilon-DP: continuous
    /// Laplace noise can be used in the multi-dimensional case, and discrete and continuous
    /// Laplace noise provide the same pure DP with the same parameters in the one-dimensional
    /// case.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`DiscreteLaplace::new`]; in particular, a `sensitivity` of zero
    /// yields a zero scale parameter, which that constructor rejects.
    ///
    /// [GRS12]: https://theory.stanford.edu/~tim/papers/priv.pdf
    /// [CKS20]: https://arxiv.org/pdf/2004.00010.pdf
    /// [DMNS06]: https://people.csail.mit.edu/asmith/PS/sensitivity-tcc-final.pdf
    fn create_distribution(
        &self,
        sensitivity: Self::Sensitivity,
    ) -> Result<Self::Distribution, DpError> {
        let scale = sensitivity / &self.budget.epsilon;
        DiscreteLaplace::new(scale)
    }
}

#[cfg(test)]
mod tests {

Expand Down
1 change: 1 addition & 0 deletions src/flp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ where
S: DifferentialPrivacyStrategy,
{
/// Add noise to the aggregate share to obtain differential privacy.
// TODO(#1073): Rename to add_noise_to_agg_share.
fn add_noise_to_result(
&self,
dp_strategy: &S,
Expand Down
3 changes: 3 additions & 0 deletions src/flp/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ use std::fmt::{self, Debug};
use std::marker::PhantomData;
use subtle::Choice;

#[cfg(feature = "experimental")]
mod dp;

/// The counter data type. Each measurement is `0` or `1` and the aggregate result is the sum of the
/// measurements (i.e., the total number of `1s`).
#[derive(Clone, PartialEq, Eq)]
Expand Down
Loading

0 comments on commit c5ffd83

Please sign in to comment.