Skip to content

Commit

Permalink
feat: karatsuba fp6 mul (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
0xWOLAND authored Aug 21, 2024
1 parent 8b816fe commit 652ebc1
Showing 1 changed file with 106 additions and 79 deletions.
185 changes: 106 additions & 79 deletions src/fp6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,96 +265,123 @@ impl Fp6 {
}

/// Returns `c = self * b`.
///
/// Implements the full-tower interleaving strategy from
/// [ePrint 2022-376](https://eprint.iacr.org/2022/367).
#[inline]
fn mul_interleaved(&self, b: &Self) -> Self {
// The intuition for this algorithm is that we can look at F_p^6 as a direct
// extension of F_p^2, and express the overall operations down to the base field
// F_p instead of only over F_p^2. This enables us to interleave multiplications
// and reductions, ensuring that we don't require double-width intermediate
// representations (with around twice as many limbs as F_p elements).

// We want to express the multiplication c = a x b, where a = (a_0, a_1, a_2) is
// an element of F_p^6, and a_i = (a_i,0, a_i,1) is an element of F_p^2. The fully
// expanded multiplication is given by (2022-376 §5):
//
// c_0,0 = a_0,0 b_0,0 - a_0,1 b_0,1 + a_1,0 b_2,0 - a_1,1 b_2,1 + a_2,0 b_1,0 - a_2,1 b_1,1
// - a_1,0 b_2,1 - a_1,1 b_2,0 - a_2,0 b_1,1 - a_2,1 b_1,0.
// = a_0,0 b_0,0 - a_0,1 b_0,1 + a_1,0 (b_2,0 - b_2,1) - a_1,1 (b_2,0 + b_2,1)
// + a_2,0 (b_1,0 - b_1,1) - a_2,1 (b_1,0 + b_1,1).
//
// c_0,1 = a_0,0 b_0,1 + a_0,1 b_0,0 + a_1,0 b_2,1 + a_1,1 b_2,0 + a_2,0 b_1,1 + a_2,1 b_1,0
// + a_1,0 b_2,0 - a_1,1 b_2,1 + a_2,0 b_1,0 - a_2,1 b_1,1.
// = a_0,0 b_0,1 + a_0,1 b_0,0 + a_1,0(b_2,0 + b_2,1) + a_1,1(b_2,0 - b_2,1)
// + a_2,0(b_1,0 + b_1,1) + a_2,1(b_1,0 - b_1,1).
//
// c_1,0 = a_0,0 b_1,0 - a_0,1 b_1,1 + a_1,0 b_0,0 - a_1,1 b_0,1 + a_2,0 b_2,0 - a_2,1 b_2,1
// - a_2,0 b_2,1 - a_2,1 b_2,0.
// = a_0,0 b_1,0 - a_0,1 b_1,1 + a_1,0 b_0,0 - a_1,1 b_0,1 + a_2,0(b_2,0 - b_2,1)
// - a_2,1(b_2,0 + b_2,1).
//
// c_1,1 = a_0,0 b_1,1 + a_0,1 b_1,0 + a_1,0 b_0,1 + a_1,1 b_0,0 + a_2,0 b_2,1 + a_2,1 b_2,0
// + a_2,0 b_2,0 - a_2,1 b_2,1
// = a_0,0 b_1,1 + a_0,1 b_1,0 + a_1,0 b_0,1 + a_1,1 b_0,0 + a_2,0(b_2,0 + b_2,1)
// + a_2,1(b_2,0 - b_2,1).
//
// c_2,0 = a_0,0 b_2,0 - a_0,1 b_2,1 + a_1,0 b_1,0 - a_1,1 b_1,1 + a_2,0 b_0,0 - a_2,1 b_0,1.
// c_2,1 = a_0,0 b_2,1 + a_0,1 b_2,0 + a_1,0 b_1,1 + a_1,1 b_1,0 + a_2,0 b_0,1 + a_2,1 b_0,0.
//
// Each of these is a "sum of products", which we can compute efficiently.

let a = self;
cfg_if::cfg_if! {
if #[cfg(target_os = "zkvm")] {
let mut b10_p_b11 = b.c1.c0;
b10_p_b11.add_inp(&b.c1.c1);
let mut b10_m_b11 = b.c1.c0;
b10_m_b11.sub_inp(&b.c1.c1);
let mut b20_p_b21 = b.c2.c0;
b20_p_b21.add_inp(&b.c2.c1);
let mut b20_m_b21 = b.c2.c0;
b20_m_b21.sub_inp(&b.c2.c1)
// Implements Algorithm 13 from https://eprint.iacr.org/2010/354.pdf
let mut t0 = self.c0;
t0.mul_inp(&b.c0);
let mut t1 = self.c1;
t1.mul_inp(&b.c1);
let mut t2 = self.c2;
t2.mul_inp(&b.c2);
let mut c0 = self.c1;
c0.add_inp(&self.c2);
let mut tmp = b.c1;
tmp.add_inp(&b.c2);
c0.mul_inp(&tmp);
tmp = t2;
tmp.add_inp(&t1);
c0.sub_inp(&tmp);
c0 = c0.mul_by_nonresidue();
c0.add_inp(&t0);
let mut c1 = self.c0;
c1.add_inp(&self.c1);
tmp = b.c0;
tmp.add_inp(&b.c1);
c1.mul_inp(&tmp);
tmp = t0;
tmp.add_inp(&t1);
c1.sub_inp(&tmp);
tmp = t2.mul_by_nonresidue();
c1.add_inp(&tmp);
tmp = self.c0;
tmp.add_inp(&self.c2);
let mut c2 = b.c0;
c2.add_inp(&b.c2);
c2.mul_inp(&tmp);
tmp = t0;
tmp.add_inp(&t2);
c2.sub_inp(&tmp);
c2.add_inp(&t1);
Fp6 { c0, c1, c2 }
} else {
// Implements the full-tower interleaving strategy from
// [ePrint 2022-376](https://eprint.iacr.org/2022/367).
// The intuition for this algorithm is that we can look at F_p^6 as a direct
// extension of F_p^2, and express the overall operations down to the base field
// F_p instead of only over F_p^2. This enables us to interleave multiplications
// and reductions, ensuring that we don't require double-width intermediate
// representations (with around twice as many limbs as F_p elements).

// We want to express the multiplication c = a x b, where a = (a_0, a_1, a_2) is
// an element of F_p^6, and a_i = (a_i,0, a_i,1) is an element of F_p^2. The fully
// expanded multiplication is given by (2022-376 §5):
//
// c_0,0 = a_0,0 b_0,0 - a_0,1 b_0,1 + a_1,0 b_2,0 - a_1,1 b_2,1 + a_2,0 b_1,0 - a_2,1 b_1,1
// - a_1,0 b_2,1 - a_1,1 b_2,0 - a_2,0 b_1,1 - a_2,1 b_1,0.
// = a_0,0 b_0,0 - a_0,1 b_0,1 + a_1,0 (b_2,0 - b_2,1) - a_1,1 (b_2,0 + b_2,1)
// + a_2,0 (b_1,0 - b_1,1) - a_2,1 (b_1,0 + b_1,1).
//
// c_0,1 = a_0,0 b_0,1 + a_0,1 b_0,0 + a_1,0 b_2,1 + a_1,1 b_2,0 + a_2,0 b_1,1 + a_2,1 b_1,0
// + a_1,0 b_2,0 - a_1,1 b_2,1 + a_2,0 b_1,0 - a_2,1 b_1,1.
// = a_0,0 b_0,1 + a_0,1 b_0,0 + a_1,0(b_2,0 + b_2,1) + a_1,1(b_2,0 - b_2,1)
// + a_2,0(b_1,0 + b_1,1) + a_2,1(b_1,0 - b_1,1).
//
// c_1,0 = a_0,0 b_1,0 - a_0,1 b_1,1 + a_1,0 b_0,0 - a_1,1 b_0,1 + a_2,0 b_2,0 - a_2,1 b_2,1
// - a_2,0 b_2,1 - a_2,1 b_2,0.
// = a_0,0 b_1,0 - a_0,1 b_1,1 + a_1,0 b_0,0 - a_1,1 b_0,1 + a_2,0(b_2,0 - b_2,1)
// - a_2,1(b_2,0 + b_2,1).
//
// c_1,1 = a_0,0 b_1,1 + a_0,1 b_1,0 + a_1,0 b_0,1 + a_1,1 b_0,0 + a_2,0 b_2,1 + a_2,1 b_2,0
// + a_2,0 b_2,0 - a_2,1 b_2,1
// = a_0,0 b_1,1 + a_0,1 b_1,0 + a_1,0 b_0,1 + a_1,1 b_0,0 + a_2,0(b_2,0 + b_2,1)
// + a_2,1(b_2,0 - b_2,1).
//
// c_2,0 = a_0,0 b_2,0 - a_0,1 b_2,1 + a_1,0 b_1,0 - a_1,1 b_1,1 + a_2,0 b_0,0 - a_2,1 b_0,1.
// c_2,1 = a_0,0 b_2,1 + a_0,1 b_2,0 + a_1,0 b_1,1 + a_1,1 b_1,0 + a_2,0 b_0,1 + a_2,1 b_0,0.
//
// Each of these is a "sum of products", which we can compute efficiently.
let b10_p_b11 = b.c1.c0 + b.c1.c1;
let b10_m_b11 = b.c1.c0 - b.c1.c1;
let b20_p_b21 = b.c2.c0 + b.c2.c1;
let b20_m_b21 = b.c2.c0 - b.c2.c1;
}
}

Fp6 {
c0: Fp2 {
c0: Fp::sum_of_products(
[a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1],
[b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21, b10_m_b11, b10_p_b11],
),
c1: Fp::sum_of_products(
[a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21, b10_p_b11, b10_m_b11],
),
},
c1: Fp2 {
c0: Fp::sum_of_products(
[a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1],
[b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21],
),
c1: Fp::sum_of_products(
[a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21],
),
},
c2: Fp2 {
c0: Fp::sum_of_products(
[a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1],
[b.c2.c0, b.c2.c1, b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1],
),
c1: Fp::sum_of_products(
[a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c2.c1, b.c2.c0, b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0],
),
},
Fp6 {
c0: Fp2 {
c0: Fp::sum_of_products(
[a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1],
[b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21, b10_m_b11, b10_p_b11],
),
c1: Fp::sum_of_products(
[a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21, b10_p_b11, b10_m_b11],
),
},
c1: Fp2 {
c0: Fp::sum_of_products(
[a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1],
[b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21],
),
c1: Fp::sum_of_products(
[a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21],
),
},
c2: Fp2 {
c0: Fp::sum_of_products(
[a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1],
[b.c2.c0, b.c2.c1, b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1],
),
c1: Fp::sum_of_products(
[a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c2.c1, b.c2.c0, b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0],
),
},
}
}
}
}

Expand Down

0 comments on commit 652ebc1

Please sign in to comment.