// FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <=
// 1, where the method versions of these operations are not inlined.
use intrinsics::{
- cttz_nonzero, exact_div, unchecked_rem, unchecked_shl, unchecked_shr, unchecked_sub,
- wrapping_add, wrapping_mul, wrapping_sub,
+ cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl, unchecked_shr,
+ unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
};
/// Calculate multiplicative modular inverse of `x` modulo `m`.
const INV_TABLE_MOD_16: [u8; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
/// Modulo for which the `INV_TABLE_MOD_16` is intended.
const INV_TABLE_MOD: usize = 16;
- /// INV_TABLE_MOD²
- const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD;
- let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize;
// SAFETY: `m` is required to be a power-of-two, hence non-zero.
let m_minus_one = unsafe { unchecked_sub(m, 1) };
- if m <= INV_TABLE_MOD {
- table_inverse & m_minus_one
- } else {
- // We iterate "up" using the following formula:
- //
- // $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$
+ let mut inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize;
+ let mut mod_gate = INV_TABLE_MOD;
+ // We iterate "up" using the following formula:
+ //
+ // $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$
+ //
+ // This step must be repeated at least until `2²ⁿ ≥ m`, at which point we can finally
+ // reduce the result to our desired modulus by taking `inverse mod m`.
+ //
+ // This computation is `O(log log m)`; on 64-bit machines this loop will always finish
+ // in at most 4 iterations.
+ loop {
+ // y = y * (2 - xy) mod n
//
- // until 2²ⁿ ≥ m. Then we can reduce to our desired `m` by taking the result `mod m`.
- let mut inverse = table_inverse;
- let mut going_mod = INV_TABLE_MOD_SQUARED;
- loop {
- // y = y * (2 - xy) mod n
- //
- // Note, that we use wrapping operations here intentionally – the original formula
- // uses e.g., subtraction `mod n`. It is entirely fine to do them `mod
- // usize::MAX` instead, because we take the result `mod n` at the end
- // anyway.
- inverse = wrapping_mul(inverse, wrapping_sub(2usize, wrapping_mul(x, inverse)));
- if going_mod >= m {
- return inverse & m_minus_one;
- }
- going_mod = wrapping_mul(going_mod, going_mod);
+ // Note that we use wrapping operations here intentionally – the original formula
+ // uses e.g., subtraction `mod n`. It is entirely fine to do them modulo
+ // `usize::MAX + 1` (i.e. 2^`usize::BITS`) instead: `m` is a power of two, so it divides
+ // that modulus, and we reduce the result `mod m` at the end anyway.
+ if mod_gate >= m {
+ break;
+ }
+ inverse = wrapping_mul(inverse, wrapping_sub(2usize, wrapping_mul(x, inverse)));
+ let (new_gate, overflow) = mul_with_overflow(mod_gate, mod_gate);
+ if overflow {
+ break;
}
+ mod_gate = new_gate;
}
+ inverse & m_minus_one
}
let addr = p.addr();