17template <
class Fq,
class Fr,
class T>
24template <
class Fq,
class Fr,
class T>
31template <
class Fq,
class Fr,
class T>
38template <
class Fq,
class Fr,
class T>
45template <
class Fq,
class Fr,
class T>
57template <
class Fq,
class Fr,
class T>
68 if (is_point_at_infinity()) {
76 Fq zz_inv = z_inv.
sqr();
77 Fq zzz_inv = zz_inv * z_inv;
78 affine_element<Fq, Fr, T> result(x * zz_inv, y * zzz_inv);
84 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
85 if (is_point_at_infinity()) {
89 if (x.is_msb_set_word()) {
121 if constexpr (T::has_a) {
122 T3 += (T::a * z.sqr().sqr());
158template <
class Fq,
class Fr,
class T>
160 const uint64_t predicate)
noexcept
162 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
163 if (is_point_at_infinity()) {
169 const bool edge_case_trigger = x.is_msb_set() || other.x.is_msb_set();
170 if (edge_case_trigger) {
171 if (x.is_msb_set()) {
183 Fq T1 = other.x * T0;
190 T2.self_conditional_negate(predicate);
193 if (__builtin_expect(T1.is_zero(), 0)) {
251template <
class Fq,
class Fr,
class T>
254 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
255 if (is_point_at_infinity()) {
256 *
this = { other.
x, other.y,
Fq::one() };
260 const bool edge_case_trigger = x.
is_msb_set() || other.x.is_msb_set();
261 if (edge_case_trigger) {
262 if (x.is_msb_set()) {
263 *
this = { other.x, other.y,
Fq::one() };
273 Fq T1 = other.x * T0;
282 if (__builtin_expect(T1.
is_zero(), 0)) {
340template <
class Fq,
class Fr,
class T>
344 return (result += other);
347template <
class Fq,
class Fr,
class T>
350 const affine_element<Fq, Fr, T> to_add{ other.
x, -other.y };
351 return operator+=(to_add);
354template <
class Fq,
class Fr,
class T>
358 return (result -= other);
361template <
class Fq,
class Fr,
class T>
364 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
365 bool p1_zero = is_point_at_infinity();
366 bool p2_zero = other.is_point_at_infinity();
367 if (__builtin_expect((p1_zero || p2_zero), 0)) {
368 if (p1_zero && !p2_zero) {
372 if (p2_zero && !p1_zero) {
379 bool p1_zero = x.is_msb_set();
380 bool p2_zero = other.x.is_msb_set();
381 if (__builtin_expect((p1_zero || p2_zero), 0)) {
382 if (p1_zero && !p2_zero) {
386 if (p2_zero && !p1_zero) {
394 Fq Z2Z2(other.z.sqr());
396 Fq U2(Z1Z1 * other.x);
399 Fq S1(Z2Z2 * other.z);
406 if (__builtin_expect(H.is_zero(), 0)) {
450template <
class Fq,
class Fr,
class T>
454 return (result += other);
457template <
class Fq,
class Fr,
class T>
460 const element to_add{ other.
x, -other.y, other.z };
461 return operator+=(to_add);
464template <
class Fq,
class Fr,
class T>
468 return (result -= other);
476template <
class Fq,
class Fr,
class T>
479 if constexpr (T::USE_ENDOMORPHISM) {
480 return mul_with_endomorphism(exponent);
482 return mul_without_endomorphism(exponent);
513 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
529 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
534 return (x.is_msb_set());
540 if (is_point_at_infinity()) {
549 Fq bz_6 = zzzz * zz * T::b;
550 if constexpr (T::has_a) {
551 bz_6 += (x * T::a) * zzzz;
553 Fq xxx = x.
sqr() * x + bz_6;
558template <
class Fq,
class Fr,
class T>
562 if ((!on_curve()) || (!other.on_curve())) {
565 bool am_infinity = is_point_at_infinity();
566 bool is_infinity = other.is_point_at_infinity();
567 bool both_infinity = am_infinity && is_infinity;
569 if ((!both_infinity) && (am_infinity || is_infinity)) {
572 const Fq lhs_zz = z.sqr();
573 const Fq lhs_zzz = lhs_zz * z;
574 const Fq rhs_zz = other.z.sqr();
575 const Fq rhs_zzz = rhs_zz * other.z;
577 const Fq lhs_x = x * rhs_zz;
578 const Fq lhs_y = y * rhs_zzz;
580 const Fq rhs_x = other.x * lhs_zz;
581 const Fq rhs_y = other.y * lhs_zzz;
582 return both_infinity || ((lhs_x == rhs_x) && (lhs_y == rhs_y));
585template <
class Fq,
class Fr,
class T>
588 if constexpr (T::can_hash_to_curve) {
591 Fq zz = result.
z.sqr();
592 Fq zzz = zz * result.
z;
602template <
class Fq,
class Fr,
class T>
605 const uint256_t converted_scalar(scalar);
607 if (converted_scalar == 0) {
612 const uint64_t maximum_set_bit = converted_scalar.
get_msb();
615 for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) {
617 if (converted_scalar.
get_bit(i)) {
618 accumulator += *
this;
640 std::array<uint64_t, NUM_ROUNDS * 2>
table;
657template <
class Fq,
class Fr,
class T>
661 if (is_point_at_infinity()) {
664 constexpr size_t NUM_ROUNDS = 32;
667 if (converted_scalar.
is_zero()) {
670 static constexpr size_t LOOKUP_SIZE = 8;
674 lookup_table[0] =
element(*
this);
675 for (
size_t i = 1; i < LOOKUP_SIZE; ++i) {
676 lookup_table[i] = lookup_table[i - 1] + d2;
682 accumulator.self_set_infinity();
685 for (
size_t i = 0; i < NUM_ROUNDS * 2; ++i) {
686 uint64_t wnaf_entry = wnaf.table[i];
687 uint64_t
index = wnaf_entry & 0x0fffffffU;
688 bool sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
689 const bool is_odd = ((i & 1) == 1);
690 auto to_add = lookup_table[
static_cast<size_t>(
index)];
691 to_add.y.self_conditional_negate(sign ^ is_odd);
695 accumulator += to_add;
697 if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) {
698 for (
size_t j = 0; j < 4; ++j) {
699 accumulator.self_dbl();
705 accumulator += -lookup_table[0];
707 if (wnaf.endo_skew) {
708 accumulator +=
element{ lookup_table[0].
x * beta, lookup_table[0].y, lookup_table[0].z };
728template <
typename AffineElement,
typename Fq>
729__attribute__((always_inline))
inline void batch_affine_add_impl(
const AffineElement* lhs,
732 Fq* scratch_space)
noexcept
738 scratch_space[i] = lhs[i].x +
rhs[i].x;
739 rhs[i].x -= lhs[i].x;
740 rhs[i].y -= lhs[i].y;
746 throw_or_abort(
"attempted to invert zero in batch_affine_add_impl");
755 rhs[i].x =
rhs[i].y.sqr();
756 rhs[i].x -= scratch_space[i];
759 Fq temp = lhs[i].x -
rhs[i].x;
761 rhs[i].y = temp - lhs[i].y;
776template <
typename AffineElement,
typename Fq>
777__attribute__((always_inline))
inline void batch_affine_add_interleaved(AffineElement* points,
779 Fq* scratch_space)
noexcept
785 scratch_space[i >> 1] = points[i].x + points[i + 1].x;
786 points[i + 1].x -= points[i].x;
787 points[i + 1].y -= points[i].y;
793 throw_or_abort(
"attempted to invert zero in batch_affine_add_interleaved");
802 points[i + 1].x = points[i + 1].y.sqr();
804 points[(i +
num_points) >> 1].x = points[i + 1].x - scratch_space[i >> 1];
807 __builtin_prefetch(points + i - 2);
808 __builtin_prefetch(points + i - 1);
809 __builtin_prefetch(points + ((i +
num_points - 2) >> 1));
810 __builtin_prefetch(scratch_space + ((i - 2) >> 1));
814 points[i].x -= points[(i +
num_points) >> 1].x;
815 points[i].x *= points[i + 1].y;
816 points[(i +
num_points) >> 1].y = points[i].x - points[i].y;
833template <
typename AffineElement,
typename Fq>
834__attribute__((always_inline))
inline void batch_affine_double_impl(AffineElement* points,
836 Fq* scratch_space)
noexcept
842 scratch_space[i] = points[i].x.sqr();
843 scratch_space[i] = scratch_space[i] + scratch_space[i] + scratch_space[i];
849 throw_or_abort(
"attempted to invert zero in batch_affine_double_impl");
855 for (
size_t i_plus_1 =
num_points; i_plus_1 > 0; --i_plus_1) {
856 size_t i = i_plus_1 - 1;
862 points[i].x = scratch_space[i].
sqr() - (points[i].x + points[i].x);
863 points[i].y = scratch_space[i] * (
temp_x - points[i].x) - points[i].y;
878template <
class Fq,
class Fr,
class T>
896 [&](
size_t start,
size_t end,
BB_UNUSED size_t chunk_index) {
897 batch_affine_add_impl<affine_element, Fq>(
898 &second_group[start], &results[start], end - start, &scratch_space[start]);
913template <
class Fq,
class Fr,
class T>
939 if (converted_scalar.
is_zero()) {
947 constexpr size_t LOOKUP_SIZE = 8;
948 constexpr size_t NUM_ROUNDS = 32;
955 for (
auto& table : lookup_table) {
960 auto execute_range = [&](
size_t start,
size_t end) {
963 batch_affine_add_impl<affine_element, Fq>(&lhs[start], &
rhs[start], end - start, &scratch_space[start]);
968 batch_affine_double_impl<affine_element, Fq>(&lhs[start], end - start, &scratch_space[start]);
972 for (
size_t i = start; i < end; ++i) {
973 if (points[i].is_point_at_infinity()) {
977 temp_point_vector[i] = points[i];
978 lookup_table[0][i] = points[i];
982 double_chunked(&temp_point_vector[0]);
983 for (
size_t j = 1; j < LOOKUP_SIZE; ++j) {
984 for (
size_t i = start; i < end; ++i) {
985 lookup_table[j][i] = lookup_table[j - 1][i];
987 add_chunked(&temp_point_vector[0], &lookup_table[j][0]);
991 uint64_t wnaf_entry = 0;
995 for (
size_t j = 0; j < 2; ++j) {
996 wnaf_entry = wnaf.table[j];
997 index = wnaf_entry & 0x0fffffffU;
998 sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
999 const bool is_odd = ((j & 1) == 1);
1000 for (
size_t i = start; i < end; ++i) {
1001 auto to_add = lookup_table[
static_cast<size_t>(
index)][i];
1002 to_add.y.self_conditional_negate(sign ^ is_odd);
1007 work_elements[i] = to_add;
1009 temp_point_vector[i] = to_add;
1013 add_chunked(&temp_point_vector[0], &work_elements[0]);
1015 for (
size_t j = 2; j < NUM_ROUNDS * 2; ++j) {
1016 wnaf_entry = wnaf.table[j];
1017 index = wnaf_entry & 0x0fffffffU;
1018 sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
1019 const bool is_odd = ((j & 1) == 1);
1021 for (
size_t k = 0; k < 4; ++k) {
1022 double_chunked(&work_elements[0]);
1025 for (
size_t i = start; i < end; ++i) {
1026 auto to_add = lookup_table[
static_cast<size_t>(
index)][i];
1027 to_add.y.self_conditional_negate(sign ^ is_odd);
1031 temp_point_vector[i] = to_add;
1033 add_chunked(&temp_point_vector[0], &work_elements[0]);
1037 for (
size_t i = start; i < end; ++i) {
1038 temp_point_vector[i] = -lookup_table[0][i];
1040 add_chunked(&temp_point_vector[0], &work_elements[0]);
1043 if (wnaf.endo_skew) {
1044 for (
size_t i = start; i < end; ++i) {
1045 temp_point_vector[i] = lookup_table[0][i];
1046 temp_point_vector[i].x *= beta;
1048 add_chunked(&temp_point_vector[0], &work_elements[0]);
1051 for (
size_t i = start; i < end; ++i) {
1052 work_elements[i] = points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
1057 return work_elements;
1060template <
typename Fq,
typename Fr,
typename T>
1063 const uint64_t predicate)
noexcept
1065 out = { in.x, predicate ? -in.y : in.y };
1068template <
typename Fq,
typename Fr,
typename T>
1072 temporaries.reserve(num_elements * 2);
1077 for (
size_t i = 0; i < num_elements; ++i) {
1078 temporaries.emplace_back(accumulator);
1079 if (!elements[i].is_point_at_infinity()) {
1080 accumulator *= elements[i].z;
1085 accumulator = accumulator.invert();
1109 for (
size_t i = num_elements - 1; i < num_elements; --i) {
1110 if (!elements[i].is_point_at_infinity()) {
1111 Fq z_inv = accumulator * temporaries[i];
1112 Fq zz_inv = z_inv.
sqr();
1113 elements[i].x *= zz_inv;
1114 elements[i].y *= (zz_inv * z_inv);
1115 accumulator *= elements[i].z;
1121template <
typename Fq,
typename Fr,
typename T>
1125 bool found_one =
false;
1129 while (!found_one) {
1131 yy = x.sqr() * x + T::b;
1132 if constexpr (T::has_a) {
1135 auto [found_root, y1] = yy.sqrt();
1137 found_one = found_root;
#define BB_ASSERT_EQ(actual, expected,...)
constexpr void self_set_infinity() noexcept
static constexpr affine_element one() noexcept
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
element operator*=(const Fr &exponent) noexcept
BB_INLINE constexpr element set_infinity() const noexcept
element mul_with_endomorphism(const Fr &scalar) const noexcept
static element infinity()
static std::vector< affine_element< Fq, Fr, Params > > batch_mul_with_endomorphism(const std::span< const affine_element< Fq, Fr, Params > > &points, const Fr &scalar) noexcept
Multiply each point by the same scalar.
constexpr element operator-=(const element &other) noexcept
constexpr element operator-() const noexcept
friend constexpr element operator+(const affine_element< Fq, Fr, Params > &left, const element &right) noexcept
constexpr element dbl() const noexcept
constexpr element normalize() const noexcept
constexpr void self_dbl() noexcept
static element random_element(numeric::RNG *engine=nullptr) noexcept
static void batch_normalize(element *elements, size_t num_elements) noexcept
constexpr element operator+=(const element &other) noexcept
static void batch_affine_add(const std::span< affine_element< Fq, Fr, Params > > &first_group, const std::span< affine_element< Fq, Fr, Params > > &second_group, const std::span< affine_element< Fq, Fr, Params > > &results) noexcept
Pairwise affine add points in first and second group.
BB_INLINE constexpr bool on_curve() const noexcept
BB_INLINE constexpr bool operator==(const element &other) const noexcept
element operator*(const Fr &exponent) const noexcept
constexpr void self_mixed_add_or_sub(const affine_element< Fq, Fr, Params > &other, uint64_t predicate) noexcept
element() noexcept=default
static void conditional_negate_affine(const affine_element< Fq, Fr, Params > &in, affine_element< Fq, Fr, Params > &out, uint64_t predicate) noexcept
static element random_coordinates_on_curve(numeric::RNG *engine=nullptr) noexcept
element mul_without_endomorphism(const Fr &scalar) const noexcept
constexpr element & operator=(const element &other) noexcept
BB_INLINE constexpr void self_set_infinity() noexcept
BB_INLINE constexpr bool is_point_at_infinity() const noexcept
constexpr bool get_bit(uint64_t bit_index) const
constexpr uint64_t get_msb() const
std::pair< std::array< uint64_t, 2 >, std::array< uint64_t, 2 > > EndoScalars
__attribute__((always_inline)) inline void batch_affine_add_impl(const AffineElement *lhs
Batch affine addition for parallel arrays: (lhs[i], rhs[i]) → rhs[i].
AffineElement const size_t num_pairs
AffineElement const size_t Fq *scratch_space noexcept
batch_inversion_accumulator
std::conditional_t< IsGoblinBigGroup< C, Fq, Fr, G >, element_goblin::goblin_element< C, goblin_field< C >, Fr, G >, element_default::element< C, Fq, Fr, G > > element
element wraps either element_default::element or element_goblin::goblin_element depending on parametr...
constexpr size_t FF_COPY_COST
constexpr size_t FF_ADDITION_COST
constexpr size_t FF_MULTIPLICATION_COST
void fixed_wnaf(const uint64_t *scalar, uint64_t *wnaf, bool &skew_map, const uint64_t point_index, const uint64_t num_points, const size_t wnaf_bits) noexcept
Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication.
Univariate< Fr, domain_end > operator*(const Fr &ff, const Univariate< Fr, domain_end > &uv)
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel based on operations in 1 iteration.
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
static constexpr field cube_root_of_unity()
static constexpr field one()
static constexpr uint256_t modulus
static void split_into_endomorphism_scalars(const field &k, field &k1, field &k2)
BB_INLINE constexpr void self_sqr() &noexcept
constexpr field invert() const noexcept
BB_INLINE constexpr bool is_msb_set() const noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
BB_INLINE constexpr field sqr() const noexcept
BB_INLINE constexpr bool is_zero() const noexcept
BB_INLINE constexpr field from_montgomery_form() const noexcept
static constexpr field zero()
Handles the WNAF computation for scalars that are split using an endomorphism, achieved through split...
EndomorphismWnaf(const EndoScalars &scalars)
std::array< uint64_t, NUM_ROUNDS *2 > table
static constexpr size_t NUM_WNAF_BITS
void throw_or_abort(std::string const &err)
curve::BN254::BaseField Fq