乗算との複合演算 (integer/fused_operations.cpp)

category: integer
View this file on GitHub
- Last commit date: 2020-07-11 14:53:01+09:00

Depends on

Required by

連立合同式の解の構成 (ModularArithmetic/chinese_remaindering.cpp)

Verified with

Code

#ifndef H_fused_operations
#define H_fused_operations

/**
 * @brief 乗算との複合演算
 * @author えびちゃん
 */

#include <climits>
#include <algorithm>
#include <type_traits>

#include "integer/overflow.cpp"
#include "integer/mul_upper.cpp"

template <typename Tp>
Tp fused_mul_add(Tp x, Tp y, Tp z) {
  // Return x * y + z without overflow
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  unsigned_type ux = x, uy = y;
  unsigned_type lo = ux * uy;
  return lo + z;
}

template <typename Tp>
Tp fused_mul_min(Tp x, Tp y, Tp z) {
  // min(x * y, z) without overflow
  Tp w;
  if (mul_overflow(x, y, w)) return z;  // undefined if x*y < minimum
  return std::min(w, z);
}

template <typename Tp>
Tp fused_add_mod(Tp x, Tp y, Tp z) {
  // (x + y) % z, same sign as z, without overflow
  if (std::is_signed_v<Tp>) {
    if ((x %= z) != 0 && ((x < 0) != (z < 0))) x += z;
    if ((y %= z) != 0 && ((y < 0) != (z < 0))) y += z;
    x -= z - y;
    if ((x %= z) != 0 && ((x < 0) != (z < 0))) x += z;
  } else {
    x %= z;
    y %= z;
    x += ((x < z-y)? y: y-z);
  }
  return x;
}

template <typename Tp>
Tp fused_mul_mod(Tp x, Tp y, Tp z) {
  // (x * y) % z, same sign as z, without overflow
  using value_type = Tp;
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  unsigned_type ux = x, uy = y;
  value_type hi = mul_upper(x, y) % z;
  int const bits = CHAR_BIT * sizeof(Tp);
  for (int i = 0; i < bits; ++i) {
    hi = fused_add_mod(hi, hi, z);
  }
  unsigned_type uxy = ux * uy;
  value_type loh = uxy >> (bits/2);
  value_type lol = uxy & (~unsigned_type(0) >> (bits/2));
  for (int i = 0; i < bits/2; ++i) {
    loh = fused_add_mod(loh, loh, z);
  }
  lol = fused_add_mod(loh, lol, z);
  return fused_add_mod(hi, lol, z);
}

#endif  /* !defined(H_fused_operations) */

#line 1 "integer/fused_operations.cpp"



/**
 * @brief 乗算との複合演算
 * @author えびちゃん
 */

#include <climits>
#include <algorithm>
#include <type_traits>

#line 1 "integer/overflow.cpp"



/**
 * @brief オーバーフロー判定つき演算
 * @author えびちゃん
 */

#line 10 "integer/overflow.cpp"
#include <type_traits>

#line 1 "integer/mul_upper.cpp"



/**
 * @brief 整数の乗算の上位ワード
 * @author えびちゃん
 */

#include <cstdint>
#line 11 "integer/mul_upper.cpp"
#include <type_traits>
#include <utility>

#line 1 "utility/literals.cpp"



/**
 * @brief ユーザ定義リテラル
 * @author えびちゃん
 */

#include <cstddef>
#line 11 "utility/literals.cpp"

constexpr intmax_t  operator ""_jd(unsigned long long n) { return n; }
constexpr uintmax_t operator ""_ju(unsigned long long n) { return n; }
constexpr size_t    operator ""_zu(unsigned long long n) { return n; }
constexpr ptrdiff_t operator ""_td(unsigned long long n) { return n; }

constexpr int8_t   operator ""_i8(unsigned long long n)  { return n; }
constexpr int16_t  operator ""_i16(unsigned long long n) { return n; }
constexpr int32_t  operator ""_i32(unsigned long long n) { return n; }
constexpr int64_t  operator ""_i64(unsigned long long n) { return n; }
constexpr uint8_t  operator ""_u8(unsigned long long n)  { return n; }
constexpr uint16_t operator ""_u16(unsigned long long n) { return n; }
constexpr uint32_t operator ""_u32(unsigned long long n) { return n; }
constexpr uint64_t operator ""_u64(unsigned long long n) { return n; }


#line 15 "integer/mul_upper.cpp"

template <typename Tp>
auto mul_upper(Tp u, Tp v)
  -> typename std::enable_if<std::is_integral_v<Tp>, Tp>::type
{
  using value_type = Tp;
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  unsigned_type hi;
  int const bits = CHAR_BIT * sizeof(value_type);
  if (false && (sizeof u) < sizeof(uintmax_t)) {
    uintmax_t mul = uintmax_t(u) * v;
    hi = mul >> bits;
    // XXX unsigned only
  } else {
    int const half_bits = bits / 2;
    unsigned_type const half_mask = (unsigned_type(1) << half_bits) - 1;
    unsigned_type u0 = u & half_mask, v0 = v & half_mask;
    unsigned_type u1 = unsigned_type(u) >> half_bits, v1 = unsigned_type(v) >> half_bits;
    unsigned_type w0 = u0 * v0;
    unsigned_type t = u1 * v0 + (w0 >> half_bits);
    unsigned_type w1 = t & half_mask;
    unsigned_type w2 = t >> half_bits;
    w1 += u0 * v1;
    hi = u1 * v1 + w2 + (w1 >> half_bits);
    if (u < 0) hi -= v;
    if (v < 0) hi -= u;
  }
  return hi;
}


#line 13 "integer/overflow.cpp"

template <typename Tp>
auto add_overflow(Tp x, Tp y, Tp& z)
  -> typename std::enable_if<std::is_integral_v<Tp>, bool>::type
{
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  unsigned_type ux = x, uy = y;
  unsigned_type sign_bit = unsigned_type{1} << ((CHAR_BIT * sizeof(Tp)) - 1);
  if ((ux & sign_bit) ^ (uy & sign_bit)) return (z = x + y), false;
  if (((ux + uy) & sign_bit) != (ux & sign_bit)) return true;
  z = x + y;
  return false;
}

template <typename Tp>
auto sub_overflow(Tp x, Tp y, Tp& z)
  -> typename std::enable_if<std::is_integral_v<Tp>, bool>::type
{
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  if (y == 0) return (z = x), false;
  unsigned_type uy = y;
  if (~(uy | (uy-1)) == 0 && y < 0) return true;
  return add_overflow(x, -y, z);
}

template <typename Tp>
auto mul_overflow(Tp x, Tp y, Tp& z)
  -> typename std::enable_if<std::is_integral_v<Tp>, bool>::type
{
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  unsigned_type ux = x, uy = y;
  unsigned_type sign_bit = ~(~unsigned_type(0) >> 1);
  if (((ux * uy) & sign_bit) != ((ux & sign_bit) ^ (uy & sign_bit))) return true;
  z = x * y;
  return false;
}


#line 15 "integer/fused_operations.cpp"

template <typename Tp>
Tp fused_mul_add(Tp x, Tp y, Tp z) {
  // Return x * y + z without overflow
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  unsigned_type ux = x, uy = y;
  unsigned_type lo = ux * uy;
  return lo + z;
}

template <typename Tp>
Tp fused_mul_min(Tp x, Tp y, Tp z) {
  // min(x * y, z) without overflow
  Tp w;
  if (mul_overflow(x, y, w)) return z;  // undefined if x*y < minimum
  return std::min(w, z);
}

template <typename Tp>
Tp fused_add_mod(Tp x, Tp y, Tp z) {
  // (x + y) % z, same sign as z, without overflow
  if (std::is_signed_v<Tp>) {
    if ((x %= z) != 0 && ((x < 0) != (z < 0))) x += z;
    if ((y %= z) != 0 && ((y < 0) != (z < 0))) y += z;
    x -= z - y;
    if ((x %= z) != 0 && ((x < 0) != (z < 0))) x += z;
  } else {
    x %= z;
    y %= z;
    x += ((x < z-y)? y: y-z);
  }
  return x;
}

template <typename Tp>
Tp fused_mul_mod(Tp x, Tp y, Tp z) {
  // (x * y) % z, same sign as z, without overflow
  using value_type = Tp;
  using unsigned_type = typename std::make_unsigned<Tp>::type;
  unsigned_type ux = x, uy = y;
  value_type hi = mul_upper(x, y) % z;
  int const bits = CHAR_BIT * sizeof(Tp);
  for (int i = 0; i < bits; ++i) {
    hi = fused_add_mod(hi, hi, z);
  }
  unsigned_type uxy = ux * uy;
  value_type loh = uxy >> (bits/2);
  value_type lol = uxy & (~unsigned_type(0) >> (bits/2));
  for (int i = 0; i < bits/2; ++i) {
    loh = fused_add_mod(loh, loh, z);
  }
  lol = fused_add_mod(loh, lol, z);
  return fused_add_mod(hi, lol, z);
}