MNN/source/backend/cuda/execution/MNNCUDAFunction.cuh

38 lines
898 B
Plaintext

#ifndef MNNCUDAFunction_cuh
#define MNNCUDAFunction_cuh
struct DivModFast {
DivModFast(int d = 1)
{
d_ = (d == 0) ? 1 : d;
for (l_ = 0;; ++l_) {
if ((1U << l_) >= d_)
break;
}
uint64_t one = 1;
uint64_t m = ((one << 32) * ((one << l_) - d_)) / d_ + 1;
m_ = static_cast<uint32_t>(m);
}
__device__ __inline__ int div(int idx) const
{
uint32_t tm = __umulhi(m_, idx); // get high 32-bit of the product
return (tm + idx) >> l_;
}
__device__ __inline__ int mod(int idx) const
{
return idx - d_ * div(idx);
}
__device__ __inline__ void divmod(int idx, int &quo, int &rem)
{
quo = div(idx);
rem = idx - quo * d_;
}
uint32_t d_; // divisor
uint32_t l_; // ceil(log2(d_))
uint32_t m_; // m' in the papaer
};
#endif