Commit 4cc3fcd7 authored by Chris Sullivan, committed by Robert Kimball

Added missing sources to a few helpers. (#986)

parent c842d353
......@@ -80,6 +80,8 @@ void runtime::gpu::cuda_memset(void* dst, int value, size_t buffer_size)
namespace
{
// Unsigned integer exponentiation by squaring adapted
// from https://stackoverflow.com/a/101613/882253
uint64_t powU64(uint64_t base, uint64_t exp)
{
uint64_t result = 1;
......@@ -100,6 +102,9 @@ namespace
return result;
}
// Most significant bit search via de Bruijn multiplication
// Adapted from https://stackoverflow.com/a/31718095/882253
// Additional ref: http://supertech.csail.mit.edu/papers/debruijn.pdf
uint32_t msbDeBruijnU32(uint32_t v)
{
static const int multiply_de_Bruijn_bit_position[32] = {
......@@ -115,6 +120,8 @@ namespace
return multiply_de_Bruijn_bit_position[static_cast<uint32_t>(v * 0x07C4ACDDU) >> 27];
}
// perform msb on the upper 32 bits if the value does not fit in 32 bits
// (i.e. any of the upper 32 bits is set); otherwise do the normal
// de Bruijn multiplication on the 32-bit word
int msbU64(uint64_t val)
{
if (val > 0x00000000FFFFFFFFul)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment