Commit 4cc3fcd7 authored by Chris Sullivan's avatar Chris Sullivan Committed by Robert Kimball

Added missing sources to a few helpers. (#986)

parent c842d353
...@@ -80,6 +80,8 @@ void runtime::gpu::cuda_memset(void* dst, int value, size_t buffer_size) ...@@ -80,6 +80,8 @@ void runtime::gpu::cuda_memset(void* dst, int value, size_t buffer_size)
namespace namespace
{ {
// Unsigned integer exponentiation by squaring adapted
// from https://stackoverflow.com/a/101613/882253
uint64_t powU64(uint64_t base, uint64_t exp) uint64_t powU64(uint64_t base, uint64_t exp)
{ {
uint64_t result = 1; uint64_t result = 1;
...@@ -100,6 +102,9 @@ namespace ...@@ -100,6 +102,9 @@ namespace
return result; return result;
} }
// Most significant bit search via de bruijn multiplication
// Adapted from https://stackoverflow.com/a/31718095/882253
// Additional ref: http://supertech.csail.mit.edu/papers/debruijn.pdf
uint32_t msbDeBruijnU32(uint32_t v) uint32_t msbDeBruijnU32(uint32_t v)
{ {
static const int multiply_de_Bruijn_bit_position[32] = { static const int multiply_de_Bruijn_bit_position[32] = {
...@@ -115,6 +120,8 @@ namespace ...@@ -115,6 +120,8 @@ namespace
return multiply_de_Bruijn_bit_position[static_cast<uint32_t>(v * 0x07C4ACDDU) >> 27]; return multiply_de_Bruijn_bit_position[static_cast<uint32_t>(v * 0x07C4ACDDU) >> 27];
} }
// perform msb on upper 32 bits if the first 32 bits are filled
// otherwise do normal de bruijn multiplication on the 32 bit word
int msbU64(uint64_t val) int msbU64(uint64_t val)
{ {
if (val > 0x00000000FFFFFFFFul) if (val > 0x00000000FFFFFFFFul)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment