Commit 4659d60d authored by Chris Sullivan, committed by Jayaram Bobba

Fix incorrect hash strings for softmax and 1d maxpool. (#1195)

* Bug fix in softmax cache parameters.

* Additional bug fix for maxpool1d cache parameters.

* Formatting.

* Use nthreads in primitive hash.
parent b5e69eaa
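
Background on the fix: in the GPU emitter, each runtime primitive is memoized under a string hash, and the cached closure captures its launch parameters. If the hash omits a captured parameter such as nthreads, two ops that emit the same kernel but differ in output size collide in the cache, and the second op silently reuses the first one's launch configuration. The sketch below illustrates that failure mode; PrimitiveCache and the hash helpers are illustrative stand-ins, not nGraph API.

// Minimal sketch (illustrative names, not the nGraph API): a string-keyed
// primitive cache and two ways of building the 1-D max-pool hash.
#include <cstddef>
#include <iostream>
#include <limits>
#include <string>
#include <unordered_map>

struct PrimitiveCache
{
    // Return the cached index for `hash`, or SIZE_MAX if absent.
    std::size_t lookup(const std::string& hash) const
    {
        auto it = m_index.find(hash);
        return it == m_index.end() ? std::numeric_limits<std::size_t>::max() : it->second;
    }
    // Insert (or fetch) the primitive index for `hash`.
    std::size_t insert(const std::string& hash)
    {
        return m_index.emplace(hash, m_index.size()).first->second;
    }
    std::unordered_map<std::string, std::size_t> m_index;
};

// Pre-fix hash: only the kernel signature (window parameters).
std::string old_hash(std::size_t window_width, std::size_t window_stride)
{
    return "max_pool_w" + std::to_string(window_width) + "_wst" + std::to_string(window_stride);
}

// Post-fix hash: also encodes nthreads, the output element count the cached
// primitive captures for its kernel launch.
std::string new_hash(std::size_t window_width, std::size_t window_stride, std::size_t nthreads)
{
    return old_hash(window_width, window_stride) + "_n" + std::to_string(nthreads);
}

int main()
{
    PrimitiveCache cache;
    // First max-pool: window 3 / stride 1, 64 output elements.
    cache.insert(old_hash(3, 1));
    // Second max-pool: same window, 128 output elements. With the old hash it
    // finds the first primitive and would reuse its 64-element launch.
    bool collides = cache.lookup(old_hash(3, 1)) != std::numeric_limits<std::size_t>::max();
    // With the fixed hash the two primitives stay distinct.
    cache.insert(new_hash(3, 1, 64));
    bool distinct = cache.lookup(new_hash(3, 1, 128)) == std::numeric_limits<std::size_t>::max();
    std::cout << collides << " " << distinct << "\n"; // prints "1 1"
    return 0;
}

The same reasoning applies to the softmax cache parameters mentioned above: any value baked into a cached primitive must also appear in its cache key.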
@@ -607,9 +607,8 @@ size_t runtime::gpu::CUDAEmitter::build_1d_max_pool(const GPURuntimeContext* ctx
         std::to_string(window_width) + "_wst" + std::to_string(window_stride);
     std::replace(kernel_name.begin(), kernel_name.end(), ' ', '_');
-    // primitive hash and kernel name are equivalent for maxpool_1d
-    auto hash = kernel_name;
+    size_t nthreads = shape_size(output_shape);
+    std::string hash = kernel_name + "_n" + std::to_string(nthreads);
     // check if the requested kernel is already an inserted primitive
     size_t primitive_index = m_primitive_emitter->lookup(hash);
     if (primitive_index != std::numeric_limits<size_t>::max())
@@ -617,12 +616,6 @@ size_t runtime::gpu::CUDAEmitter::build_1d_max_pool(const GPURuntimeContext* ctx
         return primitive_index;
     }
-    size_t nthreads = shape_size(output_shape);
-    //TODO: currently we set it to 64, will add tuning method later
-    uint32_t block_size_x = 64;
-    uint32_t aligned_grid_size_x =
-        align_to_block_size(static_cast<uint32_t>(nthreads), block_size_x);
     // if the kernel has not been compiled, build it
     auto compiled_kernel = ctx->compiled_kernel_pool->get(hash);
     if (compiled_kernel == nullptr)
@@ -630,9 +623,14 @@ size_t runtime::gpu::CUDAEmitter::build_1d_max_pool(const GPURuntimeContext* ctx
         codegen::CodeWriter writer;
         CudaKernelBuilder::get_max_pool_1d(
             writer, kernel_name, dtypes, input_width, output_width, window_width, window_stride);
-        compiled_kernel = ctx->compiled_kernel_pool->set(hash, writer.get_code());
+        compiled_kernel = ctx->compiled_kernel_pool->set(kernel_name, writer.get_code());
     }
+    //TODO: currently we set it to 64, will add tuning method later
+    uint32_t block_size_x = 64;
+    uint32_t aligned_grid_size_x =
+        align_to_block_size(static_cast<uint32_t>(nthreads), block_size_x);
     std::unique_ptr<gpu::primitive> pool(
         new gpu::primitive{[=](void** inputs, void** outputs) mutable {
             void* args_list[] = {&inputs[0], &outputs[0], &nthreads};
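
The hunk above also separates the two cache keys: the compiled CUDA source is stored in the kernel pool under kernel_name (the generated code depends only on the kernel parameters), while the primitive emitter keys on the fuller hash, which additionally encodes nthreads for the launch grid. A rough two-level sketch of that layout, with hypothetical member names, follows.

// Two-level caching sketch (hypothetical names): many primitives with
// different launch sizes can share one compiled kernel.
#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

struct EmitterSketch
{
    std::unordered_map<std::string, std::string> compiled_kernel_pool; // kernel_name -> source
    std::unordered_map<std::string, std::size_t> primitive_index;      // hash -> primitive id
    std::vector<std::size_t> primitive_nthreads;                       // per-primitive launch size

    std::size_t build_1d_max_pool(const std::string& kernel_name, std::size_t nthreads)
    {
        const std::string hash = kernel_name + "_n" + std::to_string(nthreads);
        auto hit = primitive_index.find(hash);
        if (hit != primitive_index.end())
        {
            return hit->second; // same kernel and same nthreads: reuse the primitive
        }
        // Compile (or fetch) the kernel source, keyed by kernel_name only.
        if (compiled_kernel_pool.find(kernel_name) == compiled_kernel_pool.end())
        {
            compiled_kernel_pool[kernel_name] = "/* generated max-pool CUDA source */";
        }
        // Register a new primitive that captures nthreads for its launch grid.
        std::size_t id = primitive_nthreads.size();
        primitive_nthreads.push_back(nthreads);
        primitive_index[hash] = id;
        return id;
    }
};
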
@@ -935,13 +933,14 @@ size_t
     // kernel_name is used to check if the cuda kernel has been previously compiled
     std::stringstream kernel_name;
     kernel_name << "ew_collective"
-                << "_" << op << "_" << join(dtypes, "_") << "_" << reduce_op
+                << "_" << op << "_" << join(dtypes, "_") << "_" << reduce_op << "_r"
+                << tensor_shape.size() << "_rt" << join(reduced_tensors, "_")
                 // multi-output op
                 << "_mo" << int(save_elementwise);
     // hash is used to check if the emitted primitive already exists
     std::stringstream ss;
-    ss << kernel_name.str() << "_s" << join(tensor_shape, "_");
+    ss << kernel_name.str() << "_s" << join(tensor_shape, "_") << "_ra" << join(axes, "_");
     auto hash = ss.str();
     // if the primitive exists, we are done
...
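
In the final hunk, the ew_collective hash additionally records the tensor rank ("_r"), the reduced-tensor list ("_rt"), and the reduction axes ("_ra") alongside the tensor shape, so two ops with identical shapes but different reductions no longer share a cached primitive. A small sketch of composing such a hash, using a stand-in join() helper rather than nGraph's and an illustrative field layout, could look like this.

// Sketch of the expanded ew_collective hash (stand-in join(), illustrative only).
#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Join values with a delimiter; a stand-in for ngraph's join() helper.
template <typename T>
std::string join(const std::vector<T>& values, const std::string& delim)
{
    std::ostringstream os;
    for (std::size_t i = 0; i < values.size(); ++i)
    {
        if (i != 0)
        {
            os << delim;
        }
        os << values[i];
    }
    return os.str();
}

// Compose an ew_collective-style hash that, like the fixed code, folds in the
// rank, reduced tensors, shape, and reduction axes.
std::string ew_collective_hash(const std::string& op,
                               const std::vector<std::string>& dtypes,
                               const std::string& reduce_op,
                               const std::vector<std::size_t>& tensor_shape,
                               const std::vector<std::size_t>& reduced_tensors,
                               const std::vector<std::size_t>& axes,
                               bool save_elementwise)
{
    std::stringstream ss;
    ss << "ew_collective"
       << "_" << op << "_" << join(dtypes, "_") << "_" << reduce_op << "_r"
       << tensor_shape.size() << "_rt" << join(reduced_tensors, "_")
       << "_mo" << int(save_elementwise)
       << "_s" << join(tensor_shape, "_") << "_ra" << join(axes, "_");
    return ss.str();
}

int main()
{
    // Same op, dtypes, and shape, but different reduction axes: the hashes
    // now differ, so each reduction gets its own cached primitive.
    auto h0 = ew_collective_hash("add", {"float", "float"}, "sum", {4, 8}, {1}, {0}, false);
    auto h1 = ew_collective_hash("add", {"float", "float"}, "sum", {4, 8}, {1}, {1}, false);
    std::cout << (h0 != h1) << "\n"; // prints 1
    return 0;
}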