cudnnFind/Get interoperability (#1721)

* Add find algorithm for convolution without extra padding. * Use cudnnFind* or cudnnGet* depending on a boolean tuning parameter. Add a select function to search the perf results of the cuDNN queries. * Formatting. * Algo search is no longer binary; it is now either off, a heuristic search (cudnnGet*), or an explicit search (cudnnFind*). * Formatting. * Switch to explicit. * Throw if no suitable cuDNN algo is found. * Formatting. * Remove comment.

cudnnFind/Get interoperability (#1721)
* Add find algorithm for convolution without extra padding. * Use cudnnFind* or cudnnGet* depending on a boolean tuning parameter. Add a select function to search the perf results of the cuDNN queries. * Formatting. * Algo search is no longer binary; it is now either off, a heuristic search (cudnnGet*), or an explicit search (cudnnFind*). * Formatting. * Switch to explicit. * Throw if no suitable cuDNN algo is found. * Formatting. * Remove comment.
20a01781 · Chris Sullivan · Robert Kimball · f2f42fa9 · 20a01781 · 20a01781
Commit 20a01781 authored Oct 09, 2018 by Chris Sullivan Committed by Robert Kimball Oct 09, 2018
Expand all Show whitespace changes
Inline Side-by-side

Showing with 32 additions and 5 deletions

cudnn_emitter.cpp src/ngraph/runtime/gpu/cudnn_emitter.cpp +0 -0

cudnn_emitter.hpp src/ngraph/runtime/gpu/cudnn_emitter.hpp +32 -5

No files found.
--- a/src/ngraph/runtime/gpu/cudnn_emitter.cpp
+++ b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
--- a/src/ngraph/runtime/gpu/cudnn_emitter.hpp
+++ b/src/ngraph/runtime/gpu/cudnn_emitter.hpp
@@ -72,6 +72,13 @@ namespace ngraph
                    Backward
                };
+                enum class algo_search // search mode passed as find_algo to the build_convolution* methods
+                {
+                    HEURISTIC, // heuristic search (cudnnGet* queries)
+                    EXPLICIT, // explicit search (cudnnFind* queries)
+                    NONE // algorithm search is off; the default for find_algo
+                };
                size_t build_convolution(const std::string& dtype,
                                         const Shape& input_tensor_shape,
                                         const Shape& input_filter_shape,
@@ -79,25 +86,27 @@ namespace ngraph
                                         const Strides& window_movement_strides,
                                         const Strides& window_dilation_strides,
                                         const Shape& padding_below,
-                                         const bool find_algo = false);
+                                         const algo_search find_algo = algo_search::NONE);
-                size_t build_convolution_backward_data(const std::string& dtype,
+                size_t build_convolution_backward_data(
+                    const std::string& dtype,
                    const Shape& input_filter_shape,
                    const Shape& input_tensor_shape,
                    const Shape& output_tensor_shape,
                    const Strides& window_movement_strides,
                    const Strides& window_dilation_strides,
                    const Shape& padding_below,
-                                                       const bool find_algo = false);
+                    const algo_search find_algo = algo_search::NONE);
-                size_t build_convolution_backward_filter(const std::string& dtype,
+                size_t build_convolution_backward_filter(
+                    const std::string& dtype,
                    const Shape& input_tensor_shape_0,
                    const Shape& input_tensor_shape_1,
                    const Shape& output_filter_shape,
                    const Strides& window_movement_strides,
                    const Strides& window_dilation_strides,
                    const Shape& padding_below,
-                                                         const bool find_algo = false);
+                    const algo_search find_algo = algo_search::NONE);
                size_t build_reduce_forward(const cudnnReduceTensorOp_t& reduce_op,
                                            const std::string& dtype,
@@ -178,6 +187,24 @@ namespace ngraph
                                                     cudnnConvolutionMode_t mode,
                                                     cudnnDataType_t data_type);
+                /// Select an algorithm from the perf results of a cuDNN algorithm query.
+                ///
+                /// Walks perf_results front to back and returns the algo of the first
+                /// entry whose status is CUDNN_STATUS_SUCCESS and whose memory
+                /// requirement does not exceed workspace_byte.
+                /// NOTE(review): this presumably relies on cuDNN returning results
+                /// ordered fastest-first -- confirm against the cuDNN documentation.
+                ///
+                /// \param perf_results Perf result entries; each element must expose
+                ///        status, memory and algo members.
+                /// \param workspace_byte Largest acceptable value of an entry's memory
+                ///        member. Defaults to the maximum size_t, i.e. no limit.
+                /// \return The algo member of the first entry meeting both conditions.
+                /// \throws ngraph_error if no entry meets both conditions (including
+                ///         when perf_results is empty).
+                template <typename PERF_TYPE, typename ALGO_TYPE>
+                ALGO_TYPE
+                    select_cudnn_algo(const std::vector<PERF_TYPE>& perf_results,
+                                      size_t workspace_byte = std::numeric_limits<size_t>::max())
+                {
+                    // Range-for instead of an int index: avoids comparing a signed
+                    // counter against the unsigned perf_results.size().
+                    for (const auto& result : perf_results)
+                    {
+                        if (result.status == CUDNN_STATUS_SUCCESS &&
+                            result.memory <= workspace_byte)
+                        {
+                            return result.algo;
+                        }
+                    }
+                    throw ngraph_error(
+                        "No suitable cuDNN algorithm was found for the requested operation.");
+                }
                CUDNNDescriptors m_descriptors;
                CUDNNHostParameters m_host_parameters;