Commit bcf608e1, authored by Jayaram Bobba, committed by Robert Kimball

Flush denormals to zero in nbench (#1857)

parent 2c7cacd2
...@@ -29,6 +29,15 @@ using namespace ngraph; ...@@ -29,6 +29,15 @@ using namespace ngraph;
static default_random_engine s_random_engine; static default_random_engine s_random_engine;
void set_denormals_flush_to_zero()
{
#if defined(__x86_64__) || defined(__amd64__)
    // Benchmarks are fed randomly generated data, so denormal values can show up
    // and distort the timings. Turn on both the flush-to-zero and the
    // denormals-are-zero modes with a single MXCSR read-modify-write.
    _mm_setcsr(_mm_getcsr() | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
#endif
}
template <typename T> template <typename T>
void init_int_tv(shared_ptr<runtime::Tensor> tv, T min, T max) void init_int_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
{ {
...@@ -176,6 +185,8 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -176,6 +185,8 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
arg_data.push_back(tensor_data); arg_data.push_back(tensor_data);
args_cacheable.push_back(param->get_cacheable()); args_cacheable.push_back(param->get_cacheable());
} }
set_denormals_flush_to_zero();
vector<shared_ptr<runtime::HostTensor>> result_data; vector<shared_ptr<runtime::HostTensor>> result_data;
vector<shared_ptr<runtime::Tensor>> results; vector<shared_ptr<runtime::Tensor>> results;
for (shared_ptr<Node> out : f->get_results()) for (shared_ptr<Node> out : f->get_results())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment