Commit 7ecc1d12 authored by Chris Sullivan's avatar Chris Sullivan Committed by Scott Cyphers

Remove old cudnn test. (#2257)

parent 6d984a5a
...@@ -90,7 +90,7 @@ if (NGRAPH_CPU_ENABLE) ...@@ -90,7 +90,7 @@ if (NGRAPH_CPU_ENABLE)
endif() endif()
if(NGRAPH_GPU_ENABLE) if(NGRAPH_GPU_ENABLE)
list(APPEND SRC cudnn.cpp gpu_test.cpp gpu_fusion.cpp) list(APPEND SRC gpu_test.cpp gpu_fusion.cpp)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} GPU) set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} GPU)
endif() endif()
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <sstream>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/ngraph.hpp"
#include "util/ndarray.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
TEST(cudnn, loadTest)
{
auto cudnn_version = cudnnGetVersion();
EXPECT_FLOAT_EQ(cudnn_version, CUDNN_VERSION);
}
TEST(cudnn, compileTest)
{
const auto source = R"###(
// Example developed from LLVM documentation https://llvm.org/docs/NVPTXUsage.html
#include <cassert>
#include <fstream>
#include <iostream>
#include "cublas_v2.h"
#include "cuda.h"
void check_cuda_errors(CUresult err) {
assert(err == CUDA_SUCCESS);
}
/// main - Program entry point
int main(int argc, char **argv) {
CUdevice device;
CUmodule cuda_module;
CUcontext context;
CUfunction function;
CUlinkState linker;
int dev_count;
// Cublas init
cudaError_t cudaStat;
cublasStatus_t stat;
cublasHandle_t handle;
stat = cublasCreate(&handle);
cublasDestroy(handle);
// CUDA initialization
check_cuda_errors(cuInit(0));
check_cuda_errors(cuDeviceGetCount(&dev_count));
check_cuda_errors(cuDeviceGet(&device, 0));
char name[128];
check_cuda_errors(cuDeviceGetName(name, 128, device));
std::cout << "Using CUDA Device [0]: " << name << "\n";
int dev_major, dev_minor;
check_cuda_errors(cuDeviceComputeCapability(&dev_major, &dev_minor, device));
std::cout << "Device Compute Capability: "
<< dev_major << "." << dev_minor << "\n";
if (dev_major < 2) {
std::cerr << "ERROR: Device 0 is not SM 2.0 or greater\n";
return 1;
}
const auto str = R"(
.version 5.0
.target sm_60
.address_size 64
// .globl _Z7ew_multPfS_S_ // -- Begin function _Z7ew_multPfS_S_
.global .align 1 .b8 threadIdx[1];
// @_Z7ew_multPfS_S_
.visible .entry _Z7ew_multPfS_S_(
.param .u64 _Z7ew_multPfS_S__param_0,
.param .u64 _Z7ew_multPfS_S__param_1,
.param .u64 _Z7ew_multPfS_S__param_2
)
{
.local .align 8 .b8 __local_depot0[24];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .f32 %f<4>;
.reg .b32 %r<2>;
.reg .b64 %rd<17>;
// BB#0:
mov.u64 %SPL, __local_depot0;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd3, [_Z7ew_multPfS_S__param_2];
ld.param.u64 %rd2, [_Z7ew_multPfS_S__param_1];
ld.param.u64 %rd1, [_Z7ew_multPfS_S__param_0];
cvta.to.global.u64 %rd4, %rd3;
cvta.global.u64 %rd5, %rd4;
cvta.to.global.u64 %rd6, %rd2;
cvta.global.u64 %rd7, %rd6;
cvta.to.global.u64 %rd8, %rd1;
cvta.global.u64 %rd9, %rd8;
st.u64 [%SP+0], %rd9;
st.u64 [%SP+8], %rd7;
st.u64 [%SP+16], %rd5;
ld.u64 %rd10, [%SP+0];
mov.u32 %r1, %tid.x;
mul.wide.u32 %rd11, %r1, 4;
add.s64 %rd12, %rd10, %rd11;
ld.f32 %f1, [%rd12];
ld.u64 %rd13, [%SP+8];
add.s64 %rd14, %rd13, %rd11;
ld.f32 %f2, [%rd14];
mul.rn.f32 %f3, %f1, %f2;
ld.u64 %rd15, [%SP+16];
add.s64 %rd16, %rd15, %rd11;
st.f32 [%rd16], %f3;
ret;
}
// -- End function
// .globl _Z6ew_addPfS_S_ // -- Begin function _Z6ew_addPfS_S_
.visible .entry _Z6ew_addPfS_S_(
.param .u64 _Z6ew_addPfS_S__param_0,
.param .u64 _Z6ew_addPfS_S__param_1,
.param .u64 _Z6ew_addPfS_S__param_2
) // @_Z6ew_addPfS_S_
{
.local .align 8 .b8 __local_depot1[24];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .f32 %f<4>;
.reg .b32 %r<2>;
.reg .b64 %rd<17>;
// BB#0:
mov.u64 %SPL, __local_depot1;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd3, [_Z6ew_addPfS_S__param_2];
ld.param.u64 %rd2, [_Z6ew_addPfS_S__param_1];
ld.param.u64 %rd1, [_Z6ew_addPfS_S__param_0];
cvta.to.global.u64 %rd4, %rd3;
cvta.global.u64 %rd5, %rd4;
cvta.to.global.u64 %rd6, %rd2;
cvta.global.u64 %rd7, %rd6;
cvta.to.global.u64 %rd8, %rd1;
cvta.global.u64 %rd9, %rd8;
st.u64 [%SP+0], %rd9;
st.u64 [%SP+8], %rd7;
st.u64 [%SP+16], %rd5;
ld.u64 %rd10, [%SP+0];
mov.u32 %r1, %tid.x;
mul.wide.u32 %rd11, %r1, 4;
add.s64 %rd12, %rd10, %rd11;
ld.f32 %f1, [%rd12];
ld.u64 %rd13, [%SP+8];
add.s64 %rd14, %rd13, %rd11;
ld.f32 %f2, [%rd14];
add.rn.f32 %f3, %f1, %f2;
ld.u64 %rd15, [%SP+16];
add.s64 %rd16, %rd15, %rd11;
st.f32 [%rd16], %f3;
ret;
}
// -- End function
)";
// Create driver context
check_cuda_errors(cuCtxCreate(&context, 0, device));
// Create module for object
check_cuda_errors(cuModuleLoadDataEx(&cuda_module, str, 0, 0, 0));
// Get kernel function
check_cuda_errors(cuModuleGetFunction(&function, cuda_module, "_Z7ew_multPfS_S_"));
// Device data
CUdeviceptr dev_bufferA;
CUdeviceptr dev_bufferB;
CUdeviceptr dev_bufferC;
check_cuda_errors(cuMemAlloc(&dev_bufferA, sizeof(float)*16));
check_cuda_errors(cuMemAlloc(&dev_bufferB, sizeof(float)*16));
check_cuda_errors(cuMemAlloc(&dev_bufferC, sizeof(float)*16));
float* host_A = new float[16];
float* host_B = new float[16];
float* host_C = new float[16];
// Populate input
for (unsigned i = 0; i != 16; ++i) {
host_A[i] = (float)i;
host_B[i] = (float)(2*i);
host_C[i] = 0.0f;
}
check_cuda_errors(cuMemcpyHtoD(dev_bufferA, &host_A[0], sizeof(float)*16));
check_cuda_errors(cuMemcpyHtoD(dev_bufferB, &host_B[0], sizeof(float)*16));
unsigned block_size_X = 16;
unsigned block_size_Y = 1;
unsigned block_size_Z = 1;
unsigned grid_size_X = 1;
unsigned grid_size_Y = 1;
unsigned grid_size_Z = 1;
// Kernel parameters
void *kernel_params[] = { &dev_bufferA, &dev_bufferB, &dev_bufferC };
std::cout << "Launching kernel\n";
// Kernel launch
check_cuda_errors(cuLaunchKernel(function, grid_size_X, grid_size_Y, grid_size_Z,
block_size_X, block_size_Y, block_size_Z,
0, NULL, kernel_params, NULL));
// Retrieve device data
check_cuda_errors(cuMemcpyDtoH(&host_C[0], dev_bufferC, sizeof(float)*16));
std::cout << "Results:\n";
for (unsigned i = 0; i != 16; ++i) {
std::cout << host_A[i] << " + " << host_B[i] << " = " << host_C[i] << "\n";
}
// Clean up after ourselves
delete [] host_A;
delete [] host_B;
delete [] host_C;
// Clean-up
check_cuda_errors(cuMemFree(dev_bufferA));
check_cuda_errors(cuMemFree(dev_bufferB));
check_cuda_errors(cuMemFree(dev_bufferC));
check_cuda_errors(cuModuleUnload(cuda_module));
check_cuda_errors(cuCtxDestroy(context));
return 0;
})###";
codegen::Compiler compiler;
auto module = compiler.compile(source);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment