Commit 047c7e0f authored by Anatoly Baksheev

*applied patch from NVidia (nppstTranspose bug)

*fixed some warnings
*finished gpu test port to gtest framework
parent 916690a6
...@@ -100,6 +100,9 @@ macro(define_opencv_module name) ...@@ -100,6 +100,9 @@ macro(define_opencv_module name)
file(GLOB test_srcs "test/*.cpp") file(GLOB test_srcs "test/*.cpp")
file(GLOB test_hdrs "test/*.h*") file(GLOB test_hdrs "test/*.h*")
source_group("Src" FILES ${test_srcs})
source_group("Include" FILES ${test_hdrs})
set(the_target "opencv_test_${name}") set(the_target "opencv_test_${name}")
......
...@@ -55,5 +55,6 @@ ...@@ -55,5 +55,6 @@
#include "opencv2/imgproc/imgproc_c.h" #include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/internal.hpp" #include "opencv2/core/internal.hpp"
#include "opencv2/features2d/features2d.hpp" #include "opencv2/features2d/features2d.hpp"
#include <vector>
#endif #endif
...@@ -41,6 +41,8 @@ ...@@ -41,6 +41,8 @@
#include "precomp.hpp" #include "precomp.hpp"
using namespace std;
#undef INFINITY #undef INFINITY
#define INFINITY 10000 #define INFINITY 10000
#define OCCLUSION_PENALTY 10000 #define OCCLUSION_PENALTY 10000
......
...@@ -38,10 +38,9 @@ source_group("Device" FILES ${lib_device_hdrs}) ...@@ -38,10 +38,9 @@ source_group("Device" FILES ${lib_device_hdrs})
if (HAVE_CUDA) if (HAVE_CUDA)
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp") file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu") file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
file(GLOB_RECURSE ncv_hdr1 "src/nvidia/*.hpp") file(GLOB_RECURSE ncv_hdrs "src/nvidia/*.hpp" "src/nvidia/*.h")
file(GLOB_RECURSE ncv_hdr2 "src/nvidia/*.h")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda})
include_directories("src/nvidia/core" "src/nvidia/NPP_staging") include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
endif() endif()
...@@ -83,7 +82,7 @@ if (HAVE_CUDA) ...@@ -83,7 +82,7 @@ if (HAVE_CUDA)
#CUDA_BUILD_CLEAN_TARGET() #CUDA_BUILD_CLEAN_TARGET()
endif() endif()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda} ${cuda_objs}) add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
if(PCHSupport_FOUND) if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp) set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
...@@ -147,10 +146,15 @@ install(FILES ${lib_hdrs} ...@@ -147,10 +146,15 @@ install(FILES ${lib_hdrs}
# DESTINATION include/opencv2/${name}/device # DESTINATION include/opencv2/${name}/device
# COMPONENT main) # COMPONENT main)
################################################################################################################
################################ GPU Module Tests #####################################################
################################################################################################################
# Test files processing is in the separated directory to avoid 'Src' source # Test files processing is in the separated directory to avoid 'Src' source
# filter creation in Visual Studio # filter creation in Visual Studio
if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test) if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
set(the_target "opencv_test_${name}") set(the_test_target "opencv_test_${name}")
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
"${CMAKE_CURRENT_SOURCE_DIR}/test" "${CMAKE_CURRENT_SOURCE_DIR}/test"
...@@ -169,44 +173,46 @@ if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test) ...@@ -169,44 +173,46 @@ if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
file(GLOB test_srcs "test/*.cpp") file(GLOB test_srcs "test/*.cpp")
file(GLOB test_hdrs "test/*.h*") file(GLOB test_hdrs "test/*.h*")
source_group("Src" FILES ${test_hdrs} ${test_srcs})
if(HAVE_CUDA) if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging) include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
source_group("nvidia" FILES ${nvidia}) file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
source_group("Src\\NVidia" FILES ${nvidia})
endif() endif()
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia}) add_executable(${the_test_target} ${test_srcs} ${test_hdrs} ${nvidia})
if(PCHSupport_FOUND) if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/test/test_precomp.hpp) set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/test/test_precomp.hpp)
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*") if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
if(${CMAKE_GENERATOR} MATCHES "Visual*") if(${CMAKE_GENERATOR} MATCHES "Visual*")
set(${the_target}_pch "test/test_precomp.cpp") set(${the_test_target}_pch "test/test_precomp.cpp")
endif() endif()
add_native_precompiled_header(${the_target} ${pch_header}) add_native_precompiled_header(${the_test_target} ${pch_header})
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles") elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
add_precompiled_header(${the_target} ${pch_header}) add_precompiled_header(${the_test_target} ${pch_header})
endif() endif()
endif() endif()
# Additional target properties # Additional target properties
set_target_properties(${the_target} PROPERTIES set_target_properties(${the_test_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/" RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
) )
add_dependencies(${the_target} ${test_deps}) add_dependencies(${the_test_target} ${test_deps})
# Add the required libraries for linking: # Add the required libraries for linking:
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${test_deps}) target_link_libraries(${the_test_target} ${OPENCV_LINKER_LIBS} ${test_deps})
enable_testing() enable_testing()
get_target_property(LOC ${the_target} LOCATION) get_target_property(LOC ${the_test_target} LOCATION)
add_test(${the_target} "${LOC}") add_test(${the_test_target} "${LOC}")
if(WIN32) if(WIN32)
install(TARGETS ${the_target} RUNTIME DESTINATION bin COMPONENT main) install(TARGETS ${the_test_target} RUNTIME DESTINATION bin COMPONENT main)
endif() endif()
endif() endif()
\ No newline at end of file
...@@ -1198,26 +1198,32 @@ __global__ void transpose(T *d_src, Ncv32u srcStride, ...@@ -1198,26 +1198,32 @@ __global__ void transpose(T *d_src, Ncv32u srcStride,
Ncv32u xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x; Ncv32u xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
Ncv32u yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y; Ncv32u yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
Ncv32u index_in = xIndex + yIndex * srcStride; Ncv32u index_gmem = xIndex + yIndex * srcStride;
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x; if (xIndex < srcRoi.width)
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
Ncv32u index_out = xIndex + yIndex * dstStride;
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
{ {
tile[threadIdx.y+i][threadIdx.x] = d_src[index_in+i*srcStride]; for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
{
if (yIndex + i < srcRoi.height)
{
tile[threadIdx.y+i][threadIdx.x] = d_src[index_gmem+i*srcStride];
}
}
} }
__syncthreads(); __syncthreads();
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
index_gmem = xIndex + yIndex * dstStride;
if (xIndex < srcRoi.height) if (xIndex < srcRoi.height)
{ {
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS) for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
{ {
if (yIndex + i < srcRoi.width) if (yIndex + i < srcRoi.width)
{ {
d_dst[index_out+i*dstStride] = tile[threadIdx.x][threadIdx.y+i]; d_dst[index_gmem+i*dstStride] = tile[threadIdx.x][threadIdx.y+i];
} }
} }
} }
......
...@@ -71,21 +71,21 @@ namespace cv { namespace gpu { namespace device ...@@ -71,21 +71,21 @@ namespace cv { namespace gpu { namespace device
template <size_t src_elem_size, size_t dst_elem_size> template <size_t src_elem_size, size_t dst_elem_size>
struct UnReadWriteTraits_ struct UnReadWriteTraits_
{ {
enum {shift=1}; enum { shift = 1 };
}; };
template <size_t src_elem_size> template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 1> struct UnReadWriteTraits_<src_elem_size, 1>
{ {
enum {shift=4}; enum { shift = 4 };
}; };
template <size_t src_elem_size> template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 2> struct UnReadWriteTraits_<src_elem_size, 2>
{ {
enum {shift=2}; enum { shift = 2 };
}; };
template <typename T, typename D> struct UnReadWriteTraits template <typename T, typename D> struct UnReadWriteTraits
{ {
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift}; enum { shift = UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift };
typedef typename TypeVec<T, shift>::vec_t read_type; typedef typename TypeVec<T, shift>::vec_t read_type;
typedef typename TypeVec<D, shift>::vec_t write_type; typedef typename TypeVec<D, shift>::vec_t write_type;
...@@ -94,21 +94,21 @@ namespace cv { namespace gpu { namespace device ...@@ -94,21 +94,21 @@ namespace cv { namespace gpu { namespace device
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
struct BinReadWriteTraits_ struct BinReadWriteTraits_
{ {
enum {shift=1}; enum { shift = 1 };
}; };
template <size_t src_elem_size1, size_t src_elem_size2> template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1> struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
{ {
enum {shift=4}; enum { shift = 4 };
}; };
template <size_t src_elem_size1, size_t src_elem_size2> template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2> struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
{ {
enum {shift=2}; enum { shift = 2 };
}; };
template <typename T1, typename T2, typename D> struct BinReadWriteTraits template <typename T1, typename T2, typename D> struct BinReadWriteTraits
{ {
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift}; enum {shift = BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
typedef typename TypeVec<T1, shift>::vec_t read_type1; typedef typename TypeVec<T1, shift>::vec_t read_type1;
typedef typename TypeVec<T2, shift>::vec_t read_type2; typedef typename TypeVec<T2, shift>::vec_t read_type2;
......
...@@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider ...@@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider
testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480)); testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080)); testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
//regression tests
testLister.add(new TestTranspose<T>("TestTranspose_reg_0", src, 1072, 375));
} }
......
...@@ -69,4 +69,4 @@ protected: ...@@ -69,4 +69,4 @@ protected:
} }
}; };
TEST(NPP_Staging, DISABLED_multitest) { CV_NVidiaTestsCaller test; test.safe_run(); } TEST(NVidia, DISABLED_multitest) { CV_NVidiaTestsCaller test; test.safe_run(); }
#define GTEST_CREATE_AS_SHARED_LIBRARY 1 #define GTEST_CREATE_AS_SHARED_LIBRARY 1
#if _MSC_VER >= 1200
#pragma warning( disable: 4127 4251)
#endif
#include "opencv2/ts/ts.hpp" #include "opencv2/ts/ts.hpp"
#include "opencv2/core/core_c.h" #include "opencv2/core/core_c.h"
...@@ -5,10 +5,5 @@ ...@@ -5,10 +5,5 @@
#add_subdirectory(ml) #add_subdirectory(ml)
#add_subdirectory(cxts) #add_subdirectory(cxts)
#if(WITH_CUDA) #add_subdirectory(gpu)
# set (BUILD_TESTS_GPU OFF CACHE BOOL "Build tests GPU")
# if(BUILD_TESTS_GPU AND WITH_CUDA)
# add_subdirectory(gpu)
# endif()
#endif()
...@@ -38,12 +38,11 @@ include_directories(../cxts) ...@@ -38,12 +38,11 @@ include_directories(../cxts)
if(HAVE_CUDA) if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging) include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
file(GLOB nvidia "src/nvidia/*.*") file(GLOB nvidia "src/nvidia/*.*")
SET(ncv_cpp ../../modules/gpu/src/nvidia/core/NCV.cpp)
source_group("Src\\nvidia" FILES ${nvidia}) source_group("Src\\nvidia" FILES ${nvidia})
endif() endif()
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia} ${ncv_cpp}) add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia})
# Additional target properties # Additional target properties
set_target_properties(${the_target} PROPERTIES set_target_properties(${the_target} PROPERTIES
......
...@@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider ...@@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider
testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480)); testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080)); testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
//regression tests
testLister.add(new TestTranspose<T>("TestTranspose_reg_0", src, 1072, 375));
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment