Commit 3ab2728d authored by Vladislav Vinogradov

gpu device layer code refactoring

parent fa0daa48
@@ -23,7 +23,9 @@ source_group("Include" FILES ${lib_hdrs})
 #file(GLOB lib_device_hdrs "include/opencv2/${name}/device/*.h*")
 file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
+file(GLOB lib_device_hdrs_detail "src/opencv2/gpu/device/detail/*.h*")
 source_group("Device" FILES ${lib_device_hdrs})
+source_group("Device\\Detail" FILES ${lib_device_hdrs_detail})
 if (HAVE_CUDA)
 file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
@@ -83,7 +85,7 @@ foreach(d ${DEPS})
 endif()
 endforeach()
-add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
+add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
 # For dynamic link numbering convenions
 set_target_properties(${the_target} PROPERTIES
...
This diff is collapsed.
@@ -41,7 +41,7 @@
 //M*/
 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/datamov_utils.hpp"
 using namespace cv::gpu;
@@ -565,7 +565,7 @@ namespace cv { namespace gpu { namespace bfmatcher
 int myBestTrainIdx = -1;
 int myBestImgIdx = -1;
-typename Dist::ResultType myMin = numeric_limits_gpu<typename Dist::ResultType>::max();
+typename Dist::ResultType myMin = numeric_limits<typename Dist::ResultType>::max();
 {
 typename Dist::ResultType* sdiff_row = smem + BLOCK_DIM_X * threadIdx.y;
@@ -821,7 +821,7 @@ namespace cv { namespace gpu { namespace bfmatcher
 {
 const T* trainDescs = trainDescs_.ptr(trainIdx);
-typename Dist::ResultType myDist = numeric_limits_gpu<typename Dist::ResultType>::max();
+typename Dist::ResultType myDist = numeric_limits<typename Dist::ResultType>::max();
 if (mask(queryIdx, trainIdx))
 {
@@ -932,7 +932,7 @@ namespace cv { namespace gpu { namespace bfmatcher
 {
 const int tid = threadIdx.x;
-T myMin = numeric_limits_gpu<T>::max();
+T myMin = numeric_limits<T>::max();
 int myMinIdx = -1;
 for (int i = tid; i < n; i += BLOCK_SIZE)
@@ -1007,10 +1007,10 @@ namespace cv { namespace gpu { namespace bfmatcher
 if (threadIdx.x == 0)
 {
 float dist = sdist[0];
-if (dist < numeric_limits_gpu<float>::max())
+if (dist < numeric_limits<float>::max())
 {
 int bestIdx = strainIdx[0];
-allDist[bestIdx] = numeric_limits_gpu<float>::max();
+allDist[bestIdx] = numeric_limits<float>::max();
 trainIdx[i] = bestIdx;
 distance[i] = dist;
 }
...
This diff is collapsed.
@@ -42,8 +42,8 @@
 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"
 #include "safe_call.hpp"
@@ -76,7 +76,7 @@ namespace filter_krnls
 {
 template <typename T, size_t size> struct SmemType_
 {
-typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t smem_t;
+typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
 };
 template <typename T> struct SmemType_<T, 4>
 {
@@ -111,7 +111,7 @@ namespace filter_krnls
 if (x < src.cols)
 {
-typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
+typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
 sum_t sum = VecTraits<sum_t>::all(0);
 sDataRow += threadIdx.x + BLOCK_DIM_X - anchor;
@@ -253,7 +253,7 @@ namespace filter_krnls
 if (y < src.rows)
 {
-typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
+typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
 sum_t sum = VecTraits<sum_t>::all(0);
 sDataColumn += (threadIdx.y + BLOCK_DIM_Y - anchor) * BLOCK_DIM_X;
@@ -475,7 +475,7 @@ namespace bf_krnls
 }
 }
-float minimum = numeric_limits_gpu<float>::max();
+float minimum = numeric_limits<float>::max();
 int id = 0;
 if (cost[0] < minimum)
...
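The vec_t to vec_type rename above is mechanical, but the trait pattern it touches recurs in every filtering kernel. A minimal sketch of the renamed traits in use, assuming the vec_traits.hpp header from this commit is on the include path (the helper below is illustrative, not part of the commit):

#include "opencv2/gpu/device/vec_traits.hpp"

// Build a float accumulator with the same channel count as the source
// pixel type, e.g. T = uchar3 gives sum_t = float3.
template <typename T>
__device__ void zero_sum_sketch()
{
    using namespace cv::gpu::device;
    typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
    sum_t sum = VecTraits<sum_t>::all(0); // zero-initialized vector
    (void)sum; // a real kernel would accumulate weighted taps into sum
}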
@@ -42,6 +42,7 @@
 //M*/
 #include "internal_shared.hpp"
+#include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 using namespace cv::gpu;
@@ -50,14 +51,11 @@ using namespace cv::gpu::device;
 #define UINT_BITS 32U
-#define LOG2_WARP_SIZE 5U
-#define WARP_SIZE (1U << LOG2_WARP_SIZE)
 //Warps == subhistograms per threadblock
 #define WARP_COUNT 6
 //Threadblock size
-#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * WARP_SIZE)
+#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
 #define HISTOGRAM256_BIN_COUNT 256
 //Shared memory per threadblock
@@ -73,7 +71,7 @@ namespace cv { namespace gpu { namespace histograms
 {
 #if (!USE_SMEM_ATOMICS)
-#define TAG_MASK ( (1U << (UINT_BITS - LOG2_WARP_SIZE)) - 1U )
+#define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
 __forceinline__ __device__ void addByte(volatile uint* s_WarpHist, uint data, uint threadTag)
 {
@@ -111,7 +109,7 @@ namespace cv { namespace gpu { namespace histograms
 {
 //Per-warp subhistogram storage
 __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
-uint* s_WarpHist= s_Hist + (threadIdx.x >> LOG2_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
+uint* s_WarpHist= s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
 //Clear shared memory storage for current threadblock before processing
 #pragma unroll
@@ -119,7 +117,7 @@ namespace cv { namespace gpu { namespace histograms
 s_Hist[threadIdx.x + i * HISTOGRAM256_THREADBLOCK_SIZE] = 0;
 //Cycle through the entire data set, update subhistograms for each warp
-const uint tag = threadIdx.x << (UINT_BITS - LOG2_WARP_SIZE);
+const uint tag = threadIdx.x << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE);
 __syncthreads();
 const uint colsui = d_Data.step / sizeof(uint);
...
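The file-local LOG2_WARP_SIZE/WARP_SIZE macros give way to the shared device-layer constants here. A sketch of the per-warp addressing and collision-tagging scheme used above, assuming OPENCV_GPU_WARP_SIZE expands to 32 and OPENCV_GPU_LOG_WARP_SIZE to 5 (the two helper functions are illustrative, not part of the commit):

// Assumes the file-local UINT_BITS == 32 and HISTOGRAM256_BIN_COUNT == 256.
__device__ unsigned int warpHistOffset(unsigned int tid)
{
    // each warp owns one 256-bin subhistogram in shared memory
    return (tid >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
}

__device__ unsigned int threadTag(unsigned int tid)
{
    // the top LOG_WARP_SIZE bits tag each counter with the writing lane,
    // so the software shared-memory "atomic" in addByte can detect when
    // another lane in the warp overwrote the same bin and retry
    return tid << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE);
}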
@@ -41,7 +41,7 @@
 //M*/
 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -84,8 +84,8 @@ __global__ void matchTemplateNaiveKernel_CCORR(
 int w, int h, const PtrStep image, const PtrStep templ,
 DevMem2Df result)
 {
-typedef typename TypeVec<T, cn>::vec_t Type;
-typedef typename TypeVec<float, cn>::vec_t Typef;
+typedef typename TypeVec<T, cn>::vec_type Type;
+typedef typename TypeVec<float, cn>::vec_type Typef;
 int x = blockDim.x * blockIdx.x + threadIdx.x;
 int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -174,8 +174,8 @@ __global__ void matchTemplateNaiveKernel_SQDIFF(
 int w, int h, const PtrStep image, const PtrStep templ,
 DevMem2Df result)
 {
-typedef typename TypeVec<T, cn>::vec_t Type;
-typedef typename TypeVec<float, cn>::vec_t Typef;
+typedef typename TypeVec<T, cn>::vec_type Type;
+typedef typename TypeVec<float, cn>::vec_type Typef;
 int x = blockDim.x * blockIdx.x + threadIdx.x;
 int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -884,7 +884,7 @@ void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
 template <int cn>
 __global__ void extractFirstChannel_32F(const PtrStep image, DevMem2Df result)
 {
-typedef typename TypeVec<float, cn>::vec_t Typef;
+typedef typename TypeVec<float, cn>::vec_type Typef;
 int x = blockDim.x * blockIdx.x + threadIdx.x;
 int y = blockDim.y * blockIdx.y + threadIdx.y;
...
@@ -40,9 +40,9 @@
 //
 //M*/
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"
...
This diff is collapsed.
@@ -42,7 +42,7 @@
 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "safe_call.hpp"
 using namespace cv::gpu;
@@ -381,7 +381,7 @@ namespace cv { namespace gpu { namespace bp
 template <typename T>
 __device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
 {
-float minimum = numeric_limits_gpu<float>::max();
+float minimum = numeric_limits<float>::max();
 for(int i = 0; i < cndisp; ++i)
 {
@@ -486,7 +486,7 @@ namespace cv { namespace gpu { namespace bp
 size_t disp_step = disp.rows * u.step;
 int best = 0;
-float best_val = numeric_limits_gpu<float>::max();
+float best_val = numeric_limits<float>::max();
 for (int d = 0; d < cndisp; ++d)
 {
 float val = us[d * disp_step];
...
@@ -42,7 +42,7 @@
 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "safe_call.hpp"
 using namespace cv::gpu;
@@ -147,7 +147,7 @@ namespace cv { namespace gpu { namespace csbp
 for(int i = 0; i < nr_plane; i++)
 {
-T minimum = numeric_limits_gpu<T>::max();
+T minimum = numeric_limits<T>::max();
 int id = 0;
 for(int d = 0; d < cndisp; d++)
 {
@@ -161,7 +161,7 @@ namespace cv { namespace gpu { namespace csbp
 data_cost_selected[i * cdisp_step1] = minimum;
 selected_disparity[i * cdisp_step1] = id;
-data_cost [id * cdisp_step1] = numeric_limits_gpu<T>::max();
+data_cost [id * cdisp_step1] = numeric_limits<T>::max();
 }
 }
 }
@@ -192,7 +192,7 @@ namespace cv { namespace gpu { namespace csbp
 data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
 selected_disparity[nr_local_minimum * cdisp_step1] = d;
-data_cost[d * cdisp_step1] = numeric_limits_gpu<T>::max();
+data_cost[d * cdisp_step1] = numeric_limits<T>::max();
 nr_local_minimum++;
 }
@@ -203,7 +203,7 @@ namespace cv { namespace gpu { namespace csbp
 for (int i = nr_local_minimum; i < nr_plane; i++)
 {
-T minimum = numeric_limits_gpu<T>::max();
+T minimum = numeric_limits<T>::max();
 int id = 0;
 for (int d = 0; d < cndisp; d++)
@@ -218,7 +218,7 @@ namespace cv { namespace gpu { namespace csbp
 data_cost_selected[i * cdisp_step1] = minimum;
 selected_disparity[i * cdisp_step1] = id;
-data_cost[id * cdisp_step1] = numeric_limits_gpu<T>::max();
+data_cost[id * cdisp_step1] = numeric_limits<T>::max();
 }
 }
 }
@@ -610,7 +610,7 @@ namespace cv { namespace gpu { namespace csbp
 {
 for(int i = 0; i < nr_plane; i++)
 {
-T minimum = numeric_limits_gpu<T>::max();
+T minimum = numeric_limits<T>::max();
 int id = 0;
 for(int j = 0; j < nr_plane2; j++)
 {
@@ -630,7 +630,7 @@ namespace cv { namespace gpu { namespace csbp
 l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
 r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
-data_cost_new[id * cdisp_step1] = numeric_limits_gpu<T>::max();
+data_cost_new[id * cdisp_step1] = numeric_limits<T>::max();
 }
 }
@@ -737,7 +737,7 @@ namespace cv { namespace gpu { namespace csbp
 __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
 const T* dst_disp, const T* src_disp, int nr_plane, T* temp)
 {
-T minimum = numeric_limits_gpu<T>::max();
+T minimum = numeric_limits<T>::max();
 for(int d = 0; d < nr_plane; d++)
 {
@@ -850,7 +850,7 @@ namespace cv { namespace gpu { namespace csbp
 const T* r = r_ + (y+0) * cmsg_step1 + (x-1);
 int best = 0;
-T best_val = numeric_limits_gpu<T>::max();
+T best_val = numeric_limits<T>::max();
 for (int i = 0; i < nr_plane; ++i)
 {
 int idx = i * cdisp_step1;
...
@@ -46,8 +46,10 @@
 //M*/
 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
+#include "opencv2/gpu/device/utility.hpp"
+#include "opencv2/gpu/device/functional.hpp"
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -393,31 +395,10 @@ namespace cv { namespace gpu { namespace surf
 //dss
 H[2][2] = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
-float det = H[0][0] * (H[1][1] * H[2][2] - H[1][2] * H[2][1])
-- H[0][1] * (H[1][0] * H[2][2] - H[1][2] * H[2][0])
-+ H[0][2] * (H[1][0] * H[2][1] - H[1][1] * H[2][0]);
-if (det != 0.0f)
+__shared__ float x[3];
+if (solve3x3(H, dD, x))
 {
-float invdet = 1.0f / det;
-__shared__ float x[3];
-x[0] = invdet *
-(dD[0] * (H[1][1] * H[2][2] - H[1][2] * H[2][1]) -
-H[0][1] * (dD[1] * H[2][2] - H[1][2] * dD[2]) +
-H[0][2] * (dD[1] * H[2][1] - H[1][1] * dD[2]));
-x[1] = invdet *
-(H[0][0] * (dD[1] * H[2][2] - H[1][2] * dD[2]) -
-dD[0] * (H[1][0] * H[2][2] - H[1][2] * H[2][0]) +
-H[0][2] * (H[1][0] * dD[2] - dD[1] * H[2][0]));
-x[2] = invdet *
-(H[0][0] * (H[1][1] * dD[2] - dD[1] * H[2][1]) -
-H[0][1] * (H[1][0] * dD[2] - dD[1] * H[2][0]) +
-dD[0] * (H[1][0] * H[2][1] - H[1][1] * H[2][0]));
 if (fabs(x[0]) <= 1.f && fabs(x[1]) <= 1.f && fabs(x[2]) <= 1.f)
 {
 // if the step is within the interpolation region, perform it
@@ -500,20 +481,6 @@ namespace cv { namespace gpu { namespace surf
 __constant__ float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
 __constant__ float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
-__device__ void reduceSum32(volatile float* v_sum, float& sum)
-{
-v_sum[threadIdx.x] = sum;
-if (threadIdx.x < 16)
-{
-v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 16];
-v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 8];
-v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 4];
-v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 2];
-v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 1];
-}
-}
 __global__ void icvCalcOrientation(const float* featureX, const float* featureY, const float* featureSize, float* featureDir)
 {
 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
@@ -599,8 +566,11 @@ namespace cv { namespace gpu { namespace surf
 float* s_sum_row = s_sum + threadIdx.y * 32;
-reduceSum32(s_sum_row, sumx);
-reduceSum32(s_sum_row, sumy);
+//reduceSum32(s_sum_row, sumx);
+//reduceSum32(s_sum_row, sumy);
+warpReduce32(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
+warpReduce32(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
 const float temp_mod = sumx * sumx + sumy * sumy;
 if (temp_mod > best_mod)
...
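Two surf.cu helpers move into the shared device layer here: the unrolled Cramer's-rule solve becomes solve3x3(H, dD, x), and reduceSum32 is superseded by the generic warpReduce32 plus a functor from functional.hpp. A sketch of the reduction the new call performs, reconstructed from the deleted reduceSum32 (the name and signature below are assumed from the call site, not quoted from utility.hpp):

// 32-lane warp-synchronous tree reduction in shared memory; relies on
// the implicit lockstep of a single warp on pre-Volta hardware, which is
// why the shared-memory pointer is volatile.
template <typename T, typename Op>
__device__ void warpReduce32_sketch(volatile T* smem, T& val, unsigned int tid, Op op)
{
    smem[tid] = val;
    if (tid < 16)
    {
        smem[tid] = val = op(val, smem[tid + 16]);
        smem[tid] = val = op(val, smem[tid +  8]);
        smem[tid] = val = op(val, smem[tid +  4]);
        smem[tid] = val = op(val, smem[tid +  2]);
        smem[tid] = val = op(val, smem[tid +  1]);
    }
}

With op = plus<volatile float>() this computes exactly what the old reduceSum32 did; passing the functor lets the same helper serve min/max reductions elsewhere.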
@@ -43,8 +43,8 @@
 #ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
 #define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
-#include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
 namespace cv { namespace gpu { namespace device
 {
@@ -72,64 +72,53 @@ namespace cv { namespace gpu { namespace device
 return -last <= mini && maxi <= 2 * last;
 }
-private:
 int last;
 };
-template <typename D>
-struct BrdRowReflect101: BrdReflect101
+template <typename D> struct BrdRowReflect101 : BrdReflect101
 {
 explicit BrdRowReflect101(int len): BrdReflect101(len) {}
-template <typename T>
-__device__ __forceinline__ D at_low(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
 {
 return saturate_cast<D>(data[idx_low(i)]);
 }
-template <typename T>
-__device__ __forceinline__ D at_high(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
 {
 return saturate_cast<D>(data[idx_high(i)]);
 }
 };
-template <typename D>
-struct BrdColReflect101: BrdReflect101
+template <typename D> struct BrdColReflect101 : BrdReflect101
 {
 BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {}
-template <typename T>
-__device__ __forceinline__ D at_low(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
 {
 return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step));
 }
-template <typename T>
-__device__ __forceinline__ D at_high(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
 {
 return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
 }
-private:
 int step;
 };
 struct BrdReplicate
 {
 explicit BrdReplicate(int len): last(len - 1) {}
 __device__ __forceinline__ int idx_low(int i) const
 {
-return max(i, 0);
+return ::max(i, 0);
 }
 __device__ __forceinline__ int idx_high(int i) const
 {
-return min(i, last);
+return ::min(i, last);
 }
 __device__ __forceinline__ int idx(int i) const
@@ -142,64 +131,52 @@ namespace cv { namespace gpu { namespace device
 return true;
 }
-private:
 int last;
 };
-template <typename D>
-struct BrdRowReplicate: BrdReplicate
+template <typename D> struct BrdRowReplicate : BrdReplicate
 {
 explicit BrdRowReplicate(int len): BrdReplicate(len) {}
-template <typename T>
-__device__ __forceinline__ D at_low(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
 {
 return saturate_cast<D>(data[idx_low(i)]);
 }
-template <typename T>
-__device__ __forceinline__ D at_high(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
 {
 return saturate_cast<D>(data[idx_high(i)]);
 }
 };
-template <typename D>
-struct BrdColReplicate: BrdReplicate
+template <typename D> struct BrdColReplicate : BrdReplicate
 {
 BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {}
-template <typename T>
-__device__ __forceinline__ D at_low(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
 {
 return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step));
 }
-template <typename T>
-__device__ __forceinline__ D at_high(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
 {
 return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
 }
-private:
 int step;
 };
-template <typename D>
-struct BrdRowConstant
+template <typename D> struct BrdRowConstant
 {
 explicit BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}
-template <typename T>
-__device__ __forceinline__ D at_low(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
 {
 return i >= 0 ? saturate_cast<D>(data[i]) : val;
 }
-template <typename T>
-__device__ __forceinline__ D at_high(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
 {
 return i < len ? saturate_cast<D>(data[i]) : val;
 }
@@ -209,24 +186,20 @@ namespace cv { namespace gpu { namespace device
 return true;
 }
-private:
 int len;
 D val;
 };
-template <typename D>
-struct BrdColConstant
+template <typename D> struct BrdColConstant
 {
 BrdColConstant(int len_, int step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}
-template <typename T>
-__device__ __forceinline__ D at_low(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
 {
 return i >= 0 ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val;
 }
-template <typename T>
-__device__ __forceinline__ D at_high(int i, const T* data) const
+template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
 {
 return i < len ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val;
 }
@@ -236,15 +209,12 @@ namespace cv { namespace gpu { namespace device
 return true;
 }
-private:
 int len;
 int step;
 D val;
 };
-template <typename OutT>
-struct BrdConstant
+template <typename OutT> struct BrdConstant
 {
 BrdConstant(int w, int h, const OutT &val = VecTraits<OutT>::all(0)) : w(w), h(h), val(val) {}
@@ -255,11 +225,9 @@ namespace cv { namespace gpu { namespace device
 return val;
 }
-private:
 int w, h;
 OutT val;
 };
 }}}
 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
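With the private: specifiers dropped, the border objects are plain functors that kernels instantiate and call directly. An illustrative consumer, assuming border_interpolate.hpp is included (the wrapper function itself is not from this commit):

// Read the two neighbors of column x in a row of width elements,
// clamping out-of-range indices via BrdReplicate::idx_low/idx_high
// and saturate-casting each element to float.
template <typename T>
__device__ float neighbor_sum(const T* row, int width, int x)
{
    cv::gpu::device::BrdRowReplicate<float> brd(width);
    return brd.at_low(x - 1, row) + brd.at_high(x + 1, row);
}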
This diff is collapsed.
@@ -44,6 +44,7 @@
 #define __OPENCV_GPU_DATAMOV_UTILS_HPP__
 #include "internal_shared.hpp"
+#include "utility.hpp"
 namespace cv { namespace gpu { namespace device
 {
@@ -55,49 +56,40 @@ namespace cv { namespace gpu { namespace device
 __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
 };
 #else // __CUDA_ARCH__ >= 200
-#if defined(_WIN64) || defined(__LP64__)
-// 64-bit register modifier for inlined asm
-#define _OPENCV_ASM_PTR_ "l"
-#else
-// 32-bit register modifier for inlined asm
-#define _OPENCV_ASM_PTR_ "r"
-#endif
 template<class T> struct ForceGlob;
-#define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
+#define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
 template <> struct ForceGlob<base_type> \
 { \
 __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
 { \
-asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
+asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
 } \
 };
-#define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
+#define OPENCV_GPU_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
 template <> struct ForceGlob<base_type> \
 { \
 __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
 { \
-asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
+asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
 } \
 };
-DEFINE_FORCE_GLOB_B(uchar, u8)
-DEFINE_FORCE_GLOB_B(schar, s8)
-DEFINE_FORCE_GLOB_B(char, b8)
-DEFINE_FORCE_GLOB (ushort, u16, h)
-DEFINE_FORCE_GLOB (short, s16, h)
-DEFINE_FORCE_GLOB (uint, u32, r)
-DEFINE_FORCE_GLOB (int, s32, r)
-DEFINE_FORCE_GLOB (float, f32, f)
-DEFINE_FORCE_GLOB (double, f64, d)
-#undef DEFINE_FORCE_GLOB
-#undef DEFINE_FORCE_GLOB_B
-#undef _OPENCV_ASM_PTR_
+OPENCV_GPU_DEFINE_FORCE_GLOB_B(uchar, u8)
+OPENCV_GPU_DEFINE_FORCE_GLOB_B(schar, s8)
+OPENCV_GPU_DEFINE_FORCE_GLOB_B(char, b8)
+OPENCV_GPU_DEFINE_FORCE_GLOB (ushort, u16, h)
+OPENCV_GPU_DEFINE_FORCE_GLOB (short, s16, h)
+OPENCV_GPU_DEFINE_FORCE_GLOB (uint, u32, r)
+OPENCV_GPU_DEFINE_FORCE_GLOB (int, s32, r)
+OPENCV_GPU_DEFINE_FORCE_GLOB (float, f32, f)
+OPENCV_GPU_DEFINE_FORCE_GLOB (double, f64, d)
+#undef OPENCV_GPU_DEFINE_FORCE_GLOB
+#undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
 #endif // __CUDA_ARCH__ >= 200
 }}}
...
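The file-local pointer-size register modifier (_OPENCV_ASM_PTR_) gives way to OPENCV_GPU_ASM_PTR, presumably now provided by the newly included utility.hpp; the ForceGlob interface itself is unchanged. It forces a load straight from global memory (ld.global) rather than letting the compiler cache the value in a register. Usage sketch (the wrapper function is illustrative):

// Force a fresh read of ptr[offset] from global memory.
template <typename T>
__device__ T load_global(const T* ptr, int offset)
{
    T val;
    cv::gpu::device::ForceGlob<T>::Load(ptr, offset, val);
    return val;
}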
This diff is collapsed.
@@ -45,7 +45,7 @@
 namespace cv { namespace gpu { namespace device
 {
-template<class T> struct numeric_limits_gpu
+template<class T> struct numeric_limits
 {
 typedef T type;
 __device__ __forceinline__ static type min() { return type(); };
@@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed;
 };
-template<> struct numeric_limits_gpu<bool>
+template<> struct numeric_limits<bool>
 {
 typedef bool type;
 __device__ __forceinline__ static type min() { return false; };
@@ -73,7 +73,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = false;
 };
-template<> struct numeric_limits_gpu<char>
+template<> struct numeric_limits<char>
 {
 typedef char type;
 __device__ __forceinline__ static type min() { return CHAR_MIN; };
@@ -87,7 +87,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = (char)-1 == -1;
 };
-template<> struct numeric_limits_gpu<signed char>
+template<> struct numeric_limits<signed char>
 {
 typedef char type;
 __device__ __forceinline__ static type min() { return CHAR_MIN; };
@@ -101,7 +101,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = (signed char)-1 == -1;
 };
-template<> struct numeric_limits_gpu<unsigned char>
+template<> struct numeric_limits<unsigned char>
 {
 typedef unsigned char type;
 __device__ __forceinline__ static type min() { return 0; };
@@ -115,7 +115,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = false;
 };
-template<> struct numeric_limits_gpu<short>
+template<> struct numeric_limits<short>
 {
 typedef short type;
 __device__ __forceinline__ static type min() { return SHRT_MIN; };
@@ -129,7 +129,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = true;
 };
-template<> struct numeric_limits_gpu<unsigned short>
+template<> struct numeric_limits<unsigned short>
 {
 typedef unsigned short type;
 __device__ __forceinline__ static type min() { return 0; };
@@ -143,7 +143,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = false;
 };
-template<> struct numeric_limits_gpu<int>
+template<> struct numeric_limits<int>
 {
 typedef int type;
 __device__ __forceinline__ static type min() { return INT_MIN; };
@@ -158,7 +158,7 @@ namespace cv { namespace gpu { namespace device
 };
-template<> struct numeric_limits_gpu<unsigned int>
+template<> struct numeric_limits<unsigned int>
 {
 typedef unsigned int type;
 __device__ __forceinline__ static type min() { return 0; };
@@ -172,7 +172,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = false;
 };
-template<> struct numeric_limits_gpu<long>
+template<> struct numeric_limits<long>
 {
 typedef long type;
 __device__ __forceinline__ static type min() { return LONG_MIN; };
@@ -186,7 +186,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = true;
 };
-template<> struct numeric_limits_gpu<unsigned long>
+template<> struct numeric_limits<unsigned long>
 {
 typedef unsigned long type;
 __device__ __forceinline__ static type min() { return 0; };
@@ -200,7 +200,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = false;
 };
-template<> struct numeric_limits_gpu<float>
+template<> struct numeric_limits<float>
 {
 typedef float type;
 __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
@@ -214,7 +214,7 @@ namespace cv { namespace gpu { namespace device
 static const bool is_signed = true;
 };
-template<> struct numeric_limits_gpu<double>
+template<> struct numeric_limits<double>
 {
 typedef double type;
 __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
...
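Renaming numeric_limits_gpu to numeric_limits gives device code the familiar std spelling inside the cv::gpu::device namespace. The minimum-search initialization this trait enables, used throughout the stereo kernels above, written out as an illustrative helper:

// Start a running minimum at the largest representable value of T,
// then scan for the cheapest of n costs.
template <typename T>
__device__ int arg_min(const T* costs, int n)
{
    T minimum = cv::gpu::device::numeric_limits<T>::max();
    int id = 0;
    for (int d = 0; d < n; ++d)
    {
        if (costs[d] < minimum)
        {
            minimum = costs[d];
            id = d;
        }
    }
    return id;
}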
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_TRAITS_HPP__
#define __OPENCV_GPU_VEC_TRAITS_HPP__
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
template<typename T, int N> struct TypeVec;
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; };
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)
#undef OPENCV_GPU_IMPLEMENT_TYPE_VEC
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
template<typename T> struct VecTraits;
#define OPENCV_GPU_IMPLEMENT_VEC_TRAITS(type) \
template<> struct VecTraits<type> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ type all(type v) {return v;} \
static __device__ __host__ type make(type x) {return x;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
typedef type elem_type; \
enum {cn=2}; \
static __device__ __host__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
typedef type elem_type; \
enum {cn=3}; \
static __device__ __host__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
typedef type elem_type; \
enum {cn=4}; \
static __device__ __host__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(char)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
template<> struct VecTraits<schar>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ schar all(schar v) {return v;}
static __device__ __host__ schar make(schar x) {return x;}
};
}}}
#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
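The two trait families in this new header answer complementary questions: TypeVec maps an element type plus a channel count to the corresponding CUDA vector type, while VecTraits reports a type's channel count and constructs values of it generically. Illustrative uses (this snippet is not part of the commit):

using namespace cv::gpu::device;

__device__ void vec_traits_examples()
{
    // TypeVec: element type + channel count -> CUDA vector type
    typedef TypeVec<float, 3>::vec_type float3_t;  // float3
    typedef TypeVec<uchar4, 4>::vec_type uchar4_t; // uchar4 (idempotent)

    // VecTraits: construct and inspect vector values generically
    float3_t ones = VecTraits<float3_t>::all(1.0f);          // (1, 1, 1)
    uchar4_t px   = VecTraits<uchar4_t>::make(0, 0, 0, 255); // opaque black
    int channels  = VecTraits<float3_t>::cn;                 // 3
    (void)ones; (void)px; (void)channels;
}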
This diff is collapsed.