Commit 154fe4f6 authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #1519 from ilya-lavrenov:ocl_minMax

parents 8c15d276 4322c47b
...@@ -584,7 +584,8 @@ namespace cv ...@@ -584,7 +584,8 @@ namespace cv
CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
CV_EXPORTS void setIdentity(oclMat& src, double val); //! initializes a scaled identity matrix
CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
//////////////////////////////// Filter Engine //////////////////////////////// //////////////////////////////// Filter Engine ////////////////////////////////
......
This diff is collapsed.
...@@ -7,12 +7,17 @@ ...@@ -7,12 +7,17 @@
// copy or use the software. // copy or use the software.
// //
// //
// Intel License Agreement // License Agreement
// For Open Source Computer Vision Library // For Open Source Computer Vision Library
// //
// Copyright (C) 2000, Intel Corporation, all rights reserved. // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
//
//
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
// //
...@@ -21,12 +26,12 @@ ...@@ -21,12 +26,12 @@
// //
// * Redistribution's in binary form must reproduce the above copyright notice, // * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation // this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution. // and/or other oclMaterials provided with the distribution.
// //
// * The name of Intel Corporation may not be used to endorse or promote products // * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission. // derived from this software without specific prior written permission.
// //
// This software is provided by the copyright holders and contributors "as is" and // This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied // any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed. // warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct, // In no event shall the Intel Corporation or contributors be liable for any direct,
...@@ -39,25 +44,50 @@ ...@@ -39,25 +44,50 @@
// //
//M*/ //M*/
#include "test_precomp.hpp" #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
typedef ::testing::TestWithParam<cv::Size> normFixture; #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
TEST_P(normFixture, DISABLED_accuracy) __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
__global srcT *src2, int src2_step, int src2_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{ {
const cv::Size srcSize = GetParam(); int x = get_global_id(0);
int y = get_global_id(1);
cv::Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1); if (x < cols && y < rows)
cv::randu(src1, 0, 2); {
cv::randu(src2, 0, 2); int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
cv::ocl::oclMat oclSrc1(src1), oclSrc2(src2); dstT t0 = convertToDstT(src1[src1_index]);
dstT t1 = convertToDstT(src2[src2_index]);
dstT t2 = t0 - t1;
double value = cv::norm(src1, src2, cv::NORM_INF); dst[dst_index] = t2 >= 0 ? t2 : -t2;
double oclValue = cv::ocl::norm(oclSrc1, oclSrc2, cv::NORM_INF); }
ASSERT_EQ(value, oclValue);
} }
INSTANTIATE_TEST_CASE_P(oclNormTest, normFixture, __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
::testing::Values(cv::Size(500, 500), cv::Size(1000, 1000))); __global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
dstT t0 = convertToDstT(src1[src1_index]);
dst[dst_index] = t0 >= 0 ? t0 : -t0;
}
}
...@@ -53,169 +53,117 @@ ...@@ -53,169 +53,117 @@
#endif #endif
#endif #endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#define CONVERT_TYPE convert_uchar8
#define MIN_VAL 0
#define MAX_VAL 255
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#define CONVERT_TYPE convert_char8
#define MIN_VAL -128
#define MAX_VAL 127
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#define CONVERT_TYPE convert_ushort8
#define MIN_VAL 0
#define MAX_VAL 65535
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#define CONVERT_TYPE convert_short8
#define MIN_VAL -32768
#define MAX_VAL 32767
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#define CONVERT_TYPE convert_int8
#define MIN_VAL INT_MIN
#define MAX_VAL INT_MAX
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#define CONVERT_TYPE convert_float8
#define MIN_VAL (-FLT_MAX)
#define MAX_VAL FLT_MAX
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#define CONVERT_TYPE convert_double8
#define MIN_VAL (-DBL_MAX)
#define MAX_VAL DBL_MAX
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = a.s1;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = a.s2;a.s1 = a.s2;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = a.s3;a.s1 = a.s3;a.s2 = a.s3;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = a.s4;a.s1 = a.s4;a.s2 = a.s4;a.s3 = a.s4;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = a.s5;a.s1 = a.s5;a.s2 = a.s5;a.s3 = a.s5;a.s4 = a.s5;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = a.s6;a.s1 = a.s6;a.s2 = a.s6;a.s3 = a.s6;a.s4 = a.s6;a.s5 = a.s6;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = a.s7;a.s1 = a.s7;a.s2 = a.s7;a.s3 = a.s7;a.s4 = a.s7;a.s5 = a.s7;a.s6 = a.s7;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a) a.s7 = a.s6;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = a.s5;a.s6 = a.s5;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = a.s4;a.s6 = a.s4;a.s5 = a.s4;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = a.s3;a.s6 = a.s3;a.s5 = a.s3;a.s4 = a.s3;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = a.s2;a.s6 = a.s2;a.s5 = a.s2;a.s4 = a.s2;a.s3 = a.s2;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = a.s1;a.s6 = a.s1;a.s5 = a.s1;a.s4 = a.s1;a.s3 = a.s1;a.s2 = a.s1;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = a.s0;a.s6 = a.s0;a.s5 = a.s0;a.s4 = a.s0;a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array minMax**************************************/ /**************************************Array minMax**************************************/
__kernel void arithm_op_minMax (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global VEC_TYPE *dst) __kernel void arithm_op_minMax(__global const T * src, __global T * dst,
int cols, int invalid_cols, int offset, int elemnum, int groupnum)
{ {
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0); unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0); unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols; unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local VEC_TYPE localmem_max[128],localmem_min[128];
VEC_TYPE minval,maxval,temp; __local T localmem_max[128], localmem_min[128];
if(id < elemnum) T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
{ {
idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx]; temp = src[idx];
if(id % cols == 0 ) minval = min(minval, temp);
{ maxval = max(maxval, temp);
repeat_s(temp); }
}
if(id % cols == cols - 1) if(lid > 127)
{
localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_min[lid] = min(minval, localmem_min[lid]);
localmem_max[lid] = max(maxval, localmem_max[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
for (int lsize = 64; lsize > 0; lsize >>= 1)
{
if (lid < lsize)
{ {
repeat_e(temp); int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
} }
minval = temp; barrier(CLK_LOCAL_MEM_FENCE);
maxval = temp;
} }
else
if (lid == 0)
{ {
minval = MAX_VAL; dst[gid] = localmem_min[0];
maxval = MIN_VAL; dst[gid + groupnum] = localmem_max[0];
} }
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) }
__kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
int cols, int invalid_cols, int offset,
int elemnum, int groupnum,
const __global uchar * mask, int minvalid_cols, int moffset)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
unsigned int midx = moffset + id + (id / cols) * minvalid_cols;
__local T localmem_max[128], localmem_min[128];
T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
{ {
idx = offset + id + (id / cols) * invalid_cols; idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx]; midx = moffset + id + (id / cols) * minvalid_cols;
if(id % cols == 0 )
{ if (mask[midx])
repeat_s(temp);
}
if(id % cols == cols - 1)
{ {
repeat_e(temp); temp = src[idx];
minval = min(minval, temp);
maxval = max(maxval, temp);
} }
minval = min(minval,temp);
maxval = max(maxval,temp);
} }
if(lid > 127) if(lid > 127)
{ {
localmem_min[lid - 128] = minval; localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval; localmem_max[lid - 128] = maxval;
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128) if(lid < 128)
{ {
localmem_min[lid] = min(minval,localmem_min[lid]); localmem_min[lid] = min(minval, localmem_min[lid]);
localmem_max[lid] = max(maxval,localmem_max[lid]); localmem_max[lid] = max(maxval, localmem_max[lid]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{ {
if(lid < lsize) if (lid < lsize)
{ {
int lid2 = lsize + lid; int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]); localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]); localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if( lid == 0)
if (lid == 0)
{ {
dst[gid] = localmem_min[0]; dst[gid] = localmem_min[0];
dst[gid + groupnum] = localmem_max[0]; dst[gid + groupnum] = localmem_max[0];
......
...@@ -41,151 +41,53 @@ ...@@ -41,151 +41,53 @@
// or tort (including negligence or otherwise) arising in any way out of // or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
///
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#endif #endif
#if defined (REPEAT_S0) /**************************************Count NonZero**************************************/
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E0) __kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
#define repeat_e(a) a = a; __global srcT *src, __global dstT *dst)
#endif {
#if defined (REPEAT_E1) unsigned int lid = get_local_id(0);
#define repeat_e(a) a.s7 = 0; unsigned int gid = get_group_id(0);
#endif unsigned int id = get_global_id(0);
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable unsigned int idx = offset + id + (id / cols) * invalid_cols;
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable __local dstT localmem_nonzero[128];
dstT nonzero = (dstT)(0);
srcT zero = (srcT)(0), one = (srcT)(1);
/**************************************Count NonZero**************************************/ for (int grain = groupnum << 8; id < elemnum; id += grain)
__kernel void arithm_op_nonzero (int cols,int invalid_cols,int offset,int elemnum,int groupnum, {
__global VEC_TYPE *src, __global int8 *dst) idx = offset + id + (id / cols) * invalid_cols;
{ nonzero += src[idx] == zero ? zero : one;
unsigned int lid = get_local_id(0); }
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0); if (lid > 127)
unsigned int idx = offset + id + (id / cols) * invalid_cols; localmem_nonzero[lid - 128] = nonzero;
__local int8 localmem_nonzero[128]; barrier(CLK_LOCAL_MEM_FENCE);
int8 nonzero;
VEC_TYPE zero=0,one=1,temp; if (lid < 128)
if(id < elemnum) localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
{ barrier(CLK_LOCAL_MEM_FENCE);
temp = src[idx];
if(id % cols == 0 ) for (int lsize = 64; lsize > 0; lsize >>= 1)
{ {
repeat_s(temp); if (lid < lsize)
} {
if(id % cols == cols - 1)
{
repeat_e(temp);
}
nonzero = convert_int8(temp == zero ? zero:one);
}
else
{
nonzero = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{
idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx];
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
nonzero = nonzero + convert_int8(temp == zero ? zero:one);
}
if(lid > 127)
{
localmem_nonzero[lid - 128] = nonzero;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
{
int lid2 = lsize + lid; int lid2 = lsize + lid;
localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2]; localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if( lid == 0)
{ if (lid == 0)
dst[gid] = localmem_nonzero[0]; dst[gid] = localmem_nonzero[0];
}
} }
...@@ -45,110 +45,125 @@ ...@@ -45,110 +45,125 @@
// //
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif #endif
#endif
#define CV_PI 3.1415926535898 #define CV_PI 3.1415926535898
#define CV_2PI 2*3.1415926535898
/**************************************phase inradians**************************************/ /**************************************phase inradians**************************************/
__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index)); float data1 = src1[src1_index];
float data2 = *((__global float *)((__global char *)src2 + src2_index)); float data2 = src2[src2_index];
float tmp = atan2(data2,data1); float tmp = atan2(data2, data1);
*((__global float *)((__global char *)dst + dst_index)) = tmp; if (tmp < 0)
} tmp += CV_2PI;
dst[dst_index] = tmp;
}
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step, int src2_offset, __global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step, int dst_offset, __global double *dst, int dst_step1, int dst_offset1,
int rows, int cols, int dst_step1) int cols, int rows)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index)); double data1 = src1[src1_index];
double data2 = *((__global double *)((__global char *)src2 + src2_index)); double data2 = src2[src2_index];
double tmp = atan2(data2, data1);
*((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1); if (tmp < 0)
} tmp += CV_2PI;
dst[dst_index] = tmp;
}
} }
#endif #endif
/**************************************phase indegrees**************************************/ /**************************************phase indegrees**************************************/
__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index)); float data1 = src1[src1_index];
float data2 = *((__global float *)((__global char *)src2 + src2_index)); float data2 = src2[src2_index];
float tmp = atan2(data2,data1); float tmp = atan2(data2, data1);
float tmp_data = 180*tmp/CV_PI; tmp = 180 * tmp / CV_PI;
*((__global float *)((__global char *)dst + dst_index)) = tmp_data; if (tmp < 0)
} tmp += 360;
dst[dst_index] = tmp;
}
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step, int src2_offset, __global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step, int dst_offset, __global double *dst, int dst_step1, int dst_offset1,
int rows, int cols, int dst_step1) int cols, int rows)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index)); double data1 = src1[src1_index];
double data2 = *((__global double *)((__global char *)src2 + src2_index)); double data2 = src2[src2_index];
double tmp = atan2(data2,data1); double tmp = atan2(src2[src2_index], src1[src1_index]);
double tmp_data = 180*tmp/CV_PI;
*((__global double *)((__global char *)dst + dst_index)) = tmp_data; tmp = 180 * tmp / CV_PI;
} if (tmp < 0)
tmp += 360;
dst[dst_index] = tmp;
}
} }
#endif #endif
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64 #ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
...@@ -50,51 +51,19 @@ ...@@ -50,51 +51,19 @@
#endif #endif
#endif #endif
__kernel void setIdentity(__global T * src, int src_step, int src_offset,
#if defined (DOUBLE_SUPPORT) int cols, int rows, __global const T * scalar)
#define DATA_TYPE double
#else
#define DATA_TYPE float
#endif
__kernel void setIdentityKernel_F1(__global float* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
{
if(x == y)
src[y * src_step + x] = scalar;
else
src[y * src_step + x] = 0 * scalar;
}
}
__kernel void setIdentityKernel_D1(__global DATA_TYPE* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < src_col && y < src_row) if (x < cols && y < rows)
{ {
if(x == y) int src_index = mad24(y, src_step, src_offset + x);
src[y * src_step + x] = scalar;
else
src[y * src_step + x] = 0 * scalar;
}
}
__kernel void setIdentityKernel_I1(__global int* src, int src_row, int src_col, int src_step, int scalar) if (x == y)
{ src[src_index] = *scalar;
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
{
if(x == y)
src[y * src_step + x] = scalar;
else else
src[y * src_step + x] = 0 * scalar; src[src_index] = 0;
} }
} }
...@@ -43,163 +43,62 @@ ...@@ -43,163 +43,62 @@
// //
//M*/ //M*/
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#define RES_TYPE double8 #elif defined (cl_amd_fp64)
#define CONVERT_RES_TYPE convert_double8 #pragma OPENCL EXTENSION cl_amd_fp64:enable
#else
#define RES_TYPE float8
#define CONVERT_RES_TYPE convert_float8
#endif #endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#endif
#if defined (FUNC_TYPE_0)
#define FUNC(a,b) b += a;
#endif
#if defined (FUNC_TYPE_1)
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
#endif
#if defined (FUNC_TYPE_2)
#define FUNC(a,b) b = b + a * a;
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
#endif #endif
#if defined (REPEAT_E0) #if defined (FUNC_SUM)
#define repeat_e(a) a = a; #define FUNC(a, b) b += a;
#endif #endif
#if defined (REPEAT_E1) #if defined (FUNC_ABS_SUM)
#define repeat_e(a) a.s7 = 0; #define FUNC(a, b) b += a >= 0 ? a : -a;
#endif #endif
#if defined (REPEAT_E2) #if defined (FUNC_SQR_SUM)
#define repeat_e(a) a.s7 = 0;a.s6 = 0; #define FUNC(a, b) b += a * a;
#endif #endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array buffer SUM**************************************/ /**************************************Array buffer SUM**************************************/
__kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst) __kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global srcT *src, __global dstT *dst)
{ {
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0); unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0); unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols; unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local RES_TYPE localmem_sum[128];
RES_TYPE sum = 0,temp; __local dstT localmem_sum[128];
if(id < elemnum) dstT sum = (dstT)(0), temp;
{
temp = CONVERT_RES_TYPE(src[idx]); for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
FUNC(temp,sum);
}
else
{
sum = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{ {
idx = offset + id + (id / cols) * invalid_cols; idx = offset + id + (id / cols) * invalid_cols;
temp = CONVERT_RES_TYPE(src[idx]); temp = convertToDstT(src[idx]);
if(id % cols == 0 ) FUNC(temp, sum);
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
FUNC(temp,sum);
} }
if(lid > 127)
{ if (lid > 127)
localmem_sum[lid - 128] = sum; localmem_sum[lid - 128] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{ if (lid < 128)
localmem_sum[lid] = sum + localmem_sum[lid]; localmem_sum[lid] = sum + localmem_sum[lid];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{ {
if(lid < lsize) if (lid < lsize)
{ {
int lid2 = lsize + lid; int lid2 = lsize + lid;
localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2]; localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if( lid == 0)
{ if (lid == 0)
dst[gid] = localmem_sum[0]; dst[gid] = localmem_sum[0];
}
} }
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Shengen Yan,yanshengen@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#define RES_TYPE double4
#define CONVERT_RES_TYPE convert_double4
#else
#define RES_TYPE float4
#define CONVERT_RES_TYPE convert_float4
#endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar4
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char4
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort4
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short4
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int4
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float4
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double4
#endif
#if defined (FUNC_TYPE_0)
#define FUNC(a,b) b += a;
#endif
#if defined (FUNC_TYPE_1)
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
#endif
#if defined (FUNC_TYPE_2)
#define FUNC(a,b) b = b + a * a;
#endif
#if defined (REPEAT_S0)
#define repeat_s(a,b,c) a=a; b =b; c=c;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a,b,c) a.s0=0; b=b; c=c;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a,b,c) a.s0=0; a.s1=0; b=b; c=c;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a,b,c) a.s0=0; a.s1=0; a.s2=0; b=b; c=c;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a,b,c) a=0;b=b; c=c;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a,b,c) a=0; b.s0=0;c=c;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; c=c;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; b.s2=0; c=c;
#endif
#if defined (REPEAT_S8)
#define repeat_s(a,b,c) a=0; b=0; c=c;
#endif
#if defined (REPEAT_S9)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0;
#endif
#if defined (REPEAT_S10)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0;
#endif
#if defined (REPEAT_S11)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; c.s2=0;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a,b,c) a=a; b =b; c=c;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; c.s1=0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a,b,c) a=a; b=b; c=0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a,b,c) a=a; b.s3=0; c=0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; c=0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; b.s1=0; c=0;
#endif
#if defined (REPEAT_E8)
#define repeat_e(a,b,c) a=a; b=0; c=0;
#endif
#if defined (REPEAT_E9)
#define repeat_e(a,b,c) a.s3=0; b=0; c=0;
#endif
#if defined (REPEAT_E10)
#define repeat_e(a,b,c) a.s3=0; a.s2=0; b=0; c=0;
#endif
#if defined (REPEAT_E11)
#define repeat_e(a,b,c) a.s3=0; a.s2=0; a.s1=0; b=0; c=0;
#endif
__kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
idx = idx * 3;
__local RES_TYPE localmem_sum1[128];
__local RES_TYPE localmem_sum2[128];
__local RES_TYPE localmem_sum3[128];
RES_TYPE sum1 = 0,sum2 = 0,sum3 = 0,temp1,temp2,temp3;
if(id < elemnum)
{
temp1 = CONVERT_RES_TYPE(src[idx]);
temp2 = CONVERT_RES_TYPE(src[idx+1]);
temp3 = CONVERT_RES_TYPE(src[idx+2]);
if(id % cols == 0 )
{
repeat_s(temp1,temp2,temp3);
}
if(id % cols == cols - 1)
{
repeat_e(temp1,temp2,temp3);
}
FUNC(temp1,sum1);
FUNC(temp2,sum2);
FUNC(temp3,sum3);
}
else
{
sum1 = 0;
sum2 = 0;
sum3 = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{
idx = offset + id + (id / cols) * invalid_cols;
idx = idx * 3;
temp1 = CONVERT_RES_TYPE(src[idx]);
temp2 = CONVERT_RES_TYPE(src[idx+1]);
temp3 = CONVERT_RES_TYPE(src[idx+2]);
if(id % cols == 0 )
{
repeat_s(temp1,temp2,temp3);
}
if(id % cols == cols - 1)
{
repeat_e(temp1,temp2,temp3);
}
FUNC(temp1,sum1);
FUNC(temp2,sum2);
FUNC(temp3,sum3);
}
if(lid > 127)
{
localmem_sum1[lid - 128] = sum1;
localmem_sum2[lid - 128] = sum2;
localmem_sum3[lid - 128] = sum3;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_sum1[lid] = sum1 + localmem_sum1[lid];
localmem_sum2[lid] = sum2 + localmem_sum2[lid];
localmem_sum3[lid] = sum3 + localmem_sum3[lid];
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
{
int lid2 = lsize + lid;
localmem_sum1[lid] = localmem_sum1[lid] + localmem_sum1[lid2];
localmem_sum2[lid] = localmem_sum2[lid] + localmem_sum2[lid2];
localmem_sum3[lid] = localmem_sum3[lid] + localmem_sum3[lid2];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
{
dst[gid*3] = localmem_sum1[0];
dst[gid*3+1] = localmem_sum2[0];
dst[gid*3+2] = localmem_sum3[0];
}
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment