Commit 154fe4f6 authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #1519 from ilya-lavrenov:ocl_minMax

parents 8c15d276 4322c47b
......@@ -584,7 +584,8 @@ namespace cv
CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
CV_EXPORTS void setIdentity(oclMat& src, double val);
//! initializes a scaled identity matrix
CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
//////////////////////////////// Filter Engine ////////////////////////////////
......
This diff is collapsed.
......@@ -7,12 +7,17 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
......@@ -21,12 +26,12 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// and/or other oclMaterials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -39,25 +44,50 @@
//
//M*/
#include "test_precomp.hpp"
typedef ::testing::TestWithParam<cv::Size> normFixture;
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
TEST_P(normFixture, DISABLED_accuracy)
__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
__global srcT *src2, int src2_step, int src2_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{
const cv::Size srcSize = GetParam();
int x = get_global_id(0);
int y = get_global_id(1);
cv::Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1);
cv::randu(src1, 0, 2);
cv::randu(src2, 0, 2);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
cv::ocl::oclMat oclSrc1(src1), oclSrc2(src2);
dstT t0 = convertToDstT(src1[src1_index]);
dstT t1 = convertToDstT(src2[src2_index]);
dstT t2 = t0 - t1;
double value = cv::norm(src1, src2, cv::NORM_INF);
double oclValue = cv::ocl::norm(oclSrc1, oclSrc2, cv::NORM_INF);
ASSERT_EQ(value, oclValue);
dst[dst_index] = t2 >= 0 ? t2 : -t2;
}
}
INSTANTIATE_TEST_CASE_P(oclNormTest, normFixture,
::testing::Values(cv::Size(500, 500), cv::Size(1000, 1000)));
__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
dstT t0 = convertToDstT(src1[src1_index]);
dst[dst_index] = t0 >= 0 ? t0 : -t0;
}
}
......@@ -53,169 +53,117 @@
#endif
#endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#define CONVERT_TYPE convert_uchar8
#define MIN_VAL 0
#define MAX_VAL 255
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#define CONVERT_TYPE convert_char8
#define MIN_VAL -128
#define MAX_VAL 127
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#define CONVERT_TYPE convert_ushort8
#define MIN_VAL 0
#define MAX_VAL 65535
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#define CONVERT_TYPE convert_short8
#define MIN_VAL -32768
#define MAX_VAL 32767
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#define CONVERT_TYPE convert_int8
#define MIN_VAL INT_MIN
#define MAX_VAL INT_MAX
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#define CONVERT_TYPE convert_float8
#define MIN_VAL (-FLT_MAX)
#define MAX_VAL FLT_MAX
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#define CONVERT_TYPE convert_double8
#define MIN_VAL (-DBL_MAX)
#define MAX_VAL DBL_MAX
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = a.s1;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = a.s2;a.s1 = a.s2;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = a.s3;a.s1 = a.s3;a.s2 = a.s3;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = a.s4;a.s1 = a.s4;a.s2 = a.s4;a.s3 = a.s4;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = a.s5;a.s1 = a.s5;a.s2 = a.s5;a.s3 = a.s5;a.s4 = a.s5;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = a.s6;a.s1 = a.s6;a.s2 = a.s6;a.s3 = a.s6;a.s4 = a.s6;a.s5 = a.s6;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = a.s7;a.s1 = a.s7;a.s2 = a.s7;a.s3 = a.s7;a.s4 = a.s7;a.s5 = a.s7;a.s6 = a.s7;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a) a.s7 = a.s6;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = a.s5;a.s6 = a.s5;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = a.s4;a.s6 = a.s4;a.s5 = a.s4;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = a.s3;a.s6 = a.s3;a.s5 = a.s3;a.s4 = a.s3;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = a.s2;a.s6 = a.s2;a.s5 = a.s2;a.s4 = a.s2;a.s3 = a.s2;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = a.s1;a.s6 = a.s1;a.s5 = a.s1;a.s4 = a.s1;a.s3 = a.s1;a.s2 = a.s1;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = a.s0;a.s6 = a.s0;a.s5 = a.s0;a.s4 = a.s0;a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array minMax**************************************/
__kernel void arithm_op_minMax (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global VEC_TYPE *dst)
__kernel void arithm_op_minMax(__global const T * src, __global T * dst,
int cols, int invalid_cols, int offset, int elemnum, int groupnum)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local VEC_TYPE localmem_max[128],localmem_min[128];
VEC_TYPE minval,maxval,temp;
if(id < elemnum)
__local T localmem_max[128], localmem_min[128];
T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
{
idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx];
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
minval = min(minval, temp);
maxval = max(maxval, temp);
}
if(lid > 127)
{
localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_min[lid] = min(minval, localmem_min[lid]);
localmem_max[lid] = max(maxval, localmem_max[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
for (int lsize = 64; lsize > 0; lsize >>= 1)
{
if (lid < lsize)
{
repeat_e(temp);
int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
}
minval = temp;
maxval = temp;
barrier(CLK_LOCAL_MEM_FENCE);
}
else
if (lid == 0)
{
minval = MAX_VAL;
maxval = MIN_VAL;
dst[gid] = localmem_min[0];
dst[gid + groupnum] = localmem_max[0];
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
}
__kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
int cols, int invalid_cols, int offset,
int elemnum, int groupnum,
const __global uchar * mask, int minvalid_cols, int moffset)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
unsigned int midx = moffset + id + (id / cols) * minvalid_cols;
__local T localmem_max[128], localmem_min[128];
T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
{
idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx];
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
midx = moffset + id + (id / cols) * minvalid_cols;
if (mask[midx])
{
repeat_e(temp);
temp = src[idx];
minval = min(minval, temp);
maxval = max(maxval, temp);
}
minval = min(minval,temp);
maxval = max(maxval,temp);
}
if(lid > 127)
{
localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_min[lid] = min(minval,localmem_min[lid]);
localmem_max[lid] = max(maxval,localmem_max[lid]);
localmem_min[lid] = min(minval, localmem_min[lid]);
localmem_max[lid] = max(maxval, localmem_max[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
if (lid < lsize)
{
int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]);
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
if (lid == 0)
{
dst[gid] = localmem_min[0];
dst[gid + groupnum] = localmem_max[0];
......
......@@ -41,151 +41,53 @@
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
///
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
#endif
/**************************************Count NonZero**************************************/
#if defined (REPEAT_E0)
#define repeat_e(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a) a.s7 = 0;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
__kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
__global srcT *src, __global dstT *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local dstT localmem_nonzero[128];
dstT nonzero = (dstT)(0);
srcT zero = (srcT)(0), one = (srcT)(1);
/**************************************Count NonZero**************************************/
__kernel void arithm_op_nonzero (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global int8 *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local int8 localmem_nonzero[128];
int8 nonzero;
VEC_TYPE zero=0,one=1,temp;
if(id < elemnum)
{
temp = src[idx];
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
nonzero = convert_int8(temp == zero ? zero:one);
}
else
{
nonzero = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{
idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx];
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
nonzero = nonzero + convert_int8(temp == zero ? zero:one);
}
if(lid > 127)
{
localmem_nonzero[lid - 128] = nonzero;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
{
for (int grain = groupnum << 8; id < elemnum; id += grain)
{
idx = offset + id + (id / cols) * invalid_cols;
nonzero += src[idx] == zero ? zero : one;
}
if (lid > 127)
localmem_nonzero[lid - 128] = nonzero;
barrier(CLK_LOCAL_MEM_FENCE);
if (lid < 128)
localmem_nonzero[lid] = nonzero + localmem_nonzero[lid];
barrier(CLK_LOCAL_MEM_FENCE);
for (int lsize = 64; lsize > 0; lsize >>= 1)
{
if (lid < lsize)
{
int lid2 = lsize + lid;
localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
{
dst[gid] = localmem_nonzero[0];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (lid == 0)
dst[gid] = localmem_nonzero[0];
}
......@@ -45,110 +45,125 @@
//
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
#define CV_PI 3.1415926535898
#define CV_2PI 2*3.1415926535898
/**************************************phase inradians**************************************/
__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < cols && y < rows)
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index));
float data2 = *((__global float *)((__global char *)src2 + src2_index));
float tmp = atan2(data2,data1);
float data1 = src1[src1_index];
float data2 = src2[src2_index];
float tmp = atan2(data2, data1);
*((__global float *)((__global char *)dst + dst_index)) = tmp;
}
if (tmp < 0)
tmp += CV_2PI;
dst[dst_index] = tmp;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *src2, int src2_step, int src2_offset,
__global double *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < cols && y < rows)
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index));
double data2 = *((__global double *)((__global char *)src2 + src2_index));
double data1 = src1[src1_index];
double data2 = src2[src2_index];
double tmp = atan2(data2, data1);
*((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1);
}
if (tmp < 0)
tmp += CV_2PI;
dst[dst_index] = tmp;
}
}
#endif
/**************************************phase indegrees**************************************/
__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < cols && y < rows)
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index));
float data2 = *((__global float *)((__global char *)src2 + src2_index));
float tmp = atan2(data2,data1);
float tmp_data = 180*tmp/CV_PI;
float data1 = src1[src1_index];
float data2 = src2[src2_index];
float tmp = atan2(data2, data1);
tmp = 180 * tmp / CV_PI;
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
}
if (tmp < 0)
tmp += 360;
dst[dst_index] = tmp;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *src2, int src2_step, int src2_offset,
__global double *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < cols && y < rows)
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index));
double data2 = *((__global double *)((__global char *)src2 + src2_index));
double tmp = atan2(data2,data1);
double tmp_data = 180*tmp/CV_PI;
double data1 = src1[src1_index];
double data2 = src2[src2_index];
double tmp = atan2(src2[src2_index], src1[src1_index]);
*((__global double *)((__global char *)dst + dst_index)) = tmp_data;
}
tmp = 180 * tmp / CV_PI;
if (tmp < 0)
tmp += 360;
dst[dst_index] = tmp;
}
}
#endif
......@@ -42,6 +42,7 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
......@@ -50,51 +51,19 @@
#endif
#endif
#if defined (DOUBLE_SUPPORT)
#define DATA_TYPE double
#else
#define DATA_TYPE float
#endif
__kernel void setIdentityKernel_F1(__global float* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
{
if(x == y)
src[y * src_step + x] = scalar;
else
src[y * src_step + x] = 0 * scalar;
}
}
__kernel void setIdentityKernel_D1(__global DATA_TYPE* src, int src_row, int src_col, int src_step, DATA_TYPE scalar)
__kernel void setIdentity(__global T * src, int src_step, int src_offset,
int cols, int rows, __global const T * scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
if (x < cols && y < rows)
{
if(x == y)
src[y * src_step + x] = scalar;
else
src[y * src_step + x] = 0 * scalar;
}
}
int src_index = mad24(y, src_step, src_offset + x);
__kernel void setIdentityKernel_I1(__global int* src, int src_row, int src_col, int src_step, int scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
{
if(x == y)
src[y * src_step + x] = scalar;
if (x == y)
src[src_index] = *scalar;
else
src[y * src_step + x] = 0 * scalar;
src[src_index] = 0;
}
}
......@@ -43,163 +43,62 @@
//
//M*/
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#define RES_TYPE double8
#define CONVERT_RES_TYPE convert_double8
#else
#define RES_TYPE float8
#define CONVERT_RES_TYPE convert_float8
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar8
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char8
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort8
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short8
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int8
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float8
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double8
#endif
#if defined (FUNC_TYPE_0)
#define FUNC(a,b) b += a;
#endif
#if defined (FUNC_TYPE_1)
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
#endif
#if defined (FUNC_TYPE_2)
#define FUNC(a,b) b = b + a * a;
#endif
#if defined (REPEAT_S0)
#define repeat_s(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a) a = a;
#if defined (FUNC_SUM)
#define FUNC(a, b) b += a;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a) a.s7 = 0;
#if defined (FUNC_ABS_SUM)
#define FUNC(a, b) b += a >= 0 ? a : -a;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;
#if defined (FUNC_SQR_SUM)
#define FUNC(a, b) b += a * a;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array buffer SUM**************************************/
__kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst)
__kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global srcT *src, __global dstT *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local RES_TYPE localmem_sum[128];
RES_TYPE sum = 0,temp;
if(id < elemnum)
{
temp = CONVERT_RES_TYPE(src[idx]);
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
FUNC(temp,sum);
}
else
{
sum = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
__local dstT localmem_sum[128];
dstT sum = (dstT)(0), temp;
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
{
idx = offset + id + (id / cols) * invalid_cols;
temp = CONVERT_RES_TYPE(src[idx]);
if(id % cols == 0 )
{
repeat_s(temp);
}
if(id % cols == cols - 1)
{
repeat_e(temp);
}
FUNC(temp,sum);
temp = convertToDstT(src[idx]);
FUNC(temp, sum);
}
if(lid > 127)
{
if (lid > 127)
localmem_sum[lid - 128] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
if (lid < 128)
localmem_sum[lid] = sum + localmem_sum[lid];
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
if (lid < lsize)
{
int lid2 = lsize + lid;
localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
{
if (lid == 0)
dst[gid] = localmem_sum[0];
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Shengen Yan,yanshengen@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#define RES_TYPE double4
#define CONVERT_RES_TYPE convert_double4
#else
#define RES_TYPE float4
#define CONVERT_RES_TYPE convert_float4
#endif
#if defined (DEPTH_0)
#define VEC_TYPE uchar4
#endif
#if defined (DEPTH_1)
#define VEC_TYPE char4
#endif
#if defined (DEPTH_2)
#define VEC_TYPE ushort4
#endif
#if defined (DEPTH_3)
#define VEC_TYPE short4
#endif
#if defined (DEPTH_4)
#define VEC_TYPE int4
#endif
#if defined (DEPTH_5)
#define VEC_TYPE float4
#endif
#if defined (DEPTH_6)
#define VEC_TYPE double4
#endif
#if defined (FUNC_TYPE_0)
#define FUNC(a,b) b += a;
#endif
#if defined (FUNC_TYPE_1)
#define FUNC(a,b) b = b + (a >= 0 ? a : -a);
#endif
#if defined (FUNC_TYPE_2)
#define FUNC(a,b) b = b + a * a;
#endif
#if defined (REPEAT_S0)
#define repeat_s(a,b,c) a=a; b =b; c=c;
#endif
#if defined (REPEAT_S1)
#define repeat_s(a,b,c) a.s0=0; b=b; c=c;
#endif
#if defined (REPEAT_S2)
#define repeat_s(a,b,c) a.s0=0; a.s1=0; b=b; c=c;
#endif
#if defined (REPEAT_S3)
#define repeat_s(a,b,c) a.s0=0; a.s1=0; a.s2=0; b=b; c=c;
#endif
#if defined (REPEAT_S4)
#define repeat_s(a,b,c) a=0;b=b; c=c;
#endif
#if defined (REPEAT_S5)
#define repeat_s(a,b,c) a=0; b.s0=0;c=c;
#endif
#if defined (REPEAT_S6)
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; c=c;
#endif
#if defined (REPEAT_S7)
#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; b.s2=0; c=c;
#endif
#if defined (REPEAT_S8)
#define repeat_s(a,b,c) a=0; b=0; c=c;
#endif
#if defined (REPEAT_S9)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0;
#endif
#if defined (REPEAT_S10)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0;
#endif
#if defined (REPEAT_S11)
#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; c.s2=0;
#endif
#if defined (REPEAT_E0)
#define repeat_e(a,b,c) a=a; b =b; c=c;
#endif
#if defined (REPEAT_E1)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0;
#endif
#if defined (REPEAT_E2)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0;
#endif
#if defined (REPEAT_E3)
#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; c.s1=0;
#endif
#if defined (REPEAT_E4)
#define repeat_e(a,b,c) a=a; b=b; c=0;
#endif
#if defined (REPEAT_E5)
#define repeat_e(a,b,c) a=a; b.s3=0; c=0;
#endif
#if defined (REPEAT_E6)
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; c=0;
#endif
#if defined (REPEAT_E7)
#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; b.s1=0; c=0;
#endif
#if defined (REPEAT_E8)
#define repeat_e(a,b,c) a=a; b=0; c=0;
#endif
#if defined (REPEAT_E9)
#define repeat_e(a,b,c) a.s3=0; b=0; c=0;
#endif
#if defined (REPEAT_E10)
#define repeat_e(a,b,c) a.s3=0; a.s2=0; b=0; c=0;
#endif
#if defined (REPEAT_E11)
#define repeat_e(a,b,c) a.s3=0; a.s2=0; a.s1=0; b=0; c=0;
#endif
__kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
idx = idx * 3;
__local RES_TYPE localmem_sum1[128];
__local RES_TYPE localmem_sum2[128];
__local RES_TYPE localmem_sum3[128];
RES_TYPE sum1 = 0,sum2 = 0,sum3 = 0,temp1,temp2,temp3;
if(id < elemnum)
{
temp1 = CONVERT_RES_TYPE(src[idx]);
temp2 = CONVERT_RES_TYPE(src[idx+1]);
temp3 = CONVERT_RES_TYPE(src[idx+2]);
if(id % cols == 0 )
{
repeat_s(temp1,temp2,temp3);
}
if(id % cols == cols - 1)
{
repeat_e(temp1,temp2,temp3);
}
FUNC(temp1,sum1);
FUNC(temp2,sum2);
FUNC(temp3,sum3);
}
else
{
sum1 = 0;
sum2 = 0;
sum3 = 0;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{
idx = offset + id + (id / cols) * invalid_cols;
idx = idx * 3;
temp1 = CONVERT_RES_TYPE(src[idx]);
temp2 = CONVERT_RES_TYPE(src[idx+1]);
temp3 = CONVERT_RES_TYPE(src[idx+2]);
if(id % cols == 0 )
{
repeat_s(temp1,temp2,temp3);
}
if(id % cols == cols - 1)
{
repeat_e(temp1,temp2,temp3);
}
FUNC(temp1,sum1);
FUNC(temp2,sum2);
FUNC(temp3,sum3);
}
if(lid > 127)
{
localmem_sum1[lid - 128] = sum1;
localmem_sum2[lid - 128] = sum2;
localmem_sum3[lid - 128] = sum3;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_sum1[lid] = sum1 + localmem_sum1[lid];
localmem_sum2[lid] = sum2 + localmem_sum2[lid];
localmem_sum3[lid] = sum3 + localmem_sum3[lid];
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
{
int lid2 = lsize + lid;
localmem_sum1[lid] = localmem_sum1[lid] + localmem_sum1[lid2];
localmem_sum2[lid] = localmem_sum2[lid] + localmem_sum2[lid2];
localmem_sum3[lid] = localmem_sum3[lid] + localmem_sum3[lid2];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
{
dst[gid*3] = localmem_sum1[0];
dst[gid*3+1] = localmem_sum2[0];
dst[gid*3+2] = localmem_sum3[0];
}
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment