Commit 91ac9688 authored by Andrey Kamaev's avatar Andrey Kamaev

Allow OpenCL acceleration in every OpenCV module

parent d28df08e
......@@ -782,7 +782,7 @@ if(HAVE_CUDA)
status(" Use fast math:" CUDA_FAST_MATH THEN YES ELSE NO)
endif()
if(HAVE_OPENCL AND BUILD_opencv_ocl)
if(HAVE_OPENCL)
status("")
status(" OpenCL")
if(OPENCL_INCLUDE_DIR)
......
......@@ -432,10 +432,22 @@ macro(ocv_glob_module_sources)
file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
file(GLOB cl_kernels "src/opencl/*.cl")
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
source_group("Include" FILES ${lib_hdrs})
source_group("Include\\detail" FILES ${lib_hdrs_detail})
if(HAVE_OPENCL AND cl_kernels)
ocv_include_directories(${OPENCL_INCLUDE_DIRS})
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp"
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
source_group("Src\\OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
endif()
ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail} SOURCES ${lib_srcs} ${lib_int_hdrs})
endmacro()
......@@ -449,6 +461,9 @@ macro(ocv_create_module)
if(NOT "${ARGN}" STREQUAL "SKIP_LINK")
target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
if(HAVE_OPENCL AND OPENCL_LIBRARIES)
target_link_libraries(${the_module} ${OPENCL_LIBRARIES})
endif()
endif()
add_dependencies(opencv_modules ${the_module})
......
......@@ -3,45 +3,5 @@ if(NOT HAVE_OPENCL)
endif()
set(the_description "OpenCL-accelerated Computer Vision")
ocv_add_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_nonfree)
ocv_module_include_directories(${OPENCL_INCLUDE_DIRS})
file(GLOB CL_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/*.cl")
set(kernels_cpp "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
set(cl2cpp_script "${CMAKE_CURRENT_SOURCE_DIR}/cl2cpp.cmake")
add_custom_command(
OUTPUT ${kernels_cpp}
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/kernels" -DOUTPUT="${kernels_cpp}" -P ${cl2cpp_script}
DEPENDS ${CL_FILES} ${cl2cpp_script})
file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
file(GLOB lib_srcs "src/*.cpp")
file(GLOB lib_int_hdrs "src/*.h*")
source_group("Include" FILES ${lib_hdrs})
source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs} ${kernels_cpp})
ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_nonfree)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
ocv_set_module_sources(HEADERS ${lib_hdrs} SOURCES ${lib_int_hdrs} ${lib_srcs} ${kernels_cpp})
ocv_create_module(${OPENCL_LIBRARIES})
ocv_add_precompiled_headers(${the_module})
################################################################################################################
################################ OpenCL Module Tests ##################################################
################################################################################################################
file(GLOB test_srcs "test/*.cpp")
file(GLOB test_hdrs "test/*.hpp" "test/*.h")
ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
FILES "Src" ${test_srcs})
################################################################################################################
################################ OpenCL Module Performance ##################################################
################################################################################################################
file(GLOB perf_srcs "perf/*.cpp")
file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h")
ocv_add_perf_tests(FILES "Include" ${perf_hdrs}
FILES "Src" ${perf_srcs})
This diff is collapsed.
......@@ -61,29 +61,29 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
uchar4 src1_data ,src2_data;
uchar4 src1_data ,src2_data;
src1_data.x= src1_index+0 >= 0 ? src1[src1_index+0] : 0;
src1_data.y= src1_index+1 >= 0 ? src1[src1_index+1] : 0;
src1_data.z= src1_index+2 >= 0 ? src1[src1_index+2] : 0;
src1_data.w= src1_index+3 >= 0 ? src1[src1_index+3] : 0;
src1_data.x= src1_index+0 >= 0 ? src1[src1_index+0] : 0;
src1_data.y= src1_index+1 >= 0 ? src1[src1_index+1] : 0;
src1_data.z= src1_index+2 >= 0 ? src1[src1_index+2] : 0;
src1_data.w= src1_index+3 >= 0 ? src1[src1_index+3] : 0;
src2_data.x= src2_index+0 >= 0 ? src2[src2_index+0] : 0;
src2_data.y= src2_index+1 >= 0 ? src2[src2_index+1] : 0;
src2_data.z= src2_index+2 >= 0 ? src2[src2_index+2] : 0;
src2_data.w= src2_index+3 >= 0 ? src2[src2_index+3] : 0;
src2_data.x= src2_index+0 >= 0 ? src2[src2_index+0] : 0;
src2_data.y= src2_index+1 >= 0 ? src2[src2_index+1] : 0;
src2_data.z= src2_index+2 >= 0 ? src2[src2_index+2] : 0;
src2_data.w= src2_index+3 >= 0 ? src2[src2_index+3] : 0;
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
// short4 tmp = convert_short4_sat(src1_data) * alpha + convert_short4_sat(src2_data) * beta + gama;
......@@ -117,14 +117,14 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -177,14 +177,14 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -236,18 +236,18 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define bitOfInt (sizeof(int)== 4 ? 2: 3)
#define dst_align ((dst_offset >> bitOfInt) & 3)
int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt));
int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << bitOfInt) -(dst_align << bitOfInt));
......@@ -256,7 +256,7 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index_fix));
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
int4 tmp;
......@@ -299,16 +299,16 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
......@@ -361,16 +361,16 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 3) -(dst_align << 3));
......
......@@ -63,8 +63,8 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -75,14 +75,14 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -113,8 +113,8 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -126,14 +126,14 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
char4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
{
char4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
char4 tmp;
}
if(src2_index < 0)
{
char4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -164,8 +164,8 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -177,14 +177,14 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -216,8 +216,8 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -229,14 +229,14 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -320,4 +320,3 @@ __kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src
}
}
#endif
......@@ -1135,4 +1135,3 @@ __kernel void arithm_bitwise_and_with_mask_C4_D6 (__global char *src1, int src1_
}
}
#endif
......@@ -1055,4 +1055,3 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int sr
}
}
#endif
......@@ -62,7 +62,7 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -72,7 +72,7 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = ~ src1_data;
/* if(src1_index < 0)
{
uchar4 tmp;
......@@ -102,7 +102,7 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -136,7 +136,7 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -171,7 +171,7 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -245,14 +245,13 @@ __kernel void arithm_bitwise_not_D6 (__global char *src, int src_step, int src_o
{
int src_index = mad24(y, src_step, (x << 3) + src_offset);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
char8 data;
data = *((__global char8 *)((__global char *)src + src_index));
data = ~ data;
*((__global char8 *)((__global char *)dst + dst_index)) = data;
}
}
#endif
......@@ -63,8 +63,8 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -111,8 +111,8 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -148,8 +148,8 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -186,8 +186,8 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -276,4 +276,3 @@ __kernel void arithm_bitwise_or_D6 (__global char *src1, int src1_step, int src1
}
}
#endif
......@@ -1135,4 +1135,3 @@ __kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_s
}
}
#endif
......@@ -911,4 +911,3 @@ __kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, in
}
}
#endif
......@@ -1078,4 +1078,3 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src
}
}
#endif
......@@ -63,8 +63,8 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -76,14 +76,14 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -113,8 +113,8 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
x = x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -126,14 +126,14 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
char4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
{
char4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
char4 tmp;
}
if(src2_index < 0)
{
char4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -164,8 +164,8 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -177,14 +177,14 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -216,8 +216,8 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
......@@ -231,14 +231,14 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
if(src1_index < 0)
{
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
......@@ -324,4 +324,3 @@ __kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src
}
}
#endif
......@@ -1135,4 +1135,3 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D6 (__global char *src1, int src1_
}
}
#endif
......@@ -1055,4 +1055,3 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (__global short *src1, int sr
}
}
#endif
......@@ -455,5 +455,3 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
}
}
#endif
......@@ -60,17 +60,17 @@ __kernel void magnitudeSqr_C1_D5 (__global float *src1,int src1_step,int src1_of
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
......@@ -125,16 +125,16 @@ __kernel void magnitudeSqr_C2_D5 (__global float *src1,int src1_step,int src1_of
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
......@@ -148,8 +148,8 @@ __kernel void magnitudeSqr_C2_D5 (__global float *src1,int src1_step,int src1_of
src1_data.s01234567 = src1_data.s45670123;
if(src1_index== -2)
src1_data.s01234567 = src1_data.s23456701;
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
......
......@@ -240,4 +240,3 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(lm_maxloc[0]);
}
}
......@@ -194,4 +194,3 @@ __kernel void arithm_op_minMax_mask (int cols,int invalid_cols,int offset,int el
dst[gid + groupnum] = localmem_max[0];
}
}
......@@ -203,4 +203,3 @@ __kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,in
dst[gid] = localmem_sum[0];
}
}
......@@ -245,4 +245,3 @@ __kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,
dst[gid*3+2] = localmem_sum3[0];
}
}
......@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Liu Liujun, liujun@multicorewareinc.com
// Liu Liujun, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -61,7 +61,7 @@ __kernel void BlendLinear_C1_D0(
int pos = mad24(idy,istep >> 2,idx);
int wpos = mad24(idy,wstep >> 2,idx);
float4 w1 = weight1[wpos], w2 = weight2[wpos];
dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f));
}
}
......@@ -86,7 +86,7 @@ __kernel void BlendLinear_C4_D0(
int wpos = mad24(idy,wstep, idx);
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f));
}
}
......@@ -138,4 +138,3 @@ __kernel void BlendLinear_C4_D5(
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
This diff is collapsed.
......@@ -234,4 +234,3 @@ __kernel
map_y[y * step_y + x] = ycoo;
}
}
......@@ -466,5 +466,3 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
dst[start_addr] = sum;
}
}
......@@ -283,4 +283,3 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
newnode[counter].alpha[0] = t1.alpha[0];
newnode[counter].alpha[1] = t1.alpha[1];
}
......@@ -107,5 +107,3 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
dst[gy*(dst_step >> 2)+gx] = res;
}
}
......@@ -267,4 +267,3 @@ __kernel __attribute__((reqd_work_group_size(256,1,1)))void equalizeHist(
}
}
*/
......@@ -484,4 +484,3 @@ __kernel void medianFilter5_C1_D5(__global float * src, __global float * dst, i
dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
}
#undef op(a,b)
......@@ -411,4 +411,3 @@ __kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
dst[dpos] = src[spos];
}
......@@ -150,4 +150,3 @@ __kernel void threshold_C1_D5(__global const float * restrict src, __global floa
}
}
}
......@@ -682,4 +682,3 @@ __kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4
}
}
}
......@@ -821,4 +821,3 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
res[res_idx] = normAcc(num, denum);
}
}
......@@ -240,4 +240,3 @@ __kernel void meanshiftproc_kernel( __global uchar4* in, __global uchar4* outr,
// outsp[basesp] =(short2)((short)x0,(short)y0);
}
}
......@@ -27,7 +27,7 @@ typedef long T;
#define DST_ROW_A03 9
__kernel void icvContourMoments(int contour_total,
__global float* reader_oclmat_data,
__global float* reader_oclmat_data,
__global T* dst_a,
int dst_step)
{
......@@ -58,7 +58,7 @@ __kernel void icvContourMoments(int contour_total,
dxy = xi_1 * yi - xi * yi_1;
xii_1 = xi_1 + xi;
yii_1 = yi_1 + yi;
dst_step /= sizeof(T);
*( dst_a + DST_ROW_A00 * dst_step + idx) = dxy;
*( dst_a + DST_ROW_A10 * dst_step + idx) = dxy * xii_1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment