Commit 06a4bad8 authored by peng xiao

Merge ocl and/or/xor operators into one kernel each.

parent abe2ea59
This diff is collapsed.
This diff is collapsed.
......@@ -16,6 +16,7 @@
//
// @Authors
// Jiang Liyuan, jlyuan001.good@163.com
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -50,11 +51,17 @@
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and without mask**************************************/
__kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int src1_offset,
//bitwise_binary without mask for and, or, xor operators
/////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary///////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
// Operator placed between the two source operands by the
// arithm_bitwise_binary_* kernels. When OP_BINARY has not already been
// defined at this point, it falls back to bitwise AND (&).
// NOTE(review): presumably the host overrides this for or/xor through a
// build-time definition - confirm against the caller.
#if !defined(OP_BINARY)
#define OP_BINARY &
#endif
__kernel void arithm_bitwise_binary_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
......@@ -95,7 +102,7 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = src1_data & src2_data;
uchar4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
......@@ -107,7 +114,7 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
}
__kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_binary_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
......@@ -148,7 +155,7 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
char4 dst_data = *((__global char4 *)(dst + dst_index));
char4 tmp_data = src1_data & src2_data;
char4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
......@@ -160,7 +167,7 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
}
__kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_binary_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
......@@ -202,7 +209,7 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data = src1_data & src2_data;
ushort4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
......@@ -215,7 +222,7 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
__kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_binary_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
......@@ -257,7 +264,7 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 tmp_data = src1_data & src2_data;
short4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
......@@ -270,7 +277,7 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
__kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_binary_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
......@@ -286,13 +293,13 @@ __kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1
int data1 = *((__global int *)((__global char *)src1 + src1_index));
int data2 = *((__global int *)((__global char *)src2 + src2_index));
int tmp = data1 & data2;
int tmp = data1 OP_BINARY data2;
*((__global int *)((__global char *)dst + dst_index)) = tmp;
}
}
__kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_binary_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
......@@ -308,14 +315,14 @@ __kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src
char4 data1 = *((__global char4 *)((__global char *)src1 + src1_index));
char4 data2 = *((__global char4 *)((__global char *)src2 + src2_index));
char4 tmp = data1 & data2;
char4 tmp = data1 OP_BINARY data2;
*((__global char4 *)((__global char *)dst + dst_index)) = tmp;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_binary_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
......@@ -332,7 +339,7 @@ __kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src
char8 data1 = *((__global char8 *)((__global char *)src1 + src1_index));
char8 data2 = *((__global char8 *)((__global char *)src2 + src2_index));
*((__global char8 *)((__global char *)dst + dst_index)) = data1 & data2;
*((__global char8 *)((__global char *)dst + dst_index)) = data1 OP_BINARY data2;
}
}
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment