Commit c5fc8e03 authored by Li Peng

cleanup unnecessary macros in convolution ocl kernel

Signed-off-by: Li Peng <peng.li@intel.com>
parent 0aa5e43a
...@@ -257,11 +257,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType, ...@@ -257,11 +257,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
addDef("INPUT_DEPTH", channels_ / group_); addDef("INPUT_DEPTH", channels_ / group_);
addDef("TOTAL_INPUT_DEPTH_SIZE", channels_); addDef("TOTAL_INPUT_DEPTH_SIZE", channels_);
addDef("TOTAL_OUTPUT_DEPTH", num_output_); addDef("TOTAL_OUTPUT_DEPTH", num_output_);
addDef("INPUT_START_X", 0);
addDef("INPUT_START_Y", 0);
addDef("INPUT_START_Z", 0);
addDef("NUM_FILTERS", M_); addDef("NUM_FILTERS", M_);
addDef("OUT_BUFF_OFFSET", 0);
addDef("TILE_X", tile_x); addDef("TILE_X", tile_x);
addDef("TILE_Y", tile_y); addDef("TILE_Y", tile_y);
addDef("TILE_Y_STRIDE", tile_y_stride); addDef("TILE_Y_STRIDE", tile_y_stride);
......
...@@ -189,10 +189,8 @@ __kernel void ConvolveBasic( ...@@ -189,10 +189,8 @@ __kernel void ConvolveBasic(
// NDRange: (output_width+pad)/ OUT_BLOCK_WIDTH, (output_height+pad)/OUT_BLOCK_HEIGHT, NUM_FILTERS/OUT_BLOCK_DEPTH // NDRange: (output_width+pad)/ OUT_BLOCK_WIDTH, (output_height+pad)/OUT_BLOCK_HEIGHT, NUM_FILTERS/OUT_BLOCK_DEPTH
// NOTE: for beignet this reqd_work_group_size does not guarantee that SIMD16 mode will be used, the compiler could choose to use two SIMD8 threads, and if that happens the code will break. // NOTE: for beignet this reqd_work_group_size does not guarantee that SIMD16 mode will be used, the compiler could choose to use two SIMD8 threads, and if that happens the code will break.
#ifndef __BEIGNET__
__attribute__((reqd_work_group_size(1, 1, SIMD_SIZE))) __attribute__((reqd_work_group_size(1, 1, SIMD_SIZE)))
__attribute__((intel_reqd_sub_group_size(SIMD_SIZE))) __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
#endif
__kernel void __kernel void
convolve_simd( convolve_simd(
ELTWISE_DATA_ARG ELTWISE_DATA_ARG
...@@ -232,12 +230,12 @@ convolve_simd( ...@@ -232,12 +230,12 @@ convolve_simd(
int curr_local_y = ( lid / ( TILE_X / 4 ) ); int curr_local_y = ( lid / ( TILE_X / 4 ) );
int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4; int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4;
int curr_y = or * STRIDE_Y + INPUT_START_Y + curr_local_y; int curr_y = or * STRIDE_Y + curr_local_y;
int curr_x = oc * STRIDE_X + INPUT_START_X + curr_local_x; int curr_x = oc * STRIDE_X + curr_local_x;
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 #if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
int saved_y = curr_y; int saved_y = curr_y;
#endif #endif
in_addr = input_batch_offset + INPUT_START_Z * input_height * input_width in_addr = input_batch_offset
+ (curr_y - INPUT_PAD_H) * input_width // y tile offset + (curr_y - INPUT_PAD_H) * input_width // y tile offset
+ curr_x - INPUT_PAD_W; // x tile offset + curr_x - INPUT_PAD_W; // x tile offset
union { union {
...@@ -363,7 +361,7 @@ convolve_simd( ...@@ -363,7 +361,7 @@ convolve_simd(
fm = fm % ALIGNED_NUM_FILTERS; fm = fm % ALIGNED_NUM_FILTERS;
if ((ALIGNED_NUM_FILTERS == NUM_FILTERS || fm < NUM_FILTERS)) { if ((ALIGNED_NUM_FILTERS == NUM_FILTERS || fm < NUM_FILTERS)) {
unsigned int out_addr = OUT_BUFF_OFFSET + ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height; unsigned int out_addr = ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height;
out_addr += or * output_width + oc; out_addr += or * output_width + oc;
// we need this address calculation for biases because we support views and batching // we need this address calculation for biases because we support views and batching
#if APPLY_BIAS #if APPLY_BIAS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment