Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
17ffb288
Commit
17ffb288
authored
Nov 09, 2016
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7602 from mshabunin:fix-opencl-warnings
parents
f1d93cb2
3e28d517
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
70 additions
and
60 deletions
+70
-60
canny.cl
modules/imgproc/src/opencl/canny.cl
+2
-2
cvtcolor.cl
modules/imgproc/src/opencl/cvtcolor.cl
+40
-32
hough_lines.cl
modules/imgproc/src/opencl/hough_lines.cl
+3
-3
integral_sum.cl
modules/imgproc/src/opencl/integral_sum.cl
+1
-1
match_template.cl
modules/imgproc/src/opencl/match_template.cl
+4
-4
pyr_down.cl
modules/imgproc/src/opencl/pyr_down.cl
+2
-0
remap.cl
modules/imgproc/src/opencl/remap.cl
+2
-2
warp_affine.cl
modules/imgproc/src/opencl/warp_affine.cl
+6
-6
cascadedetect.cl
modules/objdetect/src/opencl/cascadedetect.cl
+8
-8
optical_flow_tvl1.cl
modules/video/src/opencl/optical_flow_tvl1.cl
+1
-1
pyrlk.cl
modules/video/src/opencl/pyrlk.cl
+1
-1
No files found.
modules/imgproc/src/opencl/canny.cl
View file @
17ffb288
...
...
@@ -82,10 +82,10 @@ inline float3 sobel(int idx, __local const floatN *smem)
// result: x, y, mag
float3 res;
floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
floatN dx = fma(
(floatN)
2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
floatN dy = fma(
(floatN)
2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]);
#ifdef L2GRAD
...
...
modules/imgproc/src/opencl/cvtcolor.cl
View file @
17ffb288
...
...
@@ -49,21 +49,21 @@
#
if
depth
==
0
#
define
DATA_TYPE
uchar
#
define
MAX_NUM
255
#
define
HALF_MAX
128
#
define
HALF_MAX
_NUM
128
#
define
COEFF_TYPE
int
#
define
SAT_CAST
(
num
)
convert_uchar_sat
(
num
)
#
define
DEPTH_0
#
elif
depth
==
2
#
define
DATA_TYPE
ushort
#
define
MAX_NUM
65535
#
define
HALF_MAX
32768
#
define
HALF_MAX
_NUM
32768
#
define
COEFF_TYPE
int
#
define
SAT_CAST
(
num
)
convert_ushort_sat
(
num
)
#
define
DEPTH_2
#
elif
depth
==
5
#
define
DATA_TYPE
float
#
define
MAX_NUM
1.0f
#
define
HALF_MAX
0.5f
#
define
HALF_MAX
_NUM
0.5f
#
define
COEFF_TYPE
float
#
define
SAT_CAST
(
num
)
(
num
)
#
define
DEPTH_5
...
...
@@ -229,11 +229,11 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YUVCoeffs_f;
const DATA_TYPE Y = fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2]));
const DATA_TYPE U = fma(b - Y, coeffs[3], HALF_MAX);
const DATA_TYPE V = fma(r - Y, coeffs[4], HALF_MAX);
const DATA_TYPE U = fma(b - Y, coeffs[3], HALF_MAX
_NUM
);
const DATA_TYPE V = fma(r - Y, coeffs[4], HALF_MAX
_NUM
);
#else
__constant int * coeffs = c_RGB2YUVCoeffs_i;
const int delta = HALF_MAX * (1 << yuv_shift);
const int delta = HALF_MAX
_NUM
* (1 << yuv_shift);
const int Y = CV_DESCALE(mad24(b, coeffs[0], mad24(g, coeffs[1], mul24(r, coeffs[2]))), yuv_shift);
const int U = CV_DESCALE(mad24(b - Y, coeffs[3], delta), yuv_shift);
const int V = CV_DESCALE(mad24(r - Y, coeffs[4], delta), yuv_shift);
...
...
@@ -278,14 +278,14 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset
#ifdef DEPTH_5
__constant float * coeffs = c_YUV2RGBCoeffs_f;
float r = fma(V - HALF_MAX, coeffs[3], Y);
float g = fma(V - HALF_MAX
, coeffs[2], fma(U - HALF_MAX
, coeffs[1], Y));
float b = fma(U - HALF_MAX, coeffs[0], Y);
float r = fma(V - HALF_MAX
_NUM
, coeffs[3], Y);
float g = fma(V - HALF_MAX
_NUM, coeffs[2], fma(U - HALF_MAX_NUM
, coeffs[1], Y));
float b = fma(U - HALF_MAX
_NUM
, coeffs[0], Y);
#else
__constant int * coeffs = c_YUV2RGBCoeffs_i;
const int r = Y + CV_DESCALE(mul24(V - HALF_MAX, coeffs[3]), yuv_shift);
const int g = Y + CV_DESCALE(mad24(V - HALF_MAX
, coeffs[2], mul24(U - HALF_MAX
, coeffs[1])), yuv_shift);
const int b = Y + CV_DESCALE(mul24(U - HALF_MAX, coeffs[0]), yuv_shift);
const int r = Y + CV_DESCALE(mul24(V - HALF_MAX
_NUM
, coeffs[3]), yuv_shift);
const int g = Y + CV_DESCALE(mad24(V - HALF_MAX
_NUM, coeffs[2], mul24(U - HALF_MAX_NUM
, coeffs[1])), yuv_shift);
const int b = Y + CV_DESCALE(mul24(U - HALF_MAX
_NUM
, coeffs[0]), yuv_shift);
#endif
dst[bidx] = SAT_CAST( b );
...
...
@@ -328,8 +328,8 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of
float Y3 = ysrc[src_step];
float Y4 = ysrc[src_step + 1];
float U = ((float)usrc[uidx]) - HALF_MAX;
float V = ((float)usrc[1-uidx]) - HALF_MAX;
float U = ((float)usrc[uidx]) - HALF_MAX
_NUM
;
float V = ((float)usrc[1-uidx]) - HALF_MAX
_NUM
;
__constant float* coeffs = c_YUV2RGBCoeffs_420;
float ruv = fma(coeffs[4], V, 0.5f);
...
...
@@ -373,6 +373,8 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of
}
}
#if uidx < 2
__kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset,
int rows, int cols)
...
...
@@ -399,12 +401,12 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int
#ifdef SRC_CONT
__global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset);
int u_ind = mad24(y, cols >> 1, x);
float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX
, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX
};
float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX
_NUM, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX_NUM
};
#else
int vsteps[2] = { cols >> 1, src_step - (cols >> 1)};
__global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x);
__global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0);
float uv[2] = { ((float)usrc[0]) - HALF_MAX
, ((float)vsrc[0]) - HALF_MAX
};
float uv[2] = { ((float)usrc[0]) - HALF_MAX
_NUM, ((float)vsrc[0]) - HALF_MAX_NUM
};
#endif
float U = uv[uidx];
float V = uv[1-uidx];
...
...
@@ -451,6 +453,10 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int
}
}
#endif
#if uidx < 2
__constant float c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f,
0.438999176f, -0.3679990768f, -0.0709991455f };
...
...
@@ -556,6 +562,8 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int
}
}
#endif
__kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dst_offset,
int rows, int cols)
...
...
@@ -576,15 +584,15 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
__constant float* coeffs = c_YUV2RGBCoeffs_420;
#ifndef USE_OPTIMIZED_LOAD
float U = ((float) src[uidx]) - HALF_MAX;
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX;
float U = ((float) src[uidx]) - HALF_MAX
_NUM
;
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX
_NUM
;
float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0];
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
#else
int load_src = *((__global int*) src);
float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff};
float U = vec_src[uidx] - HALF_MAX;
float V = vec_src[(2 + uidx) % 4] - HALF_MAX;
float U = vec_src[uidx] - HALF_MAX
_NUM
;
float V = vec_src[(2 + uidx) % 4] - HALF_MAX
_NUM
;
float y00 = max(0.f, vec_src[yidx] - 16.f) * coeffs[0];
float y01 = max(0.f, vec_src[yidx + 2] - 16.f) * coeffs[0];
#endif
...
...
@@ -644,11 +652,11 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offs
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YCrCbCoeffs_f;
DATA_TYPE Y = fma(b, coeffs[2], fma(g, coeffs[1], r * coeffs[0]));
DATA_TYPE Cr = fma(r - Y, coeffs[3], HALF_MAX);
DATA_TYPE Cb = fma(b - Y, coeffs[4], HALF_MAX);
DATA_TYPE Cr = fma(r - Y, coeffs[3], HALF_MAX
_NUM
);
DATA_TYPE Cb = fma(b - Y, coeffs[4], HALF_MAX
_NUM
);
#else
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
int delta = HALF_MAX * (1 << yuv_shift);
int delta = HALF_MAX
_NUM
* (1 << yuv_shift);
int Y = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], mul24(r, coeffs[0]))), yuv_shift);
int Cr = CV_DESCALE(mad24(r - Y, coeffs[3], delta), yuv_shift);
int Cb = CV_DESCALE(mad24(b - Y, coeffs[4], delta), yuv_shift);
...
...
@@ -694,14 +702,14 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
#ifdef DEPTH_5
__constant float * coeff = c_YCrCb2RGBCoeffs_f;
float r = fma(coeff[0], cr - HALF_MAX, yp);
float g = fma(coeff[1], cr - HALF_MAX
, fma(coeff[2], cb - HALF_MAX
, yp));
float b = fma(coeff[3], cb - HALF_MAX, yp);
float r = fma(coeff[0], cr - HALF_MAX
_NUM
, yp);
float g = fma(coeff[1], cr - HALF_MAX
_NUM, fma(coeff[2], cb - HALF_MAX_NUM
, yp));
float b = fma(coeff[3], cb - HALF_MAX
_NUM
, yp);
#else
__constant int * coeff = c_YCrCb2RGBCoeffs_i;
int r = yp + CV_DESCALE(coeff[0] * (cr - HALF_MAX), yuv_shift);
int g = yp + CV_DESCALE(mad24(coeff[1], cr - HALF_MAX
, coeff[2] * (cb - HALF_MAX
)), yuv_shift);
int b = yp + CV_DESCALE(coeff[3] * (cb - HALF_MAX), yuv_shift);
int r = yp + CV_DESCALE(coeff[0] * (cr - HALF_MAX
_NUM
), yuv_shift);
int g = yp + CV_DESCALE(mad24(coeff[1], cr - HALF_MAX
_NUM, coeff[2] * (cb - HALF_MAX_NUM
)), yuv_shift);
int b = yp + CV_DESCALE(coeff[3] * (cb - HALF_MAX
_NUM
), yuv_shift);
#endif
dstptr[(bidx^2)] = SAT_CAST(r);
...
...
@@ -1564,9 +1572,9 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
uchar4
src_pix
=
*
(
__global
const
uchar4
*
)(
src
+
src_index
)
;
*
(
__global
uchar4
*
)(
dst
+
dst_index
)
=
(
uchar4
)(
mad24
(
src_pix.x,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
mad24
(
src_pix.y,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
mad24
(
src_pix.z,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
src_pix.w
)
;
(
uchar4
)(
mad24
(
src_pix.x,
src_pix.w,
HALF_MAX
_NUM
)
/
MAX_NUM,
mad24
(
src_pix.y,
src_pix.w,
HALF_MAX
_NUM
)
/
MAX_NUM,
mad24
(
src_pix.z,
src_pix.w,
HALF_MAX
_NUM
)
/
MAX_NUM,
src_pix.w
)
;
++y
;
dst_index
+=
dst_step
;
...
...
modules/imgproc/src/opencl/hough_lines.cl
View file @
17ffb288
...
...
@@ -80,7 +80,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step,
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift;
int r = convert_int_rte(mad(
(float)
x, cosVal, y * sinVal)) + shift;
atomic_inc(accum + r + 1);
}
}
...
...
@@ -117,7 +117,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
const int x = (point & 0xFFFF);
const int y = point >> 16;
int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift;
int r = convert_int_rte(mad(
(float)
x, cosVal, y * sinVal)) + shift;
atomic_inc(l_accum + r + 1);
}
...
...
@@ -186,7 +186,7 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac
if (y < accum_rows-2)
{
__global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
__global
const
uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
__global int4* lines = (__global int4*)(lines_ptr + lines_offset);
__global int* lines_index = lines_index_ptr + 1;
...
...
modules/imgproc/src/opencl/integral_sum.cl
View file @
17ffb288
...
...
@@ -125,7 +125,7 @@ kernel void integral_sum_rows(__global const uchar *buf_ptr, int buf_step, int b
sumT
accum
=
0
;
#
ifdef
SUM_SQUARE
__global
sumSQT
*dst_sq
=
(
__global
sumT
*
)(
dst_sq_ptr
+
dst_sq_offset
)
;
__global
sumSQT
*dst_sq
=
(
__global
sum
SQ
T
*
)(
dst_sq_ptr
+
dst_sq_offset
)
;
for
(
int
xin
=
x
; xin < cols; xin += gs)
{
dst_sq[xin]
=
0
;
...
...
modules/imgproc/src/opencl/match_template.cl
View file @
17ffb288
...
...
@@ -465,10 +465,10 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
T value_sum = sum[mad24(t_rows, step, t_cols)] - sum[mad24(t_rows, step, 0)] - sum[t_cols] + sum[0];
T value_sqsum = sqsum[mad24(t_rows, step, t_cols)] - sqsum[mad24(t_rows, step, 0)] - sqsum[t_cols] + sqsum[0];
float num = convertToDT(mad(value_sum, template_sum, 0));
float num = convertToDT(mad(value_sum, template_sum,
(float)
0));
value_sqsum -= weight * value_sum * value_sum;
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), 0));
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum),
(float)
0));
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global float * dstult = (__global float *)(dst+dst_idx);
...
...
@@ -509,7 +509,7 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
float num = convertToDT(mad(value_sum, temp_sum, 0));
value_sqsum -= weight * value_sum * value_sum;
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), 0));
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum),
(float)
0));
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global float * dstult = (__global float *)(dst+dst_idx);
...
...
@@ -549,7 +549,7 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
float
num
=
convertToDT
(
mad
(
value_sum,
temp_sum,
0
))
;
value_sqsum
-=
weight
*
value_sum
*
value_sum
;
float
denum
=
sqrt
(
mad
(
template_sqsum,
convertToDT
(
value_sqsum
)
,
0
))
;
float
denum
=
sqrt
(
mad
(
template_sqsum,
convertToDT
(
value_sqsum
)
,
(
float
)
0
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
...
...
modules/imgproc/src/opencl/pyr_down.cl
View file @
17ffb288
...
...
@@ -148,6 +148,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
if
(
src_y
>=
2
&&
src_y
<
src_rows
-
4
)
{
#
undef
EXTRAPOLATE_
#
define
EXTRAPOLATE_
(
val,
maxVal
)
val
#
if
kercn
==
1
col
=
EXTRAPOLATE
(
x,
src_cols
)
;
...
...
@@ -180,6 +181,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
}
else
//
need
extrapolate
y
{
#
undef
EXTRAPOLATE_
#
define
EXTRAPOLATE_
(
val,
maxVal
)
EXTRAPOLATE
(
val,
maxVal
)
#
if
kercn
==
1
col
=
EXTRAPOLATE
(
x,
src_cols
)
;
...
...
modules/imgproc/src/opencl/remap.cl
View file @
17ffb288
...
...
@@ -414,8 +414,8 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src
#
if
defined
BORDER_CONSTANT
float
xf
=
map1[0],
yf
=
map2[0]
;
int
sx
=
convert_int_sat_rtz
(
mad
(
xf,
INTER_TAB_SIZE,
0.5f
))
>>
INTER_BITS
;
int
sy
=
convert_int_sat_rtz
(
mad
(
yf,
INTER_TAB_SIZE,
0.5f
))
>>
INTER_BITS
;
int
sx
=
convert_int_sat_rtz
(
mad
(
xf,
(
float
)
INTER_TAB_SIZE,
0.5f
))
>>
INTER_BITS
;
int
sy
=
convert_int_sat_rtz
(
mad
(
yf,
(
float
)
INTER_TAB_SIZE,
0.5f
))
>>
INTER_BITS
;
__constant
float
*
coeffs_x
=
coeffs
+
((
convert_int_rte
(
xf
*
INTER_TAB_SIZE
)
&
(
INTER_TAB_SIZE
-
1
))
<<
1
)
;
__constant
float
*
coeffs_y
=
coeffs
+
((
convert_int_rte
(
yf
*
INTER_TAB_SIZE
)
&
(
INTER_TAB_SIZE
-
1
))
<<
1
)
;
...
...
modules/imgproc/src/opencl/warp_affine.cl
View file @
17ffb288
...
...
@@ -104,8 +104,8 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
for
(
int
dy
=
dy0,
dy1
=
min
(
dst_rows,
dy0
+
rowsPerWI
)
; dy < dy1; ++dy, dst_index += dst_step)
{
int
X0
=
X0_
+
rint
(
fma
(
M[1],
dy,
M[2]
)
*
AB_SCALE
)
+
round_delta
;
int
Y0
=
Y0_
+
rint
(
fma
(
M[4],
dy,
M[5]
)
*
AB_SCALE
)
+
round_delta
;
int
X0
=
X0_
+
rint
(
fma
(
M[1],
(
CT
)
dy,
M[2]
)
*
AB_SCALE
)
+
round_delta
;
int
Y0
=
Y0_
+
rint
(
fma
(
M[4],
(
CT
)
dy,
M[5]
)
*
AB_SCALE
)
+
round_delta
;
short
sx
=
convert_short_sat
(
X0
>>
AB_BITS
)
;
short
sy
=
convert_short_sat
(
Y0
>>
AB_BITS
)
;
...
...
@@ -146,8 +146,8 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
for
(
int
dy
=
dy0,
dy1
=
min
(
dst_rows,
dy0
+
rowsPerWI
)
; dy < dy1; ++dy)
{
int
X0
=
X0_
+
rint
(
fma
(
M[1],
dy,
M[2]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
int
Y0
=
Y0_
+
rint
(
fma
(
M[4],
dy,
M[5]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
int
X0
=
X0_
+
rint
(
fma
(
M[1],
(
CT
)
dy,
M[2]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
int
Y0
=
Y0_
+
rint
(
fma
(
M[4],
(
CT
)
dy,
M[5]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
X0
=
X0
>>
(
AB_BITS
-
INTER_BITS
)
;
Y0
=
Y0
>>
(
AB_BITS
-
INTER_BITS
)
;
...
...
@@ -274,8 +274,8 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
if
(
dx
<
dst_cols
&&
dy
<
dst_rows
)
{
int
tmp
=
(
dx
<<
AB_BITS
)
;
int
X0
=
rint
(
M[0]
*
tmp
)
+
rint
(
fma
(
M[1],
dy,
M[2]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
int
Y0
=
rint
(
M[3]
*
tmp
)
+
rint
(
fma
(
M[4],
dy,
M[5]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
int
X0
=
rint
(
M[0]
*
tmp
)
+
rint
(
fma
(
M[1],
(
CT
)
dy,
M[2]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
int
Y0
=
rint
(
M[3]
*
tmp
)
+
rint
(
fma
(
M[4],
(
CT
)
dy,
M[5]
)
*
AB_SCALE
)
+
ROUND_DELTA
;
X0
=
X0
>>
(
AB_BITS
-
INTER_BITS
)
;
Y0
=
Y0
>>
(
AB_BITS
-
INTER_BITS
)
;
...
...
modules/objdetect/src/opencl/cascadedetect.cl
View file @
17ffb288
...
...
@@ -180,11 +180,11 @@ void runHaarClassifier(
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
s += (sval < st.y*nf) ? st.z : st.w;
...
...
@@ -204,11 +204,11 @@ void runHaarClassifier(
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
...
...
@@ -281,12 +281,12 @@ void runHaarClassifier(
int4 ofs = f->ofs[0];
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
//if( weight.z > 0 )
if( fabs(weight.z) > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
partsum += (sval < st.y*nf) ? st.z : st.w;
...
...
@@ -304,11 +304,11 @@ void runHaarClassifier(
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((
float)(
psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
...
...
modules/video/src/opencl/optical_flow_tvl1.cl
View file @
17ffb288
...
...
@@ -148,7 +148,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
}
}
inline
float
readImage
(
__global
float
*image,
int
x,
int
y,
int
rows,
int
cols,
int
elemCntPerRow
)
inline
float
readImage
(
__global
const
float
*image,
int
x,
int
y,
int
rows,
int
cols,
int
elemCntPerRow
)
{
int
i0
=
clamp
(
x,
0
,
cols
-
1
)
;
int
j0
=
clamp
(
y,
0
,
rows
-
1
)
;
...
...
modules/video/src/opencl/pyrlk.cl
View file @
17ffb288
...
...
@@ -266,7 +266,7 @@ inline void GetError(image2d_t J, const float x, const float y, const float* Pch
//macro
to
read
pixel
value
into
local
memory.
#
define
READI
(
_y,_x
)
IPatchLocal[mad24
(
mad24
((
_y
)
,
LSy,
yid
)
,
LM_W,
mad24
((
_x
)
,
LSx,
xid
))
]
=
read_imagef
(
I,
sampler,
(
float2
)(
mad
((
_x
)
,
LSx,
Point.x
+
xid
-
0.5f
)
,
mad
((
_y
)
,
LSy,
Point.y
+
yid
-
0.5f
)))
.
x
;
#
define
READI
(
_y,_x
)
IPatchLocal[mad24
(
mad24
((
_y
)
,
LSy,
yid
)
,
LM_W,
mad24
((
_x
)
,
LSx,
xid
))
]
=
read_imagef
(
I,
sampler,
(
float2
)(
mad
((
float
)(
_x
)
,
(
float
)
LSx,
Point.x
+
xid
-
0.5f
)
,
mad
((
float
)(
_y
)
,
(
float
)
LSy,
Point.y
+
yid
-
0.5f
)))
.
x
;
void
ReadPatchIToLocalMem
(
image2d_t
I,
float2
Point,
local
float*
IPatchLocal
)
{
int
xid=get_local_id
(
0
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment