Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
17ffb288
Commit
17ffb288
authored
8 years ago
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7602 from mshabunin:fix-opencl-warnings
parents
f1d93cb2
3e28d517
No related merge requests found
Show whitespace changes
Inline
Side-by-side
Showing 11 changed files with 70 additions and 60 deletions
+70
-60
canny.cl
modules/imgproc/src/opencl/canny.cl
+2
-2
cvtcolor.cl
modules/imgproc/src/opencl/cvtcolor.cl
+40
-32
hough_lines.cl
modules/imgproc/src/opencl/hough_lines.cl
+3
-3
integral_sum.cl
modules/imgproc/src/opencl/integral_sum.cl
+1
-1
match_template.cl
modules/imgproc/src/opencl/match_template.cl
+4
-4
pyr_down.cl
modules/imgproc/src/opencl/pyr_down.cl
+2
-0
remap.cl
modules/imgproc/src/opencl/remap.cl
+2
-2
warp_affine.cl
modules/imgproc/src/opencl/warp_affine.cl
+6
-6
cascadedetect.cl
modules/objdetect/src/opencl/cascadedetect.cl
+8
-8
optical_flow_tvl1.cl
modules/video/src/opencl/optical_flow_tvl1.cl
+1
-1
pyrlk.cl
modules/video/src/opencl/pyrlk.cl
+1
-1
No files found.
modules/imgproc/src/opencl/canny.cl
View file @
17ffb288
...
...
@@ -82,10 +82,10 @@ inline float3 sobel(int idx, __local const floatN *smem)
// result: x, y, mag
float3 res;
floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
floatN dx = fma((floatN)2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
floatN dy = fma((floatN)2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]);
#ifdef L2GRAD
...
...
This diff is collapsed.
Click to expand it.
modules/imgproc/src/opencl/cvtcolor.cl
View file @
17ffb288
...
...
@@ -49,21 +49,21 @@
#if depth == 0
#define DATA_TYPE uchar
#define MAX_NUM 255
#define HALF_MAX 128
#define HALF_MAX_NUM 128
#define COEFF_TYPE int
#define SAT_CAST(num) convert_uchar_sat(num)
#define DEPTH_0
#elif depth == 2
#define DATA_TYPE ushort
#define MAX_NUM 65535
#define HALF_MAX 32768
#define HALF_MAX_NUM 32768
#define COEFF_TYPE int
#define SAT_CAST(num) convert_ushort_sat(num)
#define DEPTH_2
#elif depth == 5
#define DATA_TYPE float
#define MAX_NUM 1.0f
#define HALF_MAX 0.5f
#define HALF_MAX_NUM 0.5f
#define COEFF_TYPE float
#define SAT_CAST(num) (num)
#define DEPTH_5
...
...
@@ -229,11 +229,11 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YUVCoeffs_f;
const DATA_TYPE Y = fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2]));
const DATA_TYPE U = fma(b - Y, coeffs[3], HALF_MAX);
const DATA_TYPE V = fma(r - Y, coeffs[4], HALF_MAX);
const DATA_TYPE U = fma(b - Y, coeffs[3], HALF_MAX_NUM);
const DATA_TYPE V = fma(r - Y, coeffs[4], HALF_MAX_NUM);
#else
__constant int * coeffs = c_RGB2YUVCoeffs_i;
const int delta = HALF_MAX * (1 << yuv_shift);
const int delta = HALF_MAX_NUM * (1 << yuv_shift);
const int Y = CV_DESCALE(mad24(b, coeffs[0], mad24(g, coeffs[1], mul24(r, coeffs[2]))), yuv_shift);
const int U = CV_DESCALE(mad24(b - Y, coeffs[3], delta), yuv_shift);
const int V = CV_DESCALE(mad24(r - Y, coeffs[4], delta), yuv_shift);
...
...
@@ -278,14 +278,14 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset
#ifdef DEPTH_5
__constant float * coeffs = c_YUV2RGBCoeffs_f;
float r = fma(V - HALF_MAX, coeffs[3], Y);
float g = fma(V - HALF_MAX, coeffs[2], fma(U - HALF_MAX, coeffs[1], Y));
float b = fma(U - HALF_MAX, coeffs[0], Y);
float r = fma(V - HALF_MAX_NUM, coeffs[3], Y);
float g = fma(V - HALF_MAX_NUM, coeffs[2], fma(U - HALF_MAX_NUM, coeffs[1], Y));
float b = fma(U - HALF_MAX_NUM, coeffs[0], Y);
#else
__constant int * coeffs = c_YUV2RGBCoeffs_i;
const int r = Y + CV_DESCALE(mul24(V - HALF_MAX, coeffs[3]), yuv_shift);
const int g = Y + CV_DESCALE(mad24(V - HALF_MAX, coeffs[2], mul24(U - HALF_MAX, coeffs[1])), yuv_shift);
const int b = Y + CV_DESCALE(mul24(U - HALF_MAX, coeffs[0]), yuv_shift);
const int r = Y + CV_DESCALE(mul24(V - HALF_MAX_NUM, coeffs[3]), yuv_shift);
const int g = Y + CV_DESCALE(mad24(V - HALF_MAX_NUM, coeffs[2], mul24(U - HALF_MAX_NUM, coeffs[1])), yuv_shift);
const int b = Y + CV_DESCALE(mul24(U - HALF_MAX_NUM, coeffs[0]), yuv_shift);
#endif
dst[bidx] = SAT_CAST( b );
...
...
@@ -328,8 +328,8 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of
float Y3 = ysrc[src_step];
float Y4 = ysrc[src_step + 1];
float U = ((float)usrc[uidx]) - HALF_MAX;
float V = ((float)usrc[1-uidx]) - HALF_MAX;
float U = ((float)usrc[uidx]) - HALF_MAX_NUM;
float V = ((float)usrc[1-uidx]) - HALF_MAX_NUM;
__constant float* coeffs = c_YUV2RGBCoeffs_420;
float ruv = fma(coeffs[4], V, 0.5f);
...
...
@@ -373,6 +373,8 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of
}
}
#if uidx < 2
__kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset,
int rows, int cols)
...
...
@@ -399,12 +401,12 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int
#ifdef SRC_CONT
__global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset);
int u_ind = mad24(y, cols >> 1, x);
float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX };
float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX_NUM, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX_NUM };
#else
int vsteps[2] = { cols >> 1, src_step - (cols >> 1)};
__global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x);
__global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0);
float uv[2] = { ((float)usrc[0]) - HALF_MAX, ((float)vsrc[0]) - HALF_MAX };
float uv[2] = { ((float)usrc[0]) - HALF_MAX_NUM, ((float)vsrc[0]) - HALF_MAX_NUM };
#endif
float U = uv[uidx];
float V = uv[1-uidx];
...
...
@@ -451,6 +453,10 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int
}
}
#endif
#if uidx < 2
__constant float c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f,
0.438999176f, -0.3679990768f, -0.0709991455f };
...
...
@@ -556,6 +562,8 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int
}
}
#endif
__kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dst_offset,
int rows, int cols)
...
...
@@ -576,15 +584,15 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
__constant float* coeffs = c_YUV2RGBCoeffs_420;
#ifndef USE_OPTIMIZED_LOAD
float U = ((float) src[uidx]) - HALF_MAX;
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX;
float U = ((float) src[uidx]) - HALF_MAX_NUM;
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX_NUM;
float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0];
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
#else
int load_src = *((__global int*) src);
float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff};
float U = vec_src[uidx] - HALF_MAX;
float V = vec_src[(2 + uidx) % 4] - HALF_MAX;
float U = vec_src[uidx] - HALF_MAX_NUM;
float V = vec_src[(2 + uidx) % 4] - HALF_MAX_NUM;
float y00 = max(0.f, vec_src[yidx] - 16.f) * coeffs[0];
float y01 = max(0.f, vec_src[yidx + 2] - 16.f) * coeffs[0];
#endif
...
...
@@ -644,11 +652,11 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offs
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YCrCbCoeffs_f;
DATA_TYPE Y = fma(b, coeffs[2], fma(g, coeffs[1], r * coeffs[0]));
DATA_TYPE Cr = fma(r - Y, coeffs[3], HALF_MAX);
DATA_TYPE Cb = fma(b - Y, coeffs[4], HALF_MAX);
DATA_TYPE Cr = fma(r - Y, coeffs[3], HALF_MAX_NUM);
DATA_TYPE Cb = fma(b - Y, coeffs[4], HALF_MAX_NUM);
#else
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
int delta = HALF_MAX * (1 << yuv_shift);
int delta = HALF_MAX_NUM * (1 << yuv_shift);
int Y = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], mul24(r, coeffs[0]))), yuv_shift);
int Cr = CV_DESCALE(mad24(r - Y, coeffs[3], delta), yuv_shift);
int Cb = CV_DESCALE(mad24(b - Y, coeffs[4], delta), yuv_shift);
...
...
@@ -694,14 +702,14 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
#ifdef DEPTH_5
__constant float * coeff = c_YCrCb2RGBCoeffs_f;
float r = fma(coeff[0], cr - HALF_MAX, yp);
float g = fma(coeff[1], cr - HALF_MAX, fma(coeff[2], cb - HALF_MAX, yp));
float b = fma(coeff[3], cb - HALF_MAX, yp);
float r = fma(coeff[0], cr - HALF_MAX_NUM, yp);
float g = fma(coeff[1], cr - HALF_MAX_NUM, fma(coeff[2], cb - HALF_MAX_NUM, yp));
float b = fma(coeff[3], cb - HALF_MAX_NUM, yp);
#else
__constant int * coeff = c_YCrCb2RGBCoeffs_i;
int r = yp + CV_DESCALE(coeff[0] * (cr - HALF_MAX), yuv_shift);
int g = yp + CV_DESCALE(mad24(coeff[1], cr - HALF_MAX, coeff[2] * (cb - HALF_MAX)), yuv_shift);
int b = yp + CV_DESCALE(coeff[3] * (cb - HALF_MAX), yuv_shift);
int r = yp + CV_DESCALE(coeff[0] * (cr - HALF_MAX_NUM), yuv_shift);
int g = yp + CV_DESCALE(mad24(coeff[1], cr - HALF_MAX_NUM, coeff[2] * (cb - HALF_MAX_NUM)), yuv_shift);
int b = yp + CV_DESCALE(coeff[3] * (cb - HALF_MAX_NUM), yuv_shift);
#endif
dstptr[(bidx^2)] = SAT_CAST(r);
...
...
@@ -1564,9 +1572,9 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
uchar4 src_pix = *(__global const uchar4 *)(src + src_index);
*(__global uchar4 *)(dst + dst_index) =
(uchar4)(mad24(src_pix.x, src_pix.w, HALF_MAX) / MAX_NUM,
mad24(src_pix.y, src_pix.w, HALF_MAX) / MAX_NUM,
mad24(src_pix.z, src_pix.w, HALF_MAX) / MAX_NUM, src_pix.w);
(uchar4)(mad24(src_pix.x, src_pix.w, HALF_MAX_NUM) / MAX_NUM,
mad24(src_pix.y, src_pix.w, HALF_MAX_NUM) / MAX_NUM,
mad24(src_pix.z, src_pix.w, HALF_MAX_NUM) / MAX_NUM, src_pix.w);
++y;
dst_index += dst_step;
...
...
This diff is collapsed.
Click to expand it.
modules/imgproc/src/opencl/hough_lines.cl
View file @
17ffb288
...
...
@@ -80,7 +80,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step,
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift;
int r = convert_int_rte(mad((float)x, cosVal, y * sinVal)) + shift;
atomic_inc(accum + r + 1);
}
}
...
...
@@ -117,7 +117,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
const int x = (point & 0xFFFF);
const int y = point >> 16;
int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift;
int r = convert_int_rte(mad((float)x, cosVal, y * sinVal)) + shift;
atomic_inc(l_accum + r + 1);
}
...
...
@@ -186,7 +186,7 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac
if (y < accum_rows-2)
{
__global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
__global const uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
__global int4* lines = (__global int4*)(lines_ptr + lines_offset);
__global int* lines_index = lines_index_ptr + 1;
...
...
This diff is collapsed.
Click to expand it.
modules/imgproc/src/opencl/integral_sum.cl
View file @
17ffb288
...
...
@@ -125,7 +125,7 @@ kernel void integral_sum_rows(__global const uchar *buf_ptr, int buf_step, int b
sumT accum = 0;
#ifdef SUM_SQUARE
__global sumSQT *dst_sq = (__global sumT *)(dst_sq_ptr + dst_sq_offset);
__global sumSQT *dst_sq = (__global sumSQT *)(dst_sq_ptr + dst_sq_offset);
for (int xin = x; xin < cols; xin += gs)
{
dst_sq[xin] = 0;
...
...
This diff is collapsed.
Click to expand it.
modules/imgproc/src/opencl/match_template.cl
View file @
17ffb288
...
...
@@ -465,10 +465,10 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
T value_sum = sum[mad24(t_rows, step, t_cols)] - sum[mad24(t_rows, step, 0)] - sum[t_cols] + sum[0];
T value_sqsum = sqsum[mad24(t_rows, step, t_cols)] - sqsum[mad24(t_rows, step, 0)] - sqsum[t_cols] + sqsum[0];
float num = convertToDT(mad(value_sum, template_sum, 0));
float num = convertToDT(mad(value_sum, template_sum, (float)0));
value_sqsum -= weight * value_sum * value_sum;
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), 0));
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), (float)0));
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global float * dstult = (__global float *)(dst+dst_idx);
...
...
@@ -509,7 +509,7 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
float num = convertToDT(mad(value_sum, temp_sum, 0));
value_sqsum -= weight * value_sum * value_sum;
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), 0));
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), (float)0));
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global float * dstult = (__global float *)(dst+dst_idx);
...
...
@@ -549,7 +549,7 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
float num = convertToDT(mad(value_sum, temp_sum, 0));
value_sqsum -= weight * value_sum * value_sum;
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), 0));
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), (float)0));
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global float * dstult = (__global float *)(dst+dst_idx);
...
...
This diff is collapsed.
Click to expand it.
modules/imgproc/src/opencl/pyr_down.cl
View file @
17ffb288
...
...
@@ -148,6 +148,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
if (src_y >= 2 && src_y < src_rows - 4)
{
#undef EXTRAPOLATE_
#define EXTRAPOLATE_(val, maxVal) val
#if kercn == 1
col = EXTRAPOLATE(x, src_cols);
...
...
@@ -180,6 +181,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
}
else // need extrapolate y
{
#undef EXTRAPOLATE_
#define EXTRAPOLATE_(val, maxVal) EXTRAPOLATE(val, maxVal)
#if kercn == 1
col = EXTRAPOLATE(x, src_cols);
...
...
This diff is collapsed.
Click to expand it.
modules/imgproc/src/opencl/remap.cl
View file @
17ffb288
...
...
@@ -414,8 +414,8 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src
#if defined BORDER_CONSTANT
float xf = map1[0], yf = map2[0];
int sx = convert_int_sat_rtz(mad(xf, INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;
int sy = convert_int_sat_rtz(mad(yf, INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;
int sx = convert_int_sat_rtz(mad(xf, (float)INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;
int sy = convert_int_sat_rtz(mad(yf, (float)INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;
__constant float * coeffs_x = coeffs + ((convert_int_rte(xf * INTER_TAB_SIZE) & (INTER_TAB_SIZE - 1)) << 1);
__constant float * coeffs_y = coeffs + ((convert_int_rte(yf * INTER_TAB_SIZE) & (INTER_TAB_SIZE - 1)) << 1);
...
...
This diff is collapsed.
Click to expand it.
modules/imgproc/src/opencl/warp_affine.cl
View file @
17ffb288
...
...
@@ -104,8 +104,8 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy, dst_index += dst_step)
{
int X0 = X0_ + rint(fma(M[1], dy, M[2]) * AB_SCALE) + round_delta;
int Y0 = Y0_ + rint(fma(M[4], dy, M[5]) * AB_SCALE) + round_delta;
int X0 = X0_ + rint(fma(M[1], (CT)dy, M[2]) * AB_SCALE) + round_delta;
int Y0 = Y0_ + rint(fma(M[4], (CT)dy, M[5]) * AB_SCALE) + round_delta;
short sx = convert_short_sat(X0 >> AB_BITS);
short sy = convert_short_sat(Y0 >> AB_BITS);
...
...
@@ -146,8 +146,8 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy)
{
int X0 = X0_ + rint(fma(M[1], dy, M[2]) * AB_SCALE) + ROUND_DELTA;
int Y0 = Y0_ + rint(fma(M[4], dy, M[5]) * AB_SCALE) + ROUND_DELTA;
int X0 = X0_ + rint(fma(M[1], (CT)dy, M[2]) * AB_SCALE) + ROUND_DELTA;
int Y0 = Y0_ + rint(fma(M[4], (CT)dy, M[5]) * AB_SCALE) + ROUND_DELTA;
X0 = X0 >> (AB_BITS - INTER_BITS);
Y0 = Y0 >> (AB_BITS - INTER_BITS);
...
...
@@ -274,8 +274,8 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
if (dx < dst_cols && dy < dst_rows)
{
int tmp = (dx << AB_BITS);
int X0 = rint(M[0] * tmp) + rint(fma(M[1], dy, M[2]) * AB_SCALE) + ROUND_DELTA;
int Y0 = rint(M[3] * tmp) + rint(fma(M[4], dy, M[5]) * AB_SCALE) + ROUND_DELTA;
int X0 = rint(M[0] * tmp) + rint(fma(M[1], (CT)dy, M[2]) * AB_SCALE) + ROUND_DELTA;
int Y0 = rint(M[3] * tmp) + rint(fma(M[4], (CT)dy, M[5]) * AB_SCALE) + ROUND_DELTA;
X0 = X0 >> (AB_BITS - INTER_BITS);
Y0 = Y0 >> (AB_BITS - INTER_BITS);
...
...
This diff is collapsed.
Click to expand it.
modules/objdetect/src/opencl/cascadedetect.cl
View file @
17ffb288
...
...
@@ -180,11 +180,11 @@ void runHaarClassifier(
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
s += (sval < st.y*nf) ? st.z : st.w;
...
...
@@ -204,11 +204,11 @@ void runHaarClassifier(
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
...
...
@@ -281,12 +281,12 @@ void runHaarClassifier(
int4 ofs = f->ofs[0];
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
//if( weight.z > 0 )
if( fabs(weight.z) > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
partsum += (sval < st.y*nf) ? st.z : st.w;
...
...
@@ -304,11 +304,11 @@ void runHaarClassifier(
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
...
...
This diff is collapsed.
Click to expand it.
modules/video/src/opencl/optical_flow_tvl1.cl
View file @
17ffb288
...
...
@@ -148,7 +148,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
}
}
inline float readImage(__global float *image, int x, int y, int rows, int cols, int elemCntPerRow)
inline float readImage(__global const float *image, int x, int y, int rows, int cols, int elemCntPerRow)
{
int i0 = clamp(x, 0, cols - 1);
int j0 = clamp(y, 0, rows - 1);
...
...
This diff is collapsed.
Click to expand it.
modules/video/src/opencl/pyrlk.cl
View file @
17ffb288
...
...
@@ -266,7 +266,7 @@ inline void GetError(image2d_t J, const float x, const float y, const float* Pch
//macro to read pixel value into local memory.
#define READI(_y,_x) IPatchLocal[mad24(mad24((_y), LSy, yid), LM_W, mad24((_x), LSx, xid))] = read_imagef(I, sampler, (float2)(mad((_x), LSx, Point.x + xid - 0.5f), mad((_y), LSy, Point.y + yid - 0.5f))).x;
#define READI(_y,_x) IPatchLocal[mad24(mad24((_y), LSy, yid), LM_W, mad24((_x), LSx, xid))] = read_imagef(I, sampler, (float2)(mad((float)(_x), (float)LSx, Point.x + xid - 0.5f), mad((float)(_y), (float)LSy, Point.y + yid - 0.5f))).x;
void ReadPatchIToLocalMem(image2d_t I, float2 Point, local float* IPatchLocal)
{
int xid=get_local_id(0);
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment