Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
687d7639
Commit
687d7639
authored
Aug 03, 2012
by
yao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
git://code.opencv.org/opencv
parents
1eedcea5
3180bbe3
Show whitespace changes
Inline
Side-by-side
Showing
26 changed files
with
1631 additions
and
556 deletions
+1631
-556
parallel_tool.hpp
modules/core/include/opencv2/core/parallel_tool.hpp
+108
-0
parallel_tool.cpp
modules/core/src/parallel_tool.cpp
+112
-0
precomp.hpp
modules/core/src/precomp.hpp
+1
-0
cascadeclassifier.cpp
modules/gpu/src/cascadeclassifier.cpp
+2
-0
perf_bilateral.cpp
modules/imgproc/perf/perf_bilateral.cpp
+38
-0
precomp.hpp
modules/imgproc/src/precomp.hpp
+1
-0
smooth.cpp
modules/imgproc/src/smooth.cpp
+128
-70
test_bilateral_filter.cpp
modules/imgproc/test/test_bilateral_filter.cpp
+290
-0
matrix_operations.hpp
modules/ocl/include/opencv2/ocl/matrix_operations.hpp
+1
-1
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+1
-0
arithm.cpp
modules/ocl/src/arithm.cpp
+24
-24
imgproc.cpp
modules/ocl/src/imgproc.cpp
+21
-9
initialization.cpp
modules/ocl/src/initialization.cpp
+17
-1
arithm_addWeighted.cl
modules/ocl/src/kernels/arithm_addWeighted.cl
+18
-18
arithm_cartToPolar.cl
modules/ocl/src/kernels/arithm_cartToPolar.cl
+4
-0
arithm_div.cl
modules/ocl/src/kernels/arithm_div.cl
+72
-59
arithm_exp.cl
modules/ocl/src/kernels/arithm_exp.cl
+4
-0
arithm_log.cl
modules/ocl/src/kernels/arithm_log.cl
+2
-2
convertC3C4.cl
modules/ocl/src/kernels/convertC3C4.cl
+88
-87
imgproc_resize.cl
modules/ocl/src/kernels/imgproc_resize.cl
+131
-76
operator_setTo.cl
modules/ocl/src/kernels/operator_setTo.cl
+4
-54
operator_setToM.cl
modules/ocl/src/kernels/operator_setToM.cl
+9
-103
matrix_operations.cpp
modules/ocl/src/matrix_operations.cpp
+454
-49
precomp.hpp
modules/ocl/src/precomp.hpp
+3
-3
test_imgproc.cpp
modules/ocl/test/test_imgproc.cpp
+0
-0
test_matrix_operation.cpp
modules/ocl/test/test_matrix_operation.cpp
+98
-0
No files found.
modules/core/include/opencv2/core/parallel_tool.hpp
0 → 100644
View file @
687d7639
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PARALLEL_TOOL_HPP__
#define __OPENCV_PARALLEL_TOOL_HPP__
#ifdef HAVE_CVCONFIG_H
# include <cvconfig.h>
#endif // HAVE_CVCONFIG_H
/*
HAVE_TBB - using TBB
HAVE_GCD - using GCD
HAVE_OPENMP - using OpenMP
HAVE_CONCURRENCY - using visual studio 2010 concurrency
*/
#ifdef HAVE_TBB
# include "tbb/tbb_stddef.h"
# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
# include "tbb/tbb.h"
# include "tbb/task.h"
# undef min
# undef max
# else
# undef HAVE_TBB
# endif // end TBB version
#endif // HAVE_TBB
#ifdef __cplusplus
namespace
cv
{
// a base body class
class
CV_EXPORTS
ParallelLoopBody
{
public
:
virtual
void
operator
()
(
const
Range
&
range
)
const
=
0
;
virtual
~
ParallelLoopBody
();
};
CV_EXPORTS
void
parallel_for_
(
const
Range
&
range
,
const
ParallelLoopBody
&
body
);
template
<
typename
Iterator
,
typename
Body
>
inline
CV_EXPORTS
void
parallel_do_
(
Iterator
first
,
Iterator
last
,
const
Body
&
body
)
{
#ifdef HAVE_TBB
tbb
::
parallel_do
(
first
,
last
,
body
);
#else
for
(
;
first
!=
last
;
++
first
)
body
(
*
first
);
#endif // HAVE_TBB
}
template
<
typename
Body
>
inline
CV_EXPORTS
void
parallel_reduce_
(
const
Range
&
range
,
Body
&
body
)
{
#ifdef HAVE_TBB
tbb
::
parallel_reduce
(
tbb
::
blocked_range
<
int
>
(
range
.
start
,
range
.
end
),
body
);
#else
body
(
range
);
#endif // end HAVE_TBB
}
}
// namespace cv
#endif // __cplusplus
#endif // __OPENCV_PARALLEL_TOOL_HPP__
modules/core/src/parallel_tool.cpp
0 → 100644
View file @
687d7639
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_CONCURRENCY
# include <ppl.h>
#elif defined HAVE_OPENMP
# include <omp.h>
#elif defined HAVE_GCD
# include <dispatch/dispatch.h>
#endif // HAVE_CONCURRENCY
namespace
cv
{
ParallelLoopBody
::~
ParallelLoopBody
()
{
}
#ifdef HAVE_TBB
class
TbbProxyLoopBody
{
public
:
TbbProxyLoopBody
(
const
ParallelLoopBody
&
_body
)
:
body
(
&
_body
)
{
}
void
operator
()(
const
tbb
::
blocked_range
<
int
>&
range
)
const
{
body
->
operator
()(
Range
(
range
.
begin
(),
range
.
end
()));
}
private
:
const
ParallelLoopBody
*
body
;
};
#endif // end HAVE_TBB
#ifdef HAVE_GCD
static
void
block_function
(
void
*
context
,
size_t
index
)
{
ParallelLoopBody
*
ptr_body
=
static_cast
<
ParallelLoopBody
*>
(
context
);
ptr_body
->
operator
()(
Range
(
index
,
index
+
1
));
}
#endif // HAVE_GCD
void
parallel_for_
(
const
Range
&
range
,
const
ParallelLoopBody
&
body
)
{
#ifdef HAVE_TBB
tbb
::
parallel_for
(
tbb
::
blocked_range
<
int
>
(
range
.
start
,
range
.
end
),
TbbProxyLoopBody
(
body
));
#elif defined HAVE_CONCURRENCY
Concurrency
::
parallel_for
(
range
.
start
,
range
.
end
,
body
);
#elif defined HAVE_OPENMP
#pragma omp parallel for schedule(dynamic)
for
(
int
i
=
range
.
start
;
i
<
range
.
end
;
++
i
)
body
(
Range
(
i
,
i
+
1
));
#elif defined (HAVE_GCD)
dispatch_queue_t
concurrent_queue
=
dispatch_get_global_queue
(
DISPATCH_QUEUE_PRIORITY_DEFAULT
,
0
);
dispatch_apply_f
(
range
.
end
-
range
.
start
,
concurrent_queue
,
&
const_cast
<
ParallelLoopBody
&>
(
body
),
block_function
);
#else
body
(
range
);
#endif // end HAVE_TBB
}
}
// namespace cv
modules/core/src/precomp.hpp
View file @
687d7639
...
@@ -50,6 +50,7 @@
...
@@ -50,6 +50,7 @@
#include "opencv2/core/core.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/core_c.h"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/parallel_tool.hpp"
#include <assert.h>
#include <assert.h>
#include <ctype.h>
#include <ctype.h>
...
...
modules/gpu/src/cascadeclassifier.cpp
View file @
687d7639
...
@@ -255,6 +255,8 @@ private:
...
@@ -255,6 +255,8 @@ private:
ncvAssertPrintReturn
(
gpuAllocator
->
isInitialized
(),
"Error creating GPU memory allocator"
,
NCV_CUDA_ERROR
);
ncvAssertPrintReturn
(
gpuAllocator
->
isInitialized
(),
"Error creating GPU memory allocator"
,
NCV_CUDA_ERROR
);
ncvAssertPrintReturn
(
cpuAllocator
->
isInitialized
(),
"Error creating CPU memory allocator"
,
NCV_CUDA_ERROR
);
ncvAssertPrintReturn
(
cpuAllocator
->
isInitialized
(),
"Error creating CPU memory allocator"
,
NCV_CUDA_ERROR
);
lastAllocatedFrameSize
=
frameSize
;
return
NCV_SUCCESS
;
return
NCV_SUCCESS
;
}
}
...
...
modules/imgproc/perf/perf_bilateral.cpp
0 → 100644
View file @
687d7639
#include "perf_precomp.hpp"
using
namespace
std
;
using
namespace
cv
;
using
namespace
perf
;
using
namespace
testing
;
using
std
::
tr1
::
make_tuple
;
using
std
::
tr1
::
get
;
CV_ENUM
(
Mat_Type
,
CV_8UC1
,
CV_8UC3
,
CV_32FC1
,
CV_32FC3
)
typedef
TestBaseWithParam
<
tr1
::
tuple
<
Size
,
int
,
Mat_Type
>
>
TestBilateralFilter
;
PERF_TEST_P
(
TestBilateralFilter
,
BilateralFilter
,
Combine
(
Values
(
szVGA
,
sz1080p
),
// image size
Values
(
3
,
5
),
// d
ValuesIn
(
Mat_Type
::
all
()
)
// image type
)
)
{
Size
sz
;
int
d
,
type
;
const
double
sigmaColor
=
1.
,
sigmaSpace
=
1.
;
sz
=
get
<
0
>
(
GetParam
());
d
=
get
<
1
>
(
GetParam
());
type
=
get
<
2
>
(
GetParam
());
Mat
src
(
sz
,
type
);
Mat
dst
(
sz
,
type
);
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
).
time
(
20
);
TEST_CYCLE
()
bilateralFilter
(
src
,
dst
,
d
,
sigmaColor
,
sigmaSpace
,
BORDER_DEFAULT
);
SANITY_CHECK
(
dst
);
}
modules/imgproc/src/precomp.hpp
View file @
687d7639
...
@@ -50,6 +50,7 @@
...
@@ -50,6 +50,7 @@
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/parallel_tool.hpp"
#include <math.h>
#include <math.h>
#include <assert.h>
#include <assert.h>
#include <string.h>
#include <string.h>
...
...
modules/imgproc/src/smooth.cpp
View file @
687d7639
...
@@ -1288,13 +1288,84 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
...
@@ -1288,13 +1288,84 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
namespace
cv
namespace
cv
{
{
class
BilateralFilter_8u_Invoker
:
public
ParallelLoopBody
{
public
:
BilateralFilter_8u_Invoker
(
const
Mat
&
_src
,
Mat
&
_dst
,
Mat
_temp
,
int
_radius
,
int
_maxk
,
int
*
_space_ofs
,
float
*
_space_weight
,
float
*
_color_weight
)
:
ParallelLoopBody
(),
src
(
_src
),
dst
(
_dst
),
temp
(
_temp
),
radius
(
_radius
),
maxk
(
_maxk
),
space_ofs
(
_space_ofs
),
space_weight
(
_space_weight
),
color_weight
(
_color_weight
)
{
}
virtual
void
operator
()
(
const
Range
&
range
)
const
{
int
i
,
j
,
cn
=
src
.
channels
(),
k
;
Size
size
=
src
.
size
();
for
(
i
=
range
.
start
;
i
<
range
.
end
;
i
++
)
{
const
uchar
*
sptr
=
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
+
radius
*
cn
;
uchar
*
dptr
=
dst
.
data
+
i
*
dst
.
step
;
if
(
cn
==
1
)
{
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
float
sum
=
0
,
wsum
=
0
;
int
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
int
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
val
-
val0
)];
sum
+=
val
*
w
;
wsum
+=
w
;
}
// overflow is not possible here => there is no need to use CV_CAST_8U
dptr
[
j
]
=
(
uchar
)
cvRound
(
sum
/
wsum
);
}
}
else
{
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
int
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
const
uchar
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
int
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
)];
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
}
wsum
=
1.
f
/
wsum
;
b0
=
cvRound
(
sum_b
*
wsum
);
g0
=
cvRound
(
sum_g
*
wsum
);
r0
=
cvRound
(
sum_r
*
wsum
);
dptr
[
j
]
=
(
uchar
)
b0
;
dptr
[
j
+
1
]
=
(
uchar
)
g0
;
dptr
[
j
+
2
]
=
(
uchar
)
r0
;
}
}
}
}
private
:
const
Mat
&
src
;
Mat
&
dst
,
temp
;
int
radius
,
maxk
,
*
space_ofs
;
float
*
space_weight
,
*
color_weight
;
};
static
void
static
void
bilateralFilter_8u
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
bilateralFilter_8u
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
double
sigma_color
,
double
sigma_space
,
double
sigma_color
,
double
sigma_space
,
int
borderType
)
int
borderType
)
{
{
int
cn
=
src
.
channels
();
int
cn
=
src
.
channels
();
int
i
,
j
,
k
,
maxk
,
radius
;
int
i
,
j
,
maxk
,
radius
;
Size
size
=
src
.
size
();
Size
size
=
src
.
size
();
CV_Assert
(
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC3
)
&&
CV_Assert
(
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC3
)
&&
...
@@ -1341,26 +1412,51 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
...
@@ -1341,26 +1412,51 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
temp
.
step
+
j
*
cn
);
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
temp
.
step
+
j
*
cn
);
}
}
for
(
i
=
0
;
i
<
size
.
height
;
i
++
)
BilateralFilter_8u_Invoker
body
(
src
,
dst
,
temp
,
radius
,
maxk
,
space_ofs
,
space_weight
,
color_weight
);
parallel_for_
(
Range
(
0
,
size
.
height
),
body
);
}
class
BilateralFilter_32f_Invoker
:
public
ParallelLoopBody
{
public
:
BilateralFilter_32f_Invoker
(
int
_cn
,
int
_radius
,
int
_maxk
,
int
*
_space_ofs
,
Mat
_temp
,
Mat
*
_dest
,
Size
_size
,
float
_scale_index
,
float
*
_space_weight
,
float
*
_expLUT
)
:
ParallelLoopBody
(),
cn
(
_cn
),
radius
(
_radius
),
maxk
(
_maxk
),
space_ofs
(
_space_ofs
),
temp
(
_temp
),
dest
(
_dest
),
size
(
_size
),
scale_index
(
_scale_index
),
space_weight
(
_space_weight
),
expLUT
(
_expLUT
)
{
{
const
uchar
*
sptr
=
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
+
radius
*
cn
;
}
uchar
*
dptr
=
dst
.
data
+
i
*
dst
.
step
;
virtual
void
operator
()
(
const
Range
&
range
)
const
{
Mat
&
dst
=
*
dest
;
int
i
,
j
,
k
;
for
(
i
=
range
.
start
;
i
<
range
.
end
;
i
++
)
{
const
float
*
sptr
=
(
const
float
*
)(
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
)
+
radius
*
cn
;
float
*
dptr
=
(
float
*
)(
dst
.
data
+
i
*
dst
.
step
);
if
(
cn
==
1
)
if
(
cn
==
1
)
{
{
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
{
float
sum
=
0
,
wsum
=
0
;
float
sum
=
0
,
wsum
=
0
;
in
t
val0
=
sptr
[
j
];
floa
t
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
{
int
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
val
-
val0
)];
float
alpha
=
(
float
)(
std
::
abs
(
val
-
val0
)
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum
+=
val
*
w
;
sum
+=
val
*
w
;
wsum
+=
w
;
wsum
+=
w
;
}
}
// overflow is not possible here => there is no need to use CV_CAST_8U
dptr
[
j
]
=
(
float
)(
sum
/
wsum
);
dptr
[
j
]
=
(
uchar
)
cvRound
(
sum
/
wsum
);
}
}
}
}
else
else
...
@@ -1369,26 +1465,35 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
...
@@ -1369,26 +1465,35 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
in
t
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
floa
t
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
{
const
uchar
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
const
float
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
int
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
b
-
b0
)
+
float
alpha
=
(
float
)((
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
)];
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
))
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
wsum
+=
w
;
}
}
wsum
=
1.
f
/
wsum
;
wsum
=
1.
f
/
wsum
;
b0
=
cvRound
(
sum_b
*
wsum
);
b0
=
sum_b
*
wsum
;
g0
=
cvRound
(
sum_g
*
wsum
);
g0
=
sum_g
*
wsum
;
r0
=
cvRound
(
sum_r
*
wsum
);
r0
=
sum_r
*
wsum
;
dptr
[
j
]
=
(
uchar
)
b0
;
dptr
[
j
+
1
]
=
(
uchar
)
g0
;
dptr
[
j
+
2
]
=
(
uchar
)
r0
;
dptr
[
j
]
=
b0
;
dptr
[
j
+
1
]
=
g0
;
dptr
[
j
+
2
]
=
r0
;
}
}
}
}
}
}
}
}
private
:
int
cn
,
radius
,
maxk
,
*
space_ofs
;
Mat
temp
,
*
dest
;
Size
size
;
float
scale_index
,
*
space_weight
,
*
expLUT
;
};
static
void
static
void
bilateralFilter_32f
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
bilateralFilter_32f
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
...
@@ -1396,7 +1501,7 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
...
@@ -1396,7 +1501,7 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
int
borderType
)
int
borderType
)
{
{
int
cn
=
src
.
channels
();
int
cn
=
src
.
channels
();
int
i
,
j
,
k
,
maxk
,
radius
;
int
i
,
j
,
maxk
,
radius
;
double
minValSrc
=-
1
,
maxValSrc
=
1
;
double
minValSrc
=-
1
,
maxValSrc
=
1
;
const
int
kExpNumBinsPerChannel
=
1
<<
12
;
const
int
kExpNumBinsPerChannel
=
1
<<
12
;
int
kExpNumBins
=
0
;
int
kExpNumBins
=
0
;
...
@@ -1474,57 +1579,10 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
...
@@ -1474,57 +1579,10 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
(
temp
.
step
/
sizeof
(
float
))
+
j
*
cn
);
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
(
temp
.
step
/
sizeof
(
float
))
+
j
*
cn
);
}
}
for
(
i
=
0
;
i
<
size
.
height
;
i
++
)
// parallel_for usage
{
const
float
*
sptr
=
(
const
float
*
)(
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
)
+
radius
*
cn
;
float
*
dptr
=
(
float
*
)(
dst
.
data
+
i
*
dst
.
step
);
if
(
cn
==
1
)
BilateralFilter_32f_Invoker
body
(
cn
,
radius
,
maxk
,
space_ofs
,
temp
,
&
dst
,
size
,
scale_index
,
space_weight
,
expLUT
);
{
parallel_for_
(
Range
(
0
,
size
.
height
),
body
);
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
float
sum
=
0
,
wsum
=
0
;
float
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
float
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
alpha
=
(
float
)(
std
::
abs
(
val
-
val0
)
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum
+=
val
*
w
;
wsum
+=
w
;
}
dptr
[
j
]
=
(
float
)(
sum
/
wsum
);
}
}
else
{
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
float
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
const
float
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
float
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
alpha
=
(
float
)((
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
))
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
}
wsum
=
1.
f
/
wsum
;
b0
=
sum_b
*
wsum
;
g0
=
sum_g
*
wsum
;
r0
=
sum_r
*
wsum
;
dptr
[
j
]
=
b0
;
dptr
[
j
+
1
]
=
g0
;
dptr
[
j
+
2
]
=
r0
;
}
}
}
}
}
}
}
...
...
modules/imgproc/test/test_bilateral_filter.cpp
0 → 100644
View file @
687d7639
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using
namespace
cv
;
namespace
cvtest
{
class
CV_BilateralFilterTest
:
public
cvtest
::
BaseTest
{
public
:
enum
{
MAX_WIDTH
=
1920
,
MIN_WIDTH
=
1
,
MAX_HEIGHT
=
1080
,
MIN_HEIGHT
=
1
};
CV_BilateralFilterTest
();
~
CV_BilateralFilterTest
();
protected
:
virtual
void
run_func
();
virtual
int
prepare_test_case
(
int
test_case_index
);
virtual
int
validate_test_results
(
int
test_case_index
);
private
:
void
reference_bilateral_filter
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
double
sigma_color
,
double
sigma_space
,
int
borderType
=
BORDER_DEFAULT
);
int
getRandInt
(
RNG
&
rng
,
int
min_value
,
int
max_value
)
const
;
double
_sigma_color
;
double
_sigma_space
;
Mat
_src
;
Mat
_parallel_dst
;
int
_d
;
};
CV_BilateralFilterTest
::
CV_BilateralFilterTest
()
:
cvtest
::
BaseTest
(),
_src
(),
_parallel_dst
(),
_d
()
{
test_case_count
=
1000
;
}
CV_BilateralFilterTest
::~
CV_BilateralFilterTest
()
{
}
int
CV_BilateralFilterTest
::
getRandInt
(
RNG
&
rng
,
int
min_value
,
int
max_value
)
const
{
double
rand_value
=
rng
.
uniform
(
log
(
min_value
),
log
(
max_value
+
1
));
return
cvRound
(
exp
(
rand_value
));
}
void
CV_BilateralFilterTest
::
reference_bilateral_filter
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
double
sigma_color
,
double
sigma_space
,
int
borderType
)
{
int
cn
=
src
.
channels
();
int
i
,
j
,
k
,
maxk
,
radius
;
double
minValSrc
=
-
1
,
maxValSrc
=
1
;
const
int
kExpNumBinsPerChannel
=
1
<<
12
;
int
kExpNumBins
=
0
;
float
lastExpVal
=
1.
f
;
float
len
,
scale_index
;
Size
size
=
src
.
size
();
dst
.
create
(
size
,
src
.
type
());
CV_Assert
(
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC3
)
&&
src
.
type
()
==
dst
.
type
()
&&
src
.
size
()
==
dst
.
size
()
&&
src
.
data
!=
dst
.
data
);
if
(
sigma_color
<=
0
)
sigma_color
=
1
;
if
(
sigma_space
<=
0
)
sigma_space
=
1
;
double
gauss_color_coeff
=
-
0.5
/
(
sigma_color
*
sigma_color
);
double
gauss_space_coeff
=
-
0.5
/
(
sigma_space
*
sigma_space
);
if
(
d
<=
0
)
radius
=
cvRound
(
sigma_space
*
1.5
);
else
radius
=
d
/
2
;
radius
=
MAX
(
radius
,
1
);
d
=
radius
*
2
+
1
;
// compute the min/max range for the input image (even if multichannel)
minMaxLoc
(
src
.
reshape
(
1
),
&
minValSrc
,
&
maxValSrc
);
if
(
std
::
abs
(
minValSrc
-
maxValSrc
)
<
FLT_EPSILON
)
{
src
.
copyTo
(
dst
);
return
;
}
// temporary copy of the image with borders for easy processing
Mat
temp
;
copyMakeBorder
(
src
,
temp
,
radius
,
radius
,
radius
,
radius
,
borderType
);
patchNaNs
(
temp
);
// allocate lookup tables
vector
<
float
>
_space_weight
(
d
*
d
);
vector
<
int
>
_space_ofs
(
d
*
d
);
float
*
space_weight
=
&
_space_weight
[
0
];
int
*
space_ofs
=
&
_space_ofs
[
0
];
// assign a length which is slightly more than needed
len
=
(
float
)(
maxValSrc
-
minValSrc
)
*
cn
;
kExpNumBins
=
kExpNumBinsPerChannel
*
cn
;
vector
<
float
>
_expLUT
(
kExpNumBins
+
2
);
float
*
expLUT
=
&
_expLUT
[
0
];
scale_index
=
kExpNumBins
/
len
;
// initialize the exp LUT
for
(
i
=
0
;
i
<
kExpNumBins
+
2
;
i
++
)
{
if
(
lastExpVal
>
0.
f
)
{
double
val
=
i
/
scale_index
;
expLUT
[
i
]
=
(
float
)
std
::
exp
(
val
*
val
*
gauss_color_coeff
);
lastExpVal
=
expLUT
[
i
];
}
else
expLUT
[
i
]
=
0.
f
;
}
// initialize space-related bilateral filter coefficients
for
(
i
=
-
radius
,
maxk
=
0
;
i
<=
radius
;
i
++
)
for
(
j
=
-
radius
;
j
<=
radius
;
j
++
)
{
double
r
=
std
::
sqrt
((
double
)
i
*
i
+
(
double
)
j
*
j
);
if
(
r
>
radius
)
continue
;
space_weight
[
maxk
]
=
(
float
)
std
::
exp
(
r
*
r
*
gauss_space_coeff
);
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
(
temp
.
step
/
sizeof
(
float
))
+
j
*
cn
);
}
for
(
i
=
0
;
i
<
size
.
height
;
i
++
)
{
const
float
*
sptr
=
(
const
float
*
)(
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
)
+
radius
*
cn
;
float
*
dptr
=
(
float
*
)(
dst
.
data
+
i
*
dst
.
step
);
if
(
cn
==
1
)
{
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
float
sum
=
0
,
wsum
=
0
;
float
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
float
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
alpha
=
(
float
)(
std
::
abs
(
val
-
val0
)
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum
+=
val
*
w
;
wsum
+=
w
;
}
dptr
[
j
]
=
(
float
)(
sum
/
wsum
);
}
}
else
{
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
float
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
const
float
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
float
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
alpha
=
(
float
)((
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
))
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
}
wsum
=
1.
f
/
wsum
;
b0
=
sum_b
*
wsum
;
g0
=
sum_g
*
wsum
;
r0
=
sum_r
*
wsum
;
dptr
[
j
]
=
b0
;
dptr
[
j
+
1
]
=
g0
;
dptr
[
j
+
2
]
=
r0
;
}
}
}
}
int
CV_BilateralFilterTest
::
prepare_test_case
(
int
/* test_case_index */
)
{
const
static
int
types
[]
=
{
CV_32FC1
,
CV_32FC3
,
CV_8UC1
,
CV_8UC3
};
RNG
&
rng
=
ts
->
get_rng
();
Size
size
(
getRandInt
(
rng
,
MIN_WIDTH
,
MAX_WIDTH
),
getRandInt
(
rng
,
MIN_HEIGHT
,
MAX_HEIGHT
));
int
type
=
types
[
rng
(
sizeof
(
types
)
/
sizeof
(
types
[
0
]))];
_d
=
rng
.
uniform
(
0.
,
1.
)
>
0.5
?
5
:
3
;
_src
.
create
(
size
,
type
);
rng
.
fill
(
_src
,
RNG
::
UNIFORM
,
0
,
256
);
_sigma_color
=
_sigma_space
=
1.
;
return
1
;
}
int
CV_BilateralFilterTest
::
validate_test_results
(
int
test_case_index
)
{
static
const
double
eps
=
1
;
Mat
reference_dst
,
reference_src
;
if
(
_src
.
depth
()
==
CV_32F
)
reference_bilateral_filter
(
_src
,
reference_dst
,
_d
,
_sigma_color
,
_sigma_space
);
else
{
int
type
=
_src
.
type
();
_src
.
convertTo
(
reference_src
,
CV_32F
);
reference_bilateral_filter
(
reference_src
,
reference_dst
,
_d
,
_sigma_color
,
_sigma_space
);
reference_dst
.
convertTo
(
reference_dst
,
type
);
}
double
e
=
norm
(
reference_dst
,
_parallel_dst
);
if
(
e
>
eps
)
{
ts
->
printf
(
cvtest
::
TS
::
CONSOLE
,
"actual error: %g, expected: %g"
,
e
,
eps
);
ts
->
set_failed_test_info
(
cvtest
::
TS
::
FAIL_BAD_ACCURACY
);
}
else
ts
->
set_failed_test_info
(
cvtest
::
TS
::
OK
);
return
BaseTest
::
validate_test_results
(
test_case_index
);
}
void
CV_BilateralFilterTest
::
run_func
()
{
bilateralFilter
(
_src
,
_parallel_dst
,
_d
,
_sigma_color
,
_sigma_space
);
}
TEST
(
Imgproc_BilateralFilter
,
accuracy
)
{
CV_BilateralFilterTest
test
;
test
.
safe_run
();
}
}
// end of namespace cvtest
modules/ocl/include/opencv2/ocl/matrix_operations.hpp
View file @
687d7639
...
@@ -49,7 +49,7 @@ namespace cv
...
@@ -49,7 +49,7 @@ namespace cv
namespace
ocl
namespace
ocl
{
{
////////////////////////////////////OpenCL kernel strings//////////////////////////
////////////////////////////////////OpenCL kernel strings//////////////////////////
extern
const
char
*
convertC3C4
;
//
extern const char *convertC3C4;
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
//////////////////////////////// oclMat ////////////////////////////////
//////////////////////////////// oclMat ////////////////////////////////
...
...
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
687d7639
...
@@ -49,6 +49,7 @@
...
@@ -49,6 +49,7 @@
#include "opencv2/core/core.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/features2d/features2d.hpp"
namespace
cv
namespace
cv
{
{
...
...
modules/ocl/src/arithm.cpp
View file @
687d7639
...
@@ -455,13 +455,12 @@ void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, doub
...
@@ -455,13 +455,12 @@ void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, doub
}
}
void
cv
::
ocl
::
divide
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
double
scalar
)
void
cv
::
ocl
::
divide
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
double
scalar
)
{
{
if
(
src1
.
clCxt
->
impl
->
double_support
==
0
)
{
CV_Error
(
-
217
,
"Selected device don't support double
\r\n
"
);
return
;
}
if
(
src1
.
clCxt
->
impl
->
double_support
!=
0
)
arithmetic_run
<
double
>
(
src1
,
src2
,
dst
,
"arithm_div"
,
&
arithm_div
,
(
void
*
)(
&
scalar
));
arithmetic_run
<
double
>
(
src1
,
src2
,
dst
,
"arithm_div"
,
&
arithm_div
,
(
void
*
)(
&
scalar
));
else
arithmetic_run
<
float
>
(
src1
,
src2
,
dst
,
"arithm_div"
,
&
arithm_div
,
(
void
*
)(
&
scalar
));
}
}
template
<
typename
WT
,
typename
CL_WT
>
template
<
typename
WT
,
typename
CL_WT
>
void
arithmetic_scalar_run
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
,
string
kernelName
,
const
char
**
kernelString
,
int
isMatSubScalar
)
void
arithmetic_scalar_run
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
,
string
kernelName
,
const
char
**
kernelString
,
int
isMatSubScalar
)
...
@@ -579,7 +578,14 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co
...
@@ -579,7 +578,14 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
if
(
src
.
clCxt
->
impl
->
double_support
!=
0
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
scalar
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
scalar
));
else
{
float
f_scalar
=
(
float
)
scalar
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
f_scalar
));
}
openCLExecuteKernel
(
clCxt
,
kernelString
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
clCxt
,
kernelString
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
}
}
...
@@ -1253,7 +1259,11 @@ void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, c
...
@@ -1253,7 +1259,11 @@ void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, c
CV_Assert
(
src
.
type
()
==
CV_32F
||
src
.
type
()
==
CV_64F
);
CV_Assert
(
src
.
type
()
==
CV_32F
||
src
.
type
()
==
CV_64F
);
Context
*
clCxt
=
src
.
clCxt
;
Context
*
clCxt
=
src
.
clCxt
;
if
(
clCxt
->
impl
->
double_support
==
0
&&
src
.
type
()
==
CV_64F
)
{
CV_Error
(
-
217
,
"Selected device don't support double
\r\n
"
);
return
;
}
//int channels = dst.channels();
//int channels = dst.channels();
int
depth
=
dst
.
depth
();
int
depth
=
dst
.
depth
();
...
@@ -2199,43 +2209,33 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
...
@@ -2199,43 +2209,33 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
if
(
sizeof
(
double
)
==
8
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
alpha
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
beta
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
if
(
src1
.
clCxt
->
impl
->
double_support
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
alpha
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
beta
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
gama
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
gama
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
}
}
else
else
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
alpha
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
alpha
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
beta
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
beta
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
gama
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
gama
));
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
}
openCLExecuteKernel
(
clCxt
,
&
arithm_addWeighted
,
"addWeighted"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
clCxt
,
&
arithm_addWeighted
,
"addWeighted"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
}
}
...
...
modules/ocl/src/imgproc.cpp
View file @
687d7639
...
@@ -410,7 +410,11 @@ namespace cv
...
@@ -410,7 +410,11 @@ namespace cv
float
ify
=
1.
/
fy
;
float
ify
=
1.
/
fy
;
double
ifx_d
=
1.
/
fx
;
double
ifx_d
=
1.
/
fx
;
double
ify_d
=
1.
/
fy
;
double
ify_d
=
1.
/
fy
;
int
srcStep_in_pixel
=
src
.
step1
()
/
src
.
channels
();
int
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
channels
();
int
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
//printf("%d %d\n",src.step1() , dst.elemSize());
string
kernelName
;
string
kernelName
;
if
(
interpolation
==
INTER_LINEAR
)
if
(
interpolation
==
INTER_LINEAR
)
kernelName
=
"resizeLN"
;
kernelName
=
"resizeLN"
;
...
@@ -438,25 +442,33 @@ namespace cv
...
@@ -438,25 +442,33 @@ namespace cv
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
if
(
src
.
clCxt
->
impl
->
double_support
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ifx_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ifx_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ify_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ify_d
));
}
}
else
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
}
else
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
...
...
modules/ocl/src/initialization.cpp
View file @
687d7639
...
@@ -378,22 +378,38 @@ namespace cv
...
@@ -378,22 +378,38 @@ namespace cv
void
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
void
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
const
void
*
src
,
size_t
spitch
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
)
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
,
int
channels
)
{
{
size_t
buffer_origin
[
3
]
=
{
0
,
0
,
0
};
size_t
buffer_origin
[
3
]
=
{
0
,
0
,
0
};
size_t
host_origin
[
3
]
=
{
0
,
0
,
0
};
size_t
host_origin
[
3
]
=
{
0
,
0
,
0
};
size_t
region
[
3
]
=
{
width
,
height
,
1
};
size_t
region
[
3
]
=
{
width
,
height
,
1
};
if
(
kind
==
clMemcpyHostToDevice
)
if
(
kind
==
clMemcpyHostToDevice
)
{
if
(
dpitch
==
width
||
channels
==
3
)
{
openCLSafeCall
(
clEnqueueWriteBuffer
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
dst
,
CL_TRUE
,
0
,
width
*
height
,
src
,
0
,
NULL
,
NULL
));
}
else
{
{
openCLSafeCall
(
clEnqueueWriteBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
dst
,
CL_TRUE
,
openCLSafeCall
(
clEnqueueWriteBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
dst
,
CL_TRUE
,
buffer_origin
,
host_origin
,
region
,
dpitch
,
0
,
spitch
,
0
,
src
,
0
,
0
,
0
));
buffer_origin
,
host_origin
,
region
,
dpitch
,
0
,
spitch
,
0
,
src
,
0
,
0
,
0
));
}
}
}
else
if
(
kind
==
clMemcpyDeviceToHost
)
else
if
(
kind
==
clMemcpyDeviceToHost
)
{
if
(
spitch
==
width
||
channels
==
3
)
{
openCLSafeCall
(
clEnqueueReadBuffer
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
src
,
CL_TRUE
,
0
,
width
*
height
,
dst
,
0
,
NULL
,
NULL
));
}
else
{
{
openCLSafeCall
(
clEnqueueReadBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
src
,
CL_TRUE
,
openCLSafeCall
(
clEnqueueReadBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
src
,
CL_TRUE
,
buffer_origin
,
host_origin
,
region
,
spitch
,
0
,
dpitch
,
0
,
dst
,
0
,
0
,
0
));
buffer_origin
,
host_origin
,
region
,
spitch
,
0
,
dpitch
,
0
,
dst
,
0
,
0
,
0
));
}
}
}
}
}
void
openCLCopyBuffer2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
int
dst_offset
,
void
openCLCopyBuffer2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
int
dst_offset
,
const
void
*
src
,
size_t
spitch
,
const
void
*
src
,
size_t
spitch
,
...
...
modules/ocl/src/kernels/arithm_addWeighted.cl
View file @
687d7639
...
@@ -51,9 +51,9 @@ typedef float F;
...
@@ -51,9 +51,9 @@ typedef float F;
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////addWeighted//////////////////////////////////////////////
/////////////////////////////////////////////addWeighted//////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
addWeighted_D0
(
__global
uchar
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__kernel
void
addWeighted_D0
(
__global
uchar
*src1,int
src1_step,int
src1_offset,
__global
uchar
*src2,
F
beta,
int
src2_step,int
src2_offset,
__global
uchar
*src2,
int
src2_step,int
src2_offset,
F
gama,
F
alpha,F
beta,F
gama,
__global
uchar
*dst,
int
dst_step,int
dst_offset,
__global
uchar
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
int
rows,
int
cols,int
dst_step1
)
{
{
...
@@ -99,9 +99,9 @@ __kernel void addWeighted_D0 (__global uchar *src1, F alpha,int src1_step,int sr
...
@@ -99,9 +99,9 @@ __kernel void addWeighted_D0 (__global uchar *src1, F alpha,int src1_step,int sr
__kernel
void
addWeighted_D2
(
__global
ushort
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__kernel
void
addWeighted_D2
(
__global
ushort
*src1,
int
src1_step,int
src1_offset,
__global
ushort
*src2,
F
beta,
int
src2_step,int
src2_offset,
__global
ushort
*src2,
int
src2_step,int
src2_offset,
F
gama,
F
alpha,F
beta,F
gama,
__global
ushort
*dst,
int
dst_step,int
dst_offset,
__global
ushort
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
int
rows,
int
cols,int
dst_step1
)
{
{
...
@@ -145,9 +145,9 @@ __kernel void addWeighted_D2 (__global ushort *src1, F alpha,int src1_step,int s
...
@@ -145,9 +145,9 @@ __kernel void addWeighted_D2 (__global ushort *src1, F alpha,int src1_step,int s
}
}
__kernel
void
addWeighted_D3
(
__global
short
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__kernel
void
addWeighted_D3
(
__global
short
*src1,
int
src1_step,int
src1_offset,
__global
short
*src2,
F
beta,
int
src2_step,int
src2_offset,
__global
short
*src2,
int
src2_step,int
src2_offset,
F
gama,
F
alpha,F
beta,F
gama,
__global
short
*dst,
int
dst_step,int
dst_offset,
__global
short
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
int
rows,
int
cols,int
dst_step1
)
{
{
...
@@ -190,9 +190,9 @@ __kernel void addWeighted_D3 (__global short *src1, F alpha,int src1_step,int sr
...
@@ -190,9 +190,9 @@ __kernel void addWeighted_D3 (__global short *src1, F alpha,int src1_step,int sr
}
}
__kernel
void
addWeighted_D4
(
__global
int
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__kernel
void
addWeighted_D4
(
__global
int
*src1,
int
src1_step,int
src1_offset,
__global
int
*src2,
F
beta,
int
src2_step,int
src2_offset,
__global
int
*src2,
int
src2_step,int
src2_offset,
F
gama,
F
alpha,F
beta,
F
gama,
__global
int
*dst,
int
dst_step,int
dst_offset,
__global
int
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
int
rows,
int
cols,int
dst_step1
)
{
{
...
@@ -238,9 +238,9 @@ __kernel void addWeighted_D4 (__global int *src1, F alpha,int src1_step,int src1
...
@@ -238,9 +238,9 @@ __kernel void addWeighted_D4 (__global int *src1, F alpha,int src1_step,int src1
}
}
__kernel
void
addWeighted_D5
(
__global
float
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__kernel
void
addWeighted_D5
(
__global
float
*src1,int
src1_step,int
src1_offset,
__global
float
*src2,
F
beta,
int
src2_step,int
src2_offset,
__global
float
*src2,
int
src2_step,int
src2_offset,
F
gama,
F
alpha,F
beta,
F
gama,
__global
float
*dst,
int
dst_step,int
dst_offset,
__global
float
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
int
rows,
int
cols,int
dst_step1
)
{
{
...
@@ -286,9 +286,9 @@ __kernel void addWeighted_D5 (__global float *src1, F alpha,int src1_step,int sr
...
@@ -286,9 +286,9 @@ __kernel void addWeighted_D5 (__global float *src1, F alpha,int src1_step,int sr
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
addWeighted_D6
(
__global
double
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__kernel
void
addWeighted_D6
(
__global
double
*src1,
int
src1_step,int
src1_offset,
__global
double
*src2,
F
beta,
int
src2_step,int
src2_offset,
__global
double
*src2,
int
src2_step,int
src2_offset,
F
gama,
F
alpha,F
beta,
F
gama,
__global
double
*dst,
int
dst_step,int
dst_offset,
__global
double
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
int
rows,
int
cols,int
dst_step1
)
{
{
...
...
modules/ocl/src/kernels/arithm_cartToPolar.cl
View file @
687d7639
...
@@ -49,6 +49,10 @@
...
@@ -49,6 +49,10 @@
#
define
CV_PI
3.1415926535897932384626433832795
#
define
CV_PI
3.1415926535897932384626433832795
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52
#
endif
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//magnitude
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//magnitude
...
...
modules/ocl/src/kernels/arithm_div.cl
View file @
687d7639
...
@@ -45,36 +45,45 @@
...
@@ -45,36 +45,45 @@
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
typedef
double
F
;
typedef
double4
F4
;
#
define
convert_F4
convert_double4
#
define
convert_F
convert_double
#
else
typedef
float
F
;
typedef
float4
F4
;
#
define
convert_F4
convert_float4
#
define
convert_F
convert_float
#
endif
#
endif
uchar
round2_uchar
(
double
v
)
{
uchar
round2_uchar
(
F
v
)
{
uchar
v1
=
convert_uchar_sat
(
v
)
;
uchar
v1
=
convert_uchar_sat
(
round
(
v
)
)
;
uchar
v2
=
convert_uchar_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
//
uchar
v2
=
convert_uchar_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
}
ushort
round2_ushort
(
double
v
)
{
ushort
round2_ushort
(
F
v
)
{
ushort
v1
=
convert_ushort_sat
(
v
)
;
ushort
v1
=
convert_ushort_sat
(
round
(
v
)
)
;
ushort
v2
=
convert_ushort_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
//
ushort
v2
=
convert_ushort_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
}
short
round2_short
(
double
v
)
{
short
round2_short
(
F
v
)
{
short
v1
=
convert_short_sat
(
v
)
;
short
v1
=
convert_short_sat
(
round
(
v
)
)
;
short
v2
=
convert_short_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
//
short
v2
=
convert_short_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
}
int
round2_int
(
double
v
)
{
int
round2_int
(
F
v
)
{
int
v1
=
convert_int_sat
(
v
)
;
int
v1
=
convert_int_sat
(
round
(
v
)
)
;
int
v2
=
convert_int_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
//
int
v2
=
convert_int_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
}
///////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////
////////////////////////////divide///////////////////////////////////////////////////
////////////////////////////divide///////////////////////////////////////////////////
...
@@ -83,7 +92,7 @@ int round2_int(double v){
...
@@ -83,7 +92,7 @@ int round2_int(double v){
__kernel
void
arithm_div_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_div_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
double
scalar
)
int
rows,
int
cols,
int
dst_step1,
F
scalar
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
...
@@ -104,13 +113,13 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
...
@@ -104,13 +113,13 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
uchar4
src2_data
=
vload4
(
0
,
src2
+
src2_index
)
;
uchar4
src2_data
=
vload4
(
0
,
src2
+
src2_index
)
;
uchar4
dst_data
=
*
((
__global
uchar4
*
)(
dst
+
dst_index
))
;
uchar4
dst_data
=
*
((
__global
uchar4
*
)(
dst
+
dst_index
))
;
double4
tmp
=
convert_double
4
(
src1_data
)
*
scalar
;
F4
tmp
=
convert_F
4
(
src1_data
)
*
scalar
;
uchar4
tmp_data
;
uchar4
tmp_data
;
tmp_data.x
=
((
tmp.x
==
0
)
|
| (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (
double
)src2_data.x);
tmp_data.x
=
((
tmp.x
==
0
)
|
| (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (
F
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (
double
)src2_data.y);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (
F
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (
double
)src2_data.z);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (
F
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (
double
)src2_data.w);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (
F
)src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
...
@@ -124,7 +133,7 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
...
@@ -124,7 +133,7 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
__kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -145,13 +154,13 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
...
@@ -145,13 +154,13 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
double4 tmp = convert_double
4(src1_data) * scalar;
F4 tmp = convert_F
4(src1_data) * scalar;
ushort4 tmp_data;
ushort4 tmp_data;
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_ushort(tmp.x / (
double
)src2_data.x);
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_ushort(tmp.x / (
F
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_ushort(tmp.y / (
double
)src2_data.y);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_ushort(tmp.y / (
F
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_ushort(tmp.z / (
double
)src2_data.z);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_ushort(tmp.z / (
F
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_ushort(tmp.w / (
double
)src2_data.w);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_ushort(tmp.w / (
F
)src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
...
@@ -164,7 +173,7 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
...
@@ -164,7 +173,7 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
__kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -185,13 +194,13 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
...
@@ -185,13 +194,13 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
double4 tmp = convert_double
4(src1_data) * scalar;
F4 tmp = convert_F
4(src1_data) * scalar;
short4 tmp_data;
short4 tmp_data;
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_short(tmp.x / (
double
)src2_data.x);
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_short(tmp.x / (
F
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_short(tmp.y / (
double
)src2_data.y);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_short(tmp.y / (
F
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_short(tmp.z / (
double
)src2_data.z);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_short(tmp.z / (
F
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_short(tmp.w / (
double
)src2_data.w);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_short(tmp.w / (
F
)src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
...
@@ -206,7 +215,7 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
...
@@ -206,7 +215,7 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
__kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -220,8 +229,8 @@ __kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
...
@@ -220,8 +229,8 @@ __kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
int data1 = *((__global int *)((__global char *)src1 + src1_index));
int data1 = *((__global int *)((__global char *)src1 + src1_index));
int data2 = *((__global int *)((__global char *)src2 + src2_index));
int data2 = *((__global int *)((__global char *)src2 + src2_index));
double tmp = convert_double
(data1) * scalar;
F tmp = convert_F
(data1) * scalar;
int tmp_data = (tmp == 0 || data2 == 0) ? 0 : round2_int(tmp / (convert_
double
)(data2));
int tmp_data = (tmp == 0 || data2 == 0) ? 0 : round2_int(tmp / (convert_
F
)(data2));
*((__global int *)((__global char *)dst + dst_index)) =tmp_data;
*((__global int *)((__global char *)dst + dst_index)) =tmp_data;
}
}
...
@@ -230,7 +239,7 @@ __kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
...
@@ -230,7 +239,7 @@ __kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_div_D5 (__global float *src1, int src1_step, int src1_offset,
__kernel void arithm_div_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -244,13 +253,14 @@ __kernel void arithm_div_D5 (__global float *src1, int src1_step, int src1_offse
...
@@ -244,13 +253,14 @@ __kernel void arithm_div_D5 (__global float *src1, int src1_step, int src1_offse
float data1 = *((__global float *)((__global char *)src1 + src1_index));
float data1 = *((__global float *)((__global char *)src1 + src1_index));
float data2 = *((__global float *)((__global char *)src2 + src2_index));
float data2 = *((__global float *)((__global char *)src2 + src2_index));
double tmp = convert_double
(data1) * scalar;
F tmp = convert_F
(data1) * scalar;
float tmp_data = (tmp == 0 || data2 == 0) ? 0 : convert_float(tmp / (convert_
double
)(data2));
float tmp_data = (tmp == 0 || data2 == 0) ? 0 : convert_float(tmp / (convert_
F
)(data2));
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
}
}
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_div_D6 (__global double *src1, int src1_step, int src1_offset,
__kernel void arithm_div_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *src2, int src2_step, int src2_offset,
__global double *src2, int src2_step, int src2_offset,
__global double *dst, int dst_step, int dst_offset,
__global double *dst, int dst_step, int dst_offset,
...
@@ -274,10 +284,11 @@ __kernel void arithm_div_D6 (__global double *src1, int src1_step, int src1_offs
...
@@ -274,10 +284,11 @@ __kernel void arithm_div_D6 (__global double *src1, int src1_step, int src1_offs
*((__global double *)((__global char *)dst + dst_index)) = tmp_data;
*((__global double *)((__global char *)dst + dst_index)) = tmp_data;
}
}
}
}
#endif
/************************************div with scalar************************************/
/************************************div with scalar************************************/
__kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset,
__kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -297,10 +308,10 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
...
@@ -297,10 +308,10 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data;
uchar4 tmp_data;
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.x);
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.y);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.z);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.w);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
...
@@ -313,7 +324,7 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
...
@@ -313,7 +324,7 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
__kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offset,
__kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -333,10 +344,10 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
...
@@ -333,10 +344,10 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data;
ushort4 tmp_data;
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.x);
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.y);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.z);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.w);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
...
@@ -348,7 +359,7 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
...
@@ -348,7 +359,7 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
}
}
__kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset,
__kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset,
__global short *dst, int dst_step, int dst_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -368,10 +379,10 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
...
@@ -368,10 +379,10 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 tmp_data;
short4 tmp_data;
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_short(scalar / (
double
)src_data.x);
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_short(scalar / (
F
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_short(scalar / (
double
)src_data.y);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_short(scalar / (
F
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_short(scalar / (
double
)src_data.z);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_short(scalar / (
F
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_short(scalar / (
double
)src_data.w);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_short(scalar / (
F
)src_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
...
@@ -385,7 +396,7 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
...
@@ -385,7 +396,7 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
__kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
__kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
__global int *dst, int dst_step, int dst_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -397,7 +408,7 @@ __kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
...
@@ -397,7 +408,7 @@ __kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
int data = *((__global int *)((__global char *)src + src_index));
int data = *((__global int *)((__global char *)src + src_index));
int tmp_data = (scalar == 0 || data == 0) ? 0 : round2_int(scalar / (convert_
double
)(data));
int tmp_data = (scalar == 0 || data == 0) ? 0 : round2_int(scalar / (convert_
F
)(data));
*((__global int *)((__global char *)dst + dst_index)) =tmp_data;
*((__global int *)((__global char *)dst + dst_index)) =tmp_data;
}
}
...
@@ -405,7 +416,7 @@ __kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
...
@@ -405,7 +416,7 @@ __kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
__kernel void arithm_s_div_D5 (__global float *src, int src_step, int src_offset,
__kernel void arithm_s_div_D5 (__global float *src, int src_step, int src_offset,
__global float *dst, int dst_step, int dst_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
...
@@ -417,12 +428,13 @@ __kernel void arithm_s_div_D5 (__global float *src, int src_step, int src_offset
...
@@ -417,12 +428,13 @@ __kernel void arithm_s_div_D5 (__global float *src, int src_step, int src_offset
float data = *((__global float *)((__global char *)src + src_index));
float data = *((__global float *)((__global char *)src + src_index));
float tmp_data = (scalar == 0 || data == 0) ? 0 : convert_float(scalar / (convert_
double
)(data));
float tmp_data = (scalar == 0 || data == 0) ? 0 : convert_float(scalar / (convert_
F
)(data));
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
}
}
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offset,
__kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offset,
__global double *dst, int dst_step, int dst_offset,
__global double *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1, double scalar)
int rows, int cols, int dst_step1, double scalar)
...
@@ -442,5 +454,6 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
...
@@ -442,5 +454,6 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
*
((
__global
double
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp_data
;
*
((
__global
double
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp_data
;
}
}
}
}
#
endif
modules/ocl/src/kernels/arithm_exp.cl
View file @
687d7639
...
@@ -70,6 +70,8 @@ __kernel void arithm_exp_D5(int rows, int cols, int srcStep, int dstStep, int sr
...
@@ -70,6 +70,8 @@ __kernel void arithm_exp_D5(int rows, int cols, int srcStep, int dstStep, int sr
}
}
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_exp_D6
(
int
rows,
int
cols,
int
srcStep,
int
dstStep,
int
srcOffset,
int
dstOffset,
__global
double
*src,
__global
double
*dst
)
__kernel
void
arithm_exp_D6
(
int
rows,
int
cols,
int
srcStep,
int
dstStep,
int
srcOffset,
int
dstOffset,
__global
double
*src,
__global
double
*dst
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
...
@@ -87,3 +89,5 @@ __kernel void arithm_exp_D6(int rows, int cols, int srcStep, int dstStep, int sr
...
@@ -87,3 +89,5 @@ __kernel void arithm_exp_D6(int rows, int cols, int srcStep, int dstStep, int sr
//
dst[dstIdx]
=
exp
(
src[srcIdx]
)
;
//
dst[dstIdx]
=
exp
(
src[srcIdx]
)
;
}
}
}
}
#
endif
modules/ocl/src/kernels/arithm_log.cl
View file @
687d7639
...
@@ -73,7 +73,7 @@ __kernel void arithm_log_D5(int rows, int cols, int srcStep, int dstStep, int sr
...
@@ -73,7 +73,7 @@ __kernel void arithm_log_D5(int rows, int cols, int srcStep, int dstStep, int sr
}
}
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_log_D6
(
int
rows,
int
cols,
int
srcStep,
int
dstStep,
int
srcOffset,
int
dstOffset,
__global
double
*src,
__global
double
*dst
)
__kernel
void
arithm_log_D6
(
int
rows,
int
cols,
int
srcStep,
int
dstStep,
int
srcOffset,
int
dstOffset,
__global
double
*src,
__global
double
*dst
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
...
@@ -91,4 +91,4 @@ __kernel void arithm_log_D6(int rows, int cols, int srcStep, int dstStep, int sr
...
@@ -91,4 +91,4 @@ __kernel void arithm_log_D6(int rows, int cols, int srcStep, int dstStep, int sr
}
}
}
}
#
endif
modules/ocl/src/kernels/convertC3C4.cl
View file @
687d7639
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
//
@Authors
//
@Authors
//
Zero
Lin,
zero.lin@amd
.com
//
Niko
Li,
newlife20080214@gmail
.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
...
@@ -32,106 +32,107 @@
...
@@ -32,106 +32,107 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
//
//
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
__kernel
void
convertC3C4
_D0
(
__global
const
char4
*
restrict
src,
__global
char
4
*dst,
int
cols,
int
rows,
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE
4
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
int
dstStep_in_piexl,int
pixel_end
)
{
{
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
//int
pixel_end
=
mul24
(
cols
-1
,
rows
-1
)
;
int
x
=
id
%
cols
;
int3
pixelid
=
(
int3
)(
mul24
(
id,3
)
,
mad24
(
id,3,1
)
,
mad24
(
id,3,2
))
;
pixelid
=
clamp
(
pixelid,0,pixel_end
)
;
int
d
=
y
*
srcStep
+
x
*
3
;
GENTYPE4
pixel0,
pixel1,
pixel2,
outpix0,outpix1,outpix2,outpix3
;
char8
data
=
(
char8
)(
src[d>>2],
src[
(
d>>2
)
+
1]
)
;
pixel0
=
src[pixelid.x]
;
char
temp[8]
=
{data.s0,
data.s1,
data.s2,
data.s3,
data.s4,
data.s5,
data.s6,
data.s7}
;
pixel1
=
src[pixelid.y]
;
pixel2
=
src[pixelid.z]
;
int
start
=
d
&
3
;
char4
ndata
=
(
char4
)(
temp[start],
temp[start
+
1],
temp[start
+
2],
0
)
;
if
(
y
<
rows
)
outpix0
=
(
GENTYPE4
)(
pixel0.x,pixel0.y,pixel0.z,0
)
;
dst[y
*
dstStep
+
x]
=
ndata
;
outpix1
=
(
GENTYPE4
)(
pixel0.w,pixel1.x,pixel1.y,0
)
;
outpix2
=
(
GENTYPE4
)(
pixel1.z,pixel1.w,pixel2.x,0
)
;
outpix3
=
(
GENTYPE4
)(
pixel2.y,pixel2.z,pixel2.w,0
)
;
int4
outy
=
(
id<<2
)
/cols
;
int4
outx
=
(
id<<2
)
%cols
;
outx.y++
;
outx.z+=2
;
outx.w+=3
;
outy
=
select
(
outy,outy+1,outx>=cols
)
;
outx
=
select
(
outx,outx-cols,outx>=cols
)
;
//outpix3
=
select
(
outpix3,
outpix0,
(
uchar4
)(
outy.w>=rows
))
;
//outpix2
=
select
(
outpix2,
outpix0,
(
uchar4
)(
outy.z>=rows
))
;
//outpix1
=
select
(
outpix1,
outpix0,
(
uchar4
)(
outy.y>=rows
))
;
//outx
=
select
(
outx,
(
int4
)
outx.x,outy>=rows
)
;
//outy
=
select
(
outy,
(
int4
)
outy.x,outy>=rows
)
;
int4
addr
=
mad24
(
outy,dstStep_in_piexl,outx
)
;
if
(
outx.w<cols
&&
outy.w<rows
)
{
dst[addr.x]
=
outpix0
;
dst[addr.y]
=
outpix1
;
dst[addr.z]
=
outpix2
;
dst[addr.w]
=
outpix3
;
}
else
if
(
outx.z<cols
&&
outy.z<rows
)
{
dst[addr.x]
=
outpix0
;
dst[addr.y]
=
outpix1
;
dst[addr.z]
=
outpix2
;
}
else
if
(
outx.y<cols
&&
outy.y<rows
)
{
dst[addr.x]
=
outpix0
;
dst[addr.y]
=
outpix1
;
}
else
if
(
outx.x<cols
&&
outy.x<rows
)
{
dst[addr.x]
=
outpix0
;
}
}
}
__kernel
void
convertC3C4_D1
(
__global
const
short*
restrict
src,
__global
short4
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
int
d
=
(
y
*
srcStep
+
x
*
6
)
>>1
;
short4
data
=
*
(
__global
short4
*
)(
src
+
((
d>>1
)
<<1
))
;
short
temp[4]
=
{data.s0,
data.s1,
data.s2,
data.s3}
;
int
start
=
d
&
1
;
short4
ndata
=
(
short4
)(
temp[start],
temp[start
+
1],
temp[start
+
2],
0
)
;
if
(
y
<
rows
)
dst[y
*
dstStep
+
x]
=
ndata
;
}
__kernel
void
convertC
3C4_D2
(
__global
const
int
*
restrict
src,
__global
int
4
*dst,
int
cols,
int
rows,
__kernel
void
convertC
4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE
4
*dst,
int
cols,
int
rows,
int
srcStep
,
int
dstStep
)
int
srcStep
_in_pixel,int
pixel_end
)
{
{
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
<<2
;
int
y
=
id
/
cols
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
int
x
=
id
%
cols
;
int4
x4
=
(
int4
)(
x,x+1,x+2,x+3
)
;
int
d
=
(
y
*
srcStep
+
x
*
12
)
>>2
;
int4
y4
=
select
((
int4
)
y,
(
int4
)(
y+1
)
,
x4>=
(
int4
)
cols
)
;
int4
data
=
*
(
__global
int4
*
)(
src
+
d
)
;
x4
=
select
(
x4,x4-
(
int4
)
cols,x4>=
(
int4
)
cols
)
;
data.z
=
0
;
int4
addr
=
mad24
(
y4,
(
int4
)
srcStep_in_pixel,x4
)
;
GENTYPE4
pixel0,pixel1,pixel2,pixel3,
outpixel1,
outpixel2
;
if
(
y
<
rows
)
pixel0
=
src[addr.x]
;
dst[y
*
dstStep
+
x]
=
data
;
pixel1
=
src[addr.y]
;
}
pixel2
=
src[addr.z]
;
pixel3
=
src[addr.w]
;
__kernel
void
convertC4C3_D2
(
__global
const
int4
*
restrict
src,
__global
int
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
pixel0.w
=
pixel1.x
;
{
outpixel1.x
=
pixel1.y
;
int
id
=
get_global_id
(
0
)
;
outpixel1.y
=
pixel1.z
;
int
y
=
id
/
cols
;
outpixel1.z
=
pixel2.x
;
int
x
=
id
%
cols
;
outpixel1.w
=
pixel2.y
;
outpixel2.x
=
pixel2.z
;
int4
data
=
src[y
*
srcStep
+
x]
;
outpixel2.y
=
pixel3.x
;
outpixel2.z
=
pixel3.y
;
if
(
y
<
rows
)
outpixel2.w
=
pixel3.z
;
int4
outaddr
=
mul24
(
id>>2
,
3
)
;
outaddr.y++
;
outaddr.z+=2
;
//printf
(
"%d "
,
outaddr.z
)
;
if
(
outaddr.z
<=
pixel_end
)
{
{
int
d
=
y
*
dstStep
+
x
*
3
;
dst[outaddr.x]
=
pixel0
;
dst[d]
=
data.x
;
dst[outaddr.y]
=
outpixel1
;
dst[d
+
1]
=
data.y
;
dst[outaddr.z]
=
outpixel2
;
dst[d
+
2]
=
data.z
;
}
}
}
else
if
(
outaddr.y
<=
pixel_end
)
__kernel
void
convertC4C3_D1
(
__global
const
short4
*
restrict
src,
__global
short
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
short4
data
=
src[y
*
srcStep
+
x]
;
if
(
y
<
rows
)
{
{
int
d
=
y
*
dstStep
+
x
*
3
;
dst[outaddr.x]
=
pixel0
;
dst[d]
=
data.x
;
dst[outaddr.y]
=
outpixel1
;
dst[d
+
1]
=
data.y
;
dst[d
+
2]
=
data.z
;
}
}
}
else
if
(
outaddr.x
<=
pixel_end
)
__kernel
void
convertC4C3_D0
(
__global
const
char4
*
restrict
src,
__global
char
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
char4
data
=
src[y
*
srcStep
+
x]
;
if
(
y
<
rows
)
{
{
int
d
=
y
*
dstStep
+
x
*
3
;
dst[outaddr.x]
=
pixel0
;
dst[d]
=
data.x
;
dst[d
+
1]
=
data.y
;
dst[d
+
2]
=
data.z
;
}
}
}
}
modules/ocl/src/kernels/imgproc_resize.cl
View file @
687d7639
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
//
//
//
@Authors
//
@Authors
//
Zhang
Ying,
zhangying913@gmail.com
//
Zhang
Ying,
zhangying913@gmail.com
//
//
Niko
Li,
newlife20080214@gmail.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
...
@@ -50,21 +50,11 @@
...
@@ -50,21 +50,11 @@
#
if
defined
DOUBLE_SUPPORT
#
if
defined
DOUBLE_SUPPORT
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
typedef
double
F
;
#
define
F
double
#
else
#
else
typedef
float
F
;
#
define
F
float
#
endif
#
endif
inline
uint4
getPoint_8uc4
(
__global
uchar4
*
data,
int
offset,
int
x,
int
y,
int
step
)
{
return
convert_uint4
(
data[
(
offset>>2
)
+
y
*
(
step>>2
)
+
x]
)
;
}
inline
float
getPoint_32fc1
(
__global
float
*
data,
int
offset,
int
x,
int
y,
int
step
)
{
return
data[
(
offset>>2
)
+
y
*
(
step>>2
)
+
x]
;
}
#
define
INTER_RESIZE_COEF_BITS
11
#
define
INTER_RESIZE_COEF_BITS
11
#
define
INTER_RESIZE_COEF_SCALE
(
1
<<
INTER_RESIZE_COEF_BITS
)
#
define
INTER_RESIZE_COEF_SCALE
(
1
<<
INTER_RESIZE_COEF_BITS
)
...
@@ -72,8 +62,8 @@ inline float getPoint_32fc1(__global float * data, int offset, int x, int y, int
...
@@ -72,8 +62,8 @@ inline float getPoint_32fc1(__global float * data, int offset, int x, int y, int
#
define
CAST_SCALE
(
1.0f/
(
1<<CAST_BITS
))
#
define
CAST_SCALE
(
1.0f/
(
1<<CAST_BITS
))
#
define
INC
(
x,l
)
((
x+1
)
>=
(
l
)
?
(
x
)
:
((
x
)
+1
))
#
define
INC
(
x,l
)
((
x+1
)
>=
(
l
)
?
(
x
)
:
((
x
)
+1
))
__kernel
void
resizeLN_C1_D0
(
__global
u
nsigned
char
*
dst,
__global
unsigned
char
const
*
restrict
src,
__kernel
void
resizeLN_C1_D0
(
__global
u
char
*
dst,
__global
u
char
const
*
restrict
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
gx
=
get_global_id
(
0
)
;
int
gx
=
get_global_id
(
0
)
;
...
@@ -81,7 +71,7 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
...
@@ -81,7 +71,7 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
float4
sx,
u,
xf
;
float4
sx,
u,
xf
;
int4
x,
DX
;
int4
x,
DX
;
gx
=
(
gx<<2
)
-
(
dst
_offset
&3
)
;
gx
=
(
gx<<2
)
-
(
dst
offset_in_pixel
&3
)
;
DX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
DX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
sx
=
(
convert_float4
(
DX
)
+
0.5f
)
*
ifx
-
0.5f
;
sx
=
(
convert_float4
(
DX
)
+
0.5f
)
*
ifx
-
0.5f
;
xf
=
floor
(
sx
)
;
xf
=
floor
(
sx
)
;
...
@@ -119,10 +109,10 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
...
@@ -119,10 +109,10 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
int4
val1,
val2,
val
;
int4
val1,
val2,
val
;
int4
sdata1,
sdata2,
sdata3,
sdata4
;
int4
sdata1,
sdata2,
sdata3,
sdata4
;
int4
pos1
=
src_offset
+
y
*
src_step
+
x
;
int4
pos1
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
int4
pos2
=
src_offset
+
y
*
src_step
+
x_
;
int4
pos2
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
int4
pos3
=
src_offset
+
y_
*
src_step
+
x
;
int4
pos3
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
int4
pos4
=
src_offset
+
y_
*
src_step
+
x_
;
int4
pos4
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
sdata1.s0
=
src[pos1.s0]
;
sdata1.s0
=
src[pos1.s0]
;
sdata1.s1
=
src[pos1.s1]
;
sdata1.s1
=
src[pos1.s1]
;
...
@@ -144,20 +134,44 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
...
@@ -144,20 +134,44 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
sdata4.s2
=
src[pos4.s2]
;
sdata4.s2
=
src[pos4.s2]
;
sdata4.s3
=
src[pos4.s3]
;
sdata4.s3
=
src[pos4.s3]
;
val1
=
U1
*
sdata1
+
U
*
sdata2
;
val1
=
mul24
(
U1
,
sdata1
)
+
mul24
(
U
,
sdata2
)
;
val2
=
U1
*
sdata3
+
U
*
sdata4
;
val2
=
mul24
(
U1
,
sdata3
)
+
mul24
(
U
,
sdata4
)
;
val
=
V1
*
val1
+
V
*
val2
;
val
=
mul24
(
V1
,
val1
)
+
mul24
(
V
,
val2
)
;
__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dst_offset
+
dy
*
dst_step
+
gx
)
;
//__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dstoffset_in_pixel
+
dy
*
dststep_in_pixel
+
gx
)
;
uchar4
dVal
=
*d
;
//
uchar4
dVal
=
*d
;
int4
con
=
(
DX
>=
0
&&
DX
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
//
int4
con
=
(
DX
>=
0
&&
DX
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
val
=
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>
CAST_BITS
)
;
val
=
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>
CAST_BITS
)
;
*d
=
convert_uchar4
(
con
!=
0
)
?
convert_uchar4_sat
(
val
)
:
dVal
;
//*d
=
convert_uchar4
(
con
!=
0
)
?
convert_uchar4_sat
(
val
)
:
dVal
;
pos4
=
mad24
(
dy,
dststep_in_pixel,
gx+dstoffset_in_pixel
)
;
pos4.y++
;
pos4.z+=2
;
uchar4
uval
=
convert_uchar4_sat
(
val
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos4.x
)
=uval
;
}
else
{
if
(
gx
>=
0
&&
gx
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos4.x]=uval.x
;
}
if
(
gx+1
>=
0
&&
gx+1
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos4.y]=uval.y
;
}
if
(
gx+2
>=
0
&&
gx+2
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos4.z]=uval.z
;
}
}
}
}
__kernel
void
resizeLN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
__kernel
void
resizeLN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -182,18 +196,25 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
...
@@ -182,18 +196,25 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
int
y_
=
INC
(
y,src_rows
)
;
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int
x_
=
INC
(
x,src_cols
)
;
int4
srcpos
;
uint4
val
=
U1*
V1
*
getPoint_8uc4
(
src,src_offset,x,y,src_step
)
+
srcpos.x
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
U1*
V
*
getPoint_8uc4
(
src,src_offset,x,y_,src_step
)
+
srcpos.y
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
U
*
V1
*
getPoint_8uc4
(
src,src_offset,x_,y,src_step
)
+
srcpos.z
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
U
*
V
*
getPoint_8uc4
(
src,src_offset,x_,y_,src_step
)
;
srcpos.w
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
int4
data0
=
convert_int4
(
src[srcpos.x]
)
;
int4
data1
=
convert_int4
(
src[srcpos.y]
)
;
int4
data2
=
convert_int4
(
src[srcpos.z]
)
;
int4
data3
=
convert_int4
(
src[srcpos.w]
)
;
int4
val
=
mul24
(
mul24
(
U1,
V1
)
,
data0
)
+
mul24
(
mul24
(
U,
V1
)
,
data1
)
+mul24
(
mul24
(
U1,
V
)
,
data2
)
+mul24
(
mul24
(
U,
V
)
,
data3
)
;
int
dstpos
=
mad24
(
dy,
dststep_in_pixel,
dx+dstoffset_in_pixel
)
;
uchar4
uval
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx]
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
dst[
dstpos]
=
uval
;
}
}
__kernel
void
resizeLN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
__kernel
void
resizeLN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -210,19 +231,29 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
...
@@ -210,19 +231,29 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
int
y_
=
INC
(
y,src_rows
)
;
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int
x_
=
INC
(
x,src_cols
)
;
float
u1
=
1.f-u
;
float
val1
=
(
1.0f-u
)
*
getPoint_32fc1
(
src,src_offset,x,y,src_step
)
+
float
v1
=
1.f-v
;
u
*
getPoint_32fc1
(
src,src_offset,x_,y,src_step
)
;
int4
srcpos
;
float
val2
=
(
1.0f-u
)
*
getPoint_32fc1
(
src,src_offset,x,y_,src_step
)
+
srcpos.x
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
u
*
getPoint_32fc1
(
src,src_offset,x_,y_,src_step
)
;
srcpos.y
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
float
val
=
(
1.0f-v
)
*
val1
+
v
*
val2
;
srcpos.z
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
float
data0
=
src[srcpos.x]
;
float
data1
=
src[srcpos.y]
;
float
data2
=
src[srcpos.z]
;
float
data3
=
src[srcpos.w]
;
float
val1
=
u1
*
data0
+
u
*
data1
;
float
val2
=
u1
*
data2
+
u
*
data3
;
float
val
=
v1
*
val1
+
v
*
val2
;
int
dstpos
=
mad24
(
dy,
dststep_in_pixel,
dx+dstoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx
]
=
val
;
dst[
dstpos
]
=
val
;
}
}
__kernel
void
resizeLN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
__kernel
void
resizeLN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -239,31 +270,35 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
...
@@ -239,31 +270,35 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
int
y_
=
INC
(
y,src_rows
)
;
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int
x_
=
INC
(
x,src_cols
)
;
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
float4
s_data1,
s_data2,
s_data3,
s_data4
;
float4
s_data1,
s_data2,
s_data3,
s_data4
;
src_offset
=
(
src_offset
>>
4
)
;
s_data1
=
src[srcpos.x]
;
src_step
=
(
src_step
>>
4
)
;
s_data2
=
src[srcpos.y]
;
s_data1
=
src[src_offset
+
y*src_step
+
x]
;
s_data3
=
src[srcpos.z]
;
s_data2
=
src[src_offset
+
y*src_step
+
x_]
;
s_data4
=
src[srcpos.w]
;
s_data3
=
src[src_offset
+
y_*src_step
+
x]
;
float4
val
=
u1
*
v1
*
s_data1
+
u
*
v1
*
s_data2
s_data4
=
src[src_offset
+
y_*src_step
+
x_]
;
+u1
*
v
*s_data3
+
u
*
v
*s_data4
;
s_data1
=
(
1.0f-u
)
*
s_data1
+
u
*
s_data2
;
int
dstpos
=
mad24
(
dy,
dststep_in_pixel,
dx+dstoffset_in_pixel
)
;
s_data2
=
(
1.0f-u
)
*
s_data3
+
u
*
s_data4
;
s_data3
=
(
1.0f-v
)
*
s_data1
+
v
*
s_data2
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[
(
dst_offset>>4
)
+
dy
*
(
dst_step>>4
)
+
dx]
=
s_data3
;
dst[
dstpos]
=
val
;
}
}
__kernel
void
resizeNN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
*
src,
__kernel
void
resizeNN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
{
int
gx
=
get_global_id
(
0
)
;
int
gx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
int
dy
=
get_global_id
(
1
)
;
gx
=
(
gx<<2
)
-
(
dst
_offset
&3
)
;
gx
=
(
gx<<2
)
-
(
dst
offset_in_pixel
&3
)
;
int4
GX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
//
int4
GX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
int4
sx
;
int4
sx
;
int
sy
;
int
sy
;
...
@@ -279,22 +314,42 @@ __kernel void resizeNN_C1_D0(__global uchar * dst, __global uchar * src,
...
@@ -279,22 +314,42 @@ __kernel void resizeNN_C1_D0(__global uchar * dst, __global uchar * src,
sy
=
min
((
int
)
floor
(
s5
)
,
src_rows-1
)
;
sy
=
min
((
int
)
floor
(
s5
)
,
src_rows-1
)
;
uchar4
val
;
uchar4
val
;
int4
pos
=
src_offset
+
sy
*
src_step
+
sx
;
int4
pos
=
mad24
(
sy,
srcstep_in_pixel,
sx+srcoffset_in_pixel
)
;
val.s0
=
src[pos.s0]
;
val.s0
=
src[pos.s0]
;
val.s1
=
src[pos.s1]
;
val.s1
=
src[pos.s1]
;
val.s2
=
src[pos.s2]
;
val.s2
=
src[pos.s2]
;
val.s3
=
src[pos.s3]
;
val.s3
=
src[pos.s3]
;
__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dst_offset
+
dy
*
dst_step
+
gx
)
;
//__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dstoffset_in_pixel
+
dy
*
dststep_in_pixel
+
gx
)
;
uchar4
dVal
=
*d
;
//uchar4
dVal
=
*d
;
int4
con
=
(
GX
>=
0
&&
GX
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
pos
=
mad24
(
dy,
dststep_in_pixel,
gx+dstoffset_in_pixel
)
;
val
=
convert_uchar4
(
con
!=
0
)
?
val
:
dVal
;
pos.y++
;
pos.z+=2
;
*d
=
val
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos.x
)
=val
;
}
else
{
if
(
gx
>=
0
&&
gx
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.x]=val.x
;
}
if
(
gx+1
>=
0
&&
gx+1
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.y]=val.y
;
}
if
(
gx+2
>=
0
&&
gx+2
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.z]=val.z
;
}
}
}
}
__kernel
void
resizeNN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
__kernel
void
resizeNN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -304,8 +359,8 @@ __kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
...
@@ -304,8 +359,8 @@ __kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
F
s2
=
dy*ify
;
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
dpos
=
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
(
src_offset>>2
)
+
sy
*
(
src_step>>2
)
+
sx
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
dst[dpos]
=
src[spos]
;
...
@@ -313,7 +368,7 @@ __kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
...
@@ -313,7 +368,7 @@ __kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
}
}
__kernel
void
resizeNN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
__kernel
void
resizeNN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -323,16 +378,16 @@ __kernel void resizeNN_C1_D5(__global float * dst, __global float * src,
...
@@ -323,16 +378,16 @@ __kernel void resizeNN_C1_D5(__global float * dst, __global float * src,
F
s2
=
dy*ify
;
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
dpos
=
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx
;
int
spos
=
(
src_offset>>2
)
+
sy
*
(
src_step>>2
)
+
sx
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
dst[dpos]
=
src[spos]
;
}
}
__kernel
void
resizeNN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
__kernel
void
resizeNN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -343,8 +398,8 @@ __kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
...
@@ -343,8 +398,8 @@ __kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
int
s_row
=
floor
(
s2
)
;
int
s_row
=
floor
(
s2
)
;
int
sx
=
min
(
s_col,
src_cols-1
)
;
int
sx
=
min
(
s_col,
src_cols-1
)
;
int
sy
=
min
(
s_row,
src_rows-1
)
;
int
sy
=
min
(
s_row,
src_rows-1
)
;
int
dpos
=
(
dst_offset>>4
)
+
dy
*
(
dst_step>>4
)
+
dx
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
(
src_offset>>4
)
+
sy
*
(
src_step>>4
)
+
sx
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
dst[dpos]
=
src[spos]
;
...
...
modules/ocl/src/kernels/operator_setTo.cl
View file @
687d7639
...
@@ -34,13 +34,8 @@
...
@@ -34,13 +34,8 @@
//
//
//
//
/*
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
*/
__kernel
void
set_to_without_mask_C1_D0
(
float4
scalar,__global
uchar
*
dstMat,
__kernel
void
set_to_without_mask_C1_D0
(
uchar
scalar,__global
uchar
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
{
int
x=get_global_id
(
0
)
<<2
;
int
x=get_global_id
(
0
)
<<2
;
...
@@ -49,7 +44,8 @@ __kernel void set_to_without_mask_C1_D0(float4 scalar,__global uchar * dstMat,
...
@@ -49,7 +44,8 @@ __kernel void set_to_without_mask_C1_D0(float4 scalar,__global uchar * dstMat,
int
addr_end
=
mad24
(
y,dstStep_in_pixel,cols+offset_in_pixel
)
;
int
addr_end
=
mad24
(
y,dstStep_in_pixel,cols+offset_in_pixel
)
;
int
idx
=
mad24
(
y,dstStep_in_pixel,
(
int
)(
x+
offset_in_pixel
&
(
int
)
0xfffffffc
))
;
int
idx
=
mad24
(
y,dstStep_in_pixel,
(
int
)(
x+
offset_in_pixel
&
(
int
)
0xfffffffc
))
;
uchar4
out
;
uchar4
out
;
out.x
=
out.y
=
out.z
=
out.w
=
convert_uchar_sat
(
scalar.x
)
;
out.x
=
out.y
=
out.z
=
out.w
=
scalar
;
if
(
(
idx>=addr_start
)
&
(
idx+3
<
addr_end
)
&
(
y
<
rows
))
if
(
(
idx>=addr_start
)
&
(
idx+3
<
addr_end
)
&
(
y
<
rows
))
{
{
*
(
__global
uchar4*
)(
dstMat+idx
)
=
out
;
*
(
__global
uchar4*
)(
dstMat+idx
)
=
out
;
...
@@ -65,52 +61,7 @@ __kernel void set_to_without_mask_C1_D0(float4 scalar,__global uchar * dstMat,
...
@@ -65,52 +61,7 @@ __kernel void set_to_without_mask_C1_D0(float4 scalar,__global uchar * dstMat,
}
}
}
}
__kernel
void
set_to_without_mask_C4_D0
(
float4
scalar,__global
uchar4
*
dstMat,
__kernel
void
set_to_without_mask
(
GENTYPE
scalar,__global
GENTYPE
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,dstStep_in_pixel,x+
offset_in_pixel
)
;
dstMat[idx]
=
convert_uchar4_sat
(
scalar
)
;
}
}
__kernel
void
set_to_without_mask_C1_D4
(
float4
scalar,__global
int
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,
dstStep_in_pixel,
x+offset_in_pixel
)
;
dstMat[idx]
=
convert_int_sat
(
scalar.x
)
;
}
}
__kernel
void
set_to_without_mask_C4_D4
(
float4
scalar,__global
int4
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,dstStep_in_pixel,x+
offset_in_pixel
)
;
dstMat[idx]
=
convert_int4_sat
(
scalar
)
;
}
}
__kernel
void
set_to_without_mask_C1_D5
(
float4
scalar,__global
float
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,dstStep_in_pixel,x+
offset_in_pixel
)
;
dstMat[idx]
=
scalar.x
;
}
}
__kernel
void
set_to_without_mask_C4_D5
(
float4
scalar,__global
float4
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
{
int
x=get_global_id
(
0
)
;
int
x=get_global_id
(
0
)
;
...
@@ -121,4 +72,3 @@ __kernel void set_to_without_mask_C4_D5(float4 scalar,__global float4 * dstMat,
...
@@ -121,4 +72,3 @@ __kernel void set_to_without_mask_C4_D5(float4 scalar,__global float4 * dstMat,
dstMat[idx]
=
scalar
;
dstMat[idx]
=
scalar
;
}
}
}
}
modules/ocl/src/kernels/operator_setToM.cl
View file @
687d7639
...
@@ -35,12 +35,6 @@
...
@@ -35,12 +35,6 @@
//
//
/*#if
defined
(
__ATI__
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
*/
/*
/*
__kernel
void
set_to_with_mask_C1_D0
(
__kernel
void
set_to_with_mask_C1_D0
(
float4
scalar,
float4
scalar,
...
@@ -67,7 +61,7 @@ __kernel void set_to_with_mask_C1_D0(
...
@@ -67,7 +61,7 @@ __kernel void set_to_with_mask_C1_D0(
*/
*/
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
__kernel
void
set_to_with_mask_C1_D0
(
__kernel
void
set_to_with_mask_C1_D0
(
float4
scalar,
uchar
scalar,
__global
uchar*
dstMat,
__global
uchar*
dstMat,
int
cols,
int
cols,
int
rows,
int
rows,
...
@@ -85,7 +79,7 @@ __kernel void set_to_with_mask_C1_D0(
...
@@ -85,7 +79,7 @@ __kernel void set_to_with_mask_C1_D0(
int
mask_addr_start
=
mad24
(
y,maskStep,maskoffset
)
;
int
mask_addr_start
=
mad24
(
y,maskStep,maskoffset
)
;
int
mask_addr_end
=
mad24
(
y,maskStep,cols+maskoffset
)
;
int
mask_addr_end
=
mad24
(
y,maskStep,cols+maskoffset
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
&
(
int
)
0xfffffffc
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
&
(
int
)
0xfffffffc
)
;
uchar
out
=
convert_uchar_sat
(
scalar.x
)
;
int
off_mask
=
(
maskoffset
&
3
)
-
(
dstoffset_in_pixel
&
3
)
+3
;
int
off_mask
=
(
maskoffset
&
3
)
-
(
dstoffset_in_pixel
&
3
)
+3
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
)
if
(
(
x
<
cols
)
&
(
y
<
rows
)
)
...
@@ -107,104 +101,16 @@ __kernel void set_to_with_mask_C1_D0(
...
@@ -107,104 +101,16 @@ __kernel void set_to_with_mask_C1_D0(
temp_mask2.z
=
(
maskidx+6
>=mask_addr_start
)
&
(
maskidx+6
<
mask_addr_end
)
?
temp_mask2.z
:
0
;
temp_mask2.z
=
(
maskidx+6
>=mask_addr_start
)
&
(
maskidx+6
<
mask_addr_end
)
?
temp_mask2.z
:
0
;
temp_mask2.w
=
(
maskidx+7
>=mask_addr_start
)
&
(
maskidx+7
<
mask_addr_end
)
?
temp_mask2.w
:
0
;
temp_mask2.w
=
(
maskidx+7
>=mask_addr_start
)
&
(
maskidx+7
<
mask_addr_end
)
?
temp_mask2.w
:
0
;
uchar
trans_mask[10]
=
{temp_mask1.y,temp_mask1.z,temp_mask1.w,temp_mask.x,temp_mask.y,temp_mask.z,temp_mask.w,temp_mask2.x,temp_mask2.y,temp_mask2.z}
;
uchar
trans_mask[10]
=
{temp_mask1.y,temp_mask1.z,temp_mask1.w,temp_mask.x,temp_mask.y,temp_mask.z,temp_mask.w,temp_mask2.x,temp_mask2.y,temp_mask2.z}
;
temp_dst.x
=
(
dstidx>=dst_addr_start
)
&
(
dstidx<dst_addr_end
)
&
trans_mask[off_mask]
?
out
:
temp_dst.x
;
temp_dst.x
=
(
dstidx>=dst_addr_start
)
&
(
dstidx<dst_addr_end
)
&
trans_mask[off_mask]
?
scalar
:
temp_dst.x
;
temp_dst.y
=
(
dstidx+1>=dst_addr_start
)
&
(
dstidx+1<dst_addr_end
)
&
trans_mask[off_mask+1]
?
out
:
temp_dst.y
;
temp_dst.y
=
(
dstidx+1>=dst_addr_start
)
&
(
dstidx+1<dst_addr_end
)
&
trans_mask[off_mask+1]
?
scalar
:
temp_dst.y
;
temp_dst.z
=
(
dstidx+2>=dst_addr_start
)
&
(
dstidx+2<dst_addr_end
)
&
trans_mask[off_mask+2]
?
out
:
temp_dst.z
;
temp_dst.z
=
(
dstidx+2>=dst_addr_start
)
&
(
dstidx+2<dst_addr_end
)
&
trans_mask[off_mask+2]
?
scalar
:
temp_dst.z
;
temp_dst.w
=
(
dstidx+3>=dst_addr_start
)
&
(
dstidx+3<dst_addr_end
)
&
trans_mask[off_mask+3]
?
out
:
temp_dst.w
;
temp_dst.w
=
(
dstidx+3>=dst_addr_start
)
&
(
dstidx+3<dst_addr_end
)
&
trans_mask[off_mask+3]
?
scalar
:
temp_dst.w
;
*
(
__global
uchar4*
)(
dstMat+dstidx
)
=
temp_dst
;
*
(
__global
uchar4*
)(
dstMat+dstidx
)
=
temp_dst
;
}
}
}
}
__kernel
void
set_to_with_mask_C4_D0
(
__kernel
void
set_to_with_mask
(
float4
scalar,
GENTYPE
scalar,
__global
uchar4
*
dstMat,
__global
GENTYPE
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
convert_uchar4_sat
(
scalar
)
;
}
}
__kernel
void
set_to_with_mask_C1_D4
(
float4
scalar,
__global
int
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
convert_int_sat
(
scalar.x
)
;
}
}
__kernel
void
set_to_with_mask_C4_D4
(
float4
scalar,
__global
int4
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
convert_int4_sat
(
scalar
)
;
}
}
__kernel
void
set_to_with_mask_C1_D5
(
float4
scalar,
__global
float
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
scalar.x
;
}
}
__kernel
void
set_to_with_mask_C4_D5
(
float4
scalar,
__global
float4
*
dstMat,
int
cols,
int
cols,
int
rows,
int
rows,
int
dstStep_in_pixel,
int
dstStep_in_pixel,
...
...
modules/ocl/src/matrix_operations.cpp
View file @
687d7639
...
@@ -120,6 +120,7 @@ namespace cv
...
@@ -120,6 +120,7 @@ namespace cv
extern
const
char
*
operator_convertTo
;
extern
const
char
*
operator_convertTo
;
extern
const
char
*
operator_setTo
;
extern
const
char
*
operator_setTo
;
extern
const
char
*
operator_setToM
;
extern
const
char
*
operator_setToM
;
extern
const
char
*
convertC3C4
;
}
}
}
}
...
@@ -127,43 +128,98 @@ namespace cv
...
@@ -127,43 +128,98 @@ namespace cv
// convert_C3C4
// convert_C3C4
void
convert_C3C4
(
const
cl_mem
&
src
,
oclMat
&
dst
,
int
srcStep
)
void
convert_C3C4
(
const
cl_mem
&
src
,
oclMat
&
dst
,
int
srcStep
)
{
{
int
dstStep
=
dst
.
step1
()
/
dst
.
channels
();
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
channels
();
int
pixel_end
=
dst
.
wholecols
*
dst
.
wholerows
-
1
;
Context
*
clCxt
=
dst
.
clCxt
;
Context
*
clCxt
=
dst
.
clCxt
;
string
kernelName
=
"convertC3C4"
;
string
kernelName
=
"convertC3C4"
;
char
compile_option
[
32
];
switch
(
dst
.
depth
())
{
case
0
:
sprintf
(
compile_option
,
"-D GENTYPE4=uchar4"
);
break
;
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE4=char4"
);
break
;
case
2
:
sprintf
(
compile_option
,
"-D GENTYPE4=ushort4"
);
break
;
case
3
:
sprintf
(
compile_option
,
"-D GENTYPE4=short4"
);
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE4=int4"
);
break
;
case
5
:
sprintf
(
compile_option
,
"-D GENTYPE4=float4"
);
break
;
case
6
:
sprintf
(
compile_option
,
"-D GENTYPE4=double4"
);
break
;
default
:
CV_Error
(
-
217
,
"unknown depth"
);
}
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
wholecols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
wholecols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
wholerows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
wholerows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
pixel_end
));
size_t
globalThreads
[
3
]
=
{(
dst
.
wholecols
*
dst
.
wholerows
+
255
)
/
256
*
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{(
(
dst
.
wholecols
*
dst
.
wholerows
+
3
)
/
4
+
255
)
/
256
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
dst
.
elemSize1
()
>>
1
);
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// convert_C4C3
// convert_C4C3
void
convert_C4C3
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
dstStep
)
void
convert_C4C3
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
dstStep
)
{
{
int
srcStep
=
src
.
step1
()
/
src
.
channels
();
int
srcStep_in_pixel
=
src
.
step1
()
/
src
.
channels
();
int
pixel_end
=
src
.
wholecols
*
src
.
wholerows
-
1
;
Context
*
clCxt
=
src
.
clCxt
;
Context
*
clCxt
=
src
.
clCxt
;
string
kernelName
=
"convertC4C3"
;
string
kernelName
=
"convertC4C3"
;
char
compile_option
[
32
];
switch
(
src
.
depth
())
{
case
0
:
sprintf
(
compile_option
,
"-D GENTYPE4=uchar4"
);
break
;
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE4=char4"
);
break
;
case
2
:
sprintf
(
compile_option
,
"-D GENTYPE4=ushort4"
);
break
;
case
3
:
sprintf
(
compile_option
,
"-D GENTYPE4=short4"
);
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE4=int4"
);
break
;
case
5
:
sprintf
(
compile_option
,
"-D GENTYPE4=float4"
);
break
;
case
6
:
sprintf
(
compile_option
,
"-D GENTYPE4=double4"
);
break
;
default
:
CV_Error
(
-
217
,
"unknown depth"
);
}
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
wholecols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
wholecols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
wholerows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
wholerows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep
_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
pixel_end
));
size_t
globalThreads
[
3
]
=
{(
src
.
wholecols
*
src
.
wholerows
+
255
)
/
256
*
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{(
(
src
.
wholecols
*
src
.
wholerows
+
3
)
/
4
+
255
)
/
256
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
src
.
elemSize1
()
>>
1
);
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
);
}
}
void
cv
::
ocl
::
oclMat
::
upload
(
const
Mat
&
m
)
void
cv
::
ocl
::
oclMat
::
upload
(
const
Mat
&
m
)
...
@@ -173,23 +229,47 @@ void cv::ocl::oclMat::upload(const Mat &m)
...
@@ -173,23 +229,47 @@ void cv::ocl::oclMat::upload(const Mat &m)
Point
ofs
;
Point
ofs
;
m
.
locateROI
(
wholeSize
,
ofs
);
m
.
locateROI
(
wholeSize
,
ofs
);
int
type
=
m
.
type
();
int
type
=
m
.
type
();
//if(m.channels() == 3)
if
(
m
.
channels
()
==
3
)
//type = CV_MAKETYPE(m.depth(), 4);
{
type
=
CV_MAKETYPE
(
m
.
depth
(),
4
);
}
create
(
wholeSize
,
type
);
create
(
wholeSize
,
type
);
//if(m.channels() == 3)
if
(
m
.
channels
()
==
3
)
{
int
pitch
=
wholeSize
.
width
*
3
*
m
.
elemSize1
();
int
tail_padding
=
m
.
elemSize1
()
*
3072
;
int
err
;
cl_mem
temp
=
clCreateBuffer
(
clCxt
->
impl
->
clContext
,
CL_MEM_READ_WRITE
,
(
pitch
*
wholeSize
.
height
+
tail_padding
-
1
)
/
tail_padding
*
tail_padding
,
0
,
&
err
);
openCLVerifyCall
(
err
);
openCLMemcpy2D
(
clCxt
,
temp
,
pitch
,
m
.
datastart
,
m
.
step
,
wholeSize
.
width
*
m
.
elemSize
(),
wholeSize
.
height
,
clMemcpyHostToDevice
,
3
);
convert_C3C4
(
temp
,
*
this
,
pitch
);
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
// 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL));
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
//for(int i=0;i<wholeSize.height;i++)
//{
//{
//int pitch = GPU_MATRIX_MALLOC_STEP(wholeSize.width * 3 * m.elemSize1());
// int *a = cputemp+i*wholeSize.width * 3,*b = cpudata + i*this->step/sizeof(int);
//int err;
// for(int j=0;j<wholeSize.width;j++)
//cl_mem temp = clCreateBuffer(clCxt->clContext,CL_MEM_READ_WRITE,
// {
//pitch*wholeSize.height,0,&err);
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
//CV_DbgAssert(err==0);
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
//openCLMemcpy2D(clCxt,temp,pitch,m.datastart,m.step,wholeSize.width*m.elemSize(),wholeSize.height,clMemcpyHostToDevice);
// }
//convert_C3C4(temp, *this, pitch);
//}
//}
//else
//delete []cputemp;
//delete []cpudata;
openCLSafeCall
(
clReleaseMemObject
(
temp
));
}
else
{
openCLMemcpy2D
(
clCxt
,
data
,
step
,
m
.
datastart
,
m
.
step
,
wholeSize
.
width
*
elemSize
(),
wholeSize
.
height
,
clMemcpyHostToDevice
);
openCLMemcpy2D
(
clCxt
,
data
,
step
,
m
.
datastart
,
m
.
step
,
wholeSize
.
width
*
elemSize
(),
wholeSize
.
height
,
clMemcpyHostToDevice
);
}
rows
=
m
.
rows
;
rows
=
m
.
rows
;
cols
=
m
.
cols
;
cols
=
m
.
cols
;
...
@@ -201,23 +281,47 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
...
@@ -201,23 +281,47 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
{
{
CV_DbgAssert
(
!
this
->
empty
());
CV_DbgAssert
(
!
this
->
empty
());
int
t
=
type
();
int
t
=
type
();
//if(download_channels == 3)
if
(
download_channels
==
3
)
//t = CV_MAKETYPE(depth(), 3);
{
t
=
CV_MAKETYPE
(
depth
(),
3
);
}
m
.
create
(
wholerows
,
wholecols
,
t
);
m
.
create
(
wholerows
,
wholecols
,
t
);
//if(download_channels == 3)
if
(
download_channels
==
3
)
{
int
pitch
=
wholecols
*
3
*
m
.
elemSize1
();
int
tail_padding
=
m
.
elemSize1
()
*
3072
;
int
err
;
cl_mem
temp
=
clCreateBuffer
(
clCxt
->
impl
->
clContext
,
CL_MEM_READ_WRITE
,
(
pitch
*
wholerows
+
tail_padding
-
1
)
/
tail_padding
*
tail_padding
,
0
,
&
err
);
openCLVerifyCall
(
err
);
convert_C4C3
(
*
this
,
temp
,
pitch
/
m
.
elemSize1
());
openCLMemcpy2D
(
clCxt
,
m
.
data
,
m
.
step
,
temp
,
pitch
,
wholecols
*
m
.
elemSize
(),
wholerows
,
clMemcpyDeviceToHost
,
3
);
//int* cputemp=new int[wholecols*wholerows * 3];
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
// 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL));
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
//for(int i=0;i<wholerows;i++)
//{
//{
//int pitch = GPU_MATRIX_MALLOC_STEP(wholecols * 3 * m.elemSize1());
// int *a = cputemp+i*wholecols * 3,*b = cpudata + i*this->step/sizeof(int);
//int err;
// for(int j=0;j<wholecols;j++)
//cl_mem temp = clCreateBuffer(clCxt->clContext,CL_MEM_READ_WRITE,
// {
//pitch*wholerows,0,&err);
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
//CV_DbgAssert(err==0);
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
//convert_C4C3(*this, temp, pitch/m.elemSize1());
// }
//openCLMemcpy2D(clCxt,m.data,m.step,temp,pitch,wholecols*m.elemSize(),wholerows,clMemcpyDeviceToHost);
//}
//}
//else
//delete []cputemp;
//delete []cpudata;
openCLSafeCall
(
clReleaseMemObject
(
temp
));
}
else
{
openCLMemcpy2D
(
clCxt
,
m
.
data
,
m
.
step
,
data
,
step
,
wholecols
*
elemSize
(),
wholerows
,
clMemcpyDeviceToHost
);
openCLMemcpy2D
(
clCxt
,
m
.
data
,
m
.
step
,
data
,
step
,
wholecols
*
elemSize
(),
wholerows
,
clMemcpyDeviceToHost
);
}
Size
wholesize
;
Size
wholesize
;
Point
ofs
;
Point
ofs
;
locateROI
(
wholesize
,
ofs
);
locateROI
(
wholesize
,
ofs
);
...
@@ -373,11 +477,7 @@ oclMat &cv::ocl::oclMat::operator = (const Scalar &s)
...
@@ -373,11 +477,7 @@ oclMat &cv::ocl::oclMat::operator = (const Scalar &s)
void
set_to_withoutmask_run
(
const
oclMat
&
dst
,
const
Scalar
&
scalar
,
string
kernelName
)
void
set_to_withoutmask_run
(
const
oclMat
&
dst
,
const
Scalar
&
scalar
,
string
kernelName
)
{
{
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
cl_float4
val
;
val
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
s
[
3
]
=
scalar
.
val
[
3
];
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
];
size_t
globalThreads
[
3
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
...
@@ -388,25 +488,168 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
...
@@ -388,25 +488,168 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
{
{
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
}
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_float4
)
,
(
void
*
)
&
val
));
char
compile_option
[
32
];
union
sc
{
cl_uchar4
uval
;
cl_char4
cval
;
cl_ushort4
usval
;
cl_short4
shval
;
cl_int4
ival
;
cl_float4
fval
;
cl_double4
dval
;
}
val
;
switch
(
dst
.
depth
())
{
case
0
:
val
.
uval
.
s
[
0
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
0
]);
val
.
uval
.
s
[
1
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
1
]);
val
.
uval
.
s
[
2
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
2
]);
val
.
uval
.
s
[
3
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar
)
,
(
void
*
)
&
val
.
uval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar4
)
,
(
void
*
)
&
val
.
uval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
1
:
val
.
cval
.
s
[
0
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
0
]);
val
.
cval
.
s
[
1
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
1
]);
val
.
cval
.
s
[
2
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
2
]);
val
.
cval
.
s
[
3
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=char"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
)
,
(
void
*
)
&
val
.
cval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=char4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char4
)
,
(
void
*
)
&
val
.
cval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
2
:
val
.
usval
.
s
[
0
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
0
]);
val
.
usval
.
s
[
1
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
1
]);
val
.
usval
.
s
[
2
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
2
]);
val
.
usval
.
s
[
3
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort
)
,
(
void
*
)
&
val
.
usval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort4
)
,
(
void
*
)
&
val
.
usval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
3
:
val
.
shval
.
s
[
0
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
0
]);
val
.
shval
.
s
[
1
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
1
]);
val
.
shval
.
s
[
2
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
2
]);
val
.
shval
.
s
[
3
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=short"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short
)
,
(
void
*
)
&
val
.
shval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=short4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short4
)
,
(
void
*
)
&
val
.
shval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
4
:
val
.
ival
.
s
[
0
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
0
]);
val
.
ival
.
s
[
1
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
1
]);
val
.
ival
.
s
[
2
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
2
]);
val
.
ival
.
s
[
3
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=int"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
val
.
ival
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=int4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
val
.
ival
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
5
:
val
.
fval
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
fval
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
fval
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
fval
.
s
[
3
]
=
scalar
.
val
[
3
];
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=float"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
val
.
fval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=float4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float4
)
,
(
void
*
)
&
val
.
fval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
6
:
val
.
dval
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
dval
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
dval
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
dval
.
s
[
3
]
=
scalar
.
val
[
3
];
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=double"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
)
,
(
void
*
)
&
val
.
dval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=double4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double4
)
,
(
void
*
)
&
val
.
dval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
default
:
CV_Error
(
-
217
,
"unknown depth"
);
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset_in_pixel
));
openCLExecuteKernel
(
dst
.
clCxt
,
&
operator_setTo
,
kernelName
,
globalThreads
,
openCLExecuteKernel
(
dst
.
clCxt
,
&
operator_setTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
dst
.
channels
(),
dst
.
depth
()
);
localThreads
,
args
,
-
1
,
-
1
,
compile_option
);
}
}
void
set_to_withmask_run
(
const
oclMat
&
dst
,
const
Scalar
&
scalar
,
const
oclMat
&
mask
,
string
kernelName
)
void
set_to_withmask_run
(
const
oclMat
&
dst
,
const
Scalar
&
scalar
,
const
oclMat
&
mask
,
string
kernelName
)
{
{
CV_DbgAssert
(
dst
.
rows
==
mask
.
rows
&&
dst
.
cols
==
mask
.
cols
);
CV_DbgAssert
(
dst
.
rows
==
mask
.
rows
&&
dst
.
cols
==
mask
.
cols
);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
cl_float4
val
;
val
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
s
[
3
]
=
scalar
.
val
[
3
];
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
];
size_t
globalThreads
[
3
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
...
@@ -417,7 +660,155 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
...
@@ -417,7 +660,155 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
}
}
int
step_in_pixel
=
dst
.
step
/
dst
.
elemSize
(),
offset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
int
step_in_pixel
=
dst
.
step
/
dst
.
elemSize
(),
offset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_float4
)
,
(
void
*
)
&
val
));
char
compile_option
[
32
];
union
sc
{
cl_uchar4
uval
;
cl_char4
cval
;
cl_ushort4
usval
;
cl_short4
shval
;
cl_int4
ival
;
cl_float4
fval
;
cl_double4
dval
;
}
val
;
switch
(
dst
.
depth
())
{
case
0
:
val
.
uval
.
s
[
0
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
0
]);
val
.
uval
.
s
[
1
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
1
]);
val
.
uval
.
s
[
2
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
2
]);
val
.
uval
.
s
[
3
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar
)
,
(
void
*
)
&
val
.
uval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar4
)
,
(
void
*
)
&
val
.
uval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
1
:
val
.
cval
.
s
[
0
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
0
]);
val
.
cval
.
s
[
1
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
1
]);
val
.
cval
.
s
[
2
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
2
]);
val
.
cval
.
s
[
3
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=char"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
)
,
(
void
*
)
&
val
.
cval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=char4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char4
)
,
(
void
*
)
&
val
.
cval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
2
:
val
.
usval
.
s
[
0
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
0
]);
val
.
usval
.
s
[
1
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
1
]);
val
.
usval
.
s
[
2
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
2
]);
val
.
usval
.
s
[
3
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort
)
,
(
void
*
)
&
val
.
usval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort4
)
,
(
void
*
)
&
val
.
usval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
3
:
val
.
shval
.
s
[
0
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
0
]);
val
.
shval
.
s
[
1
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
1
]);
val
.
shval
.
s
[
2
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
2
]);
val
.
shval
.
s
[
3
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=short"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short
)
,
(
void
*
)
&
val
.
shval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=short4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short4
)
,
(
void
*
)
&
val
.
shval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
4
:
val
.
ival
.
s
[
0
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
0
]);
val
.
ival
.
s
[
1
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
1
]);
val
.
ival
.
s
[
2
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
2
]);
val
.
ival
.
s
[
3
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=int"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
val
.
ival
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=int4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
val
.
ival
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
5
:
val
.
fval
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
fval
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
fval
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
fval
.
s
[
3
]
=
scalar
.
val
[
3
];
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=float"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
val
.
fval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=float4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float4
)
,
(
void
*
)
&
val
.
fval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
case
6
:
val
.
dval
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
dval
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
dval
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
dval
.
s
[
3
]
=
scalar
.
val
[
3
];
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=double"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
)
,
(
void
*
)
&
val
.
dval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=double4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double4
)
,
(
void
*
)
&
val
.
dval
));
break
;
default
:
CV_Error
(
-
217
,
"unsupported channels"
);
}
break
;
default
:
CV_Error
(
-
217
,
"unknown depth"
);
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
...
@@ -427,7 +818,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
...
@@ -427,7 +818,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
mask
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
mask
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
mask
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
mask
.
offset
));
openCLExecuteKernel
(
dst
.
clCxt
,
&
operator_setToM
,
kernelName
,
globalThreads
,
openCLExecuteKernel
(
dst
.
clCxt
,
&
operator_setToM
,
kernelName
,
globalThreads
,
localThreads
,
args
,
dst
.
channels
(),
dst
.
depth
()
);
localThreads
,
args
,
-
1
,
-
1
,
compile_option
);
}
}
oclMat
&
cv
::
ocl
::
oclMat
::
setTo
(
const
Scalar
&
scalar
,
const
oclMat
&
mask
)
oclMat
&
cv
::
ocl
::
oclMat
::
setTo
(
const
Scalar
&
scalar
,
const
oclMat
&
mask
)
...
@@ -445,13 +836,27 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
...
@@ -445,13 +836,27 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
//openCLSafeCall(clEnqueueWriteBuffer(this->clCxt->clCmdQueue,
//openCLSafeCall(clEnqueueWriteBuffer(this->clCxt->clCmdQueue,
// (cl_mem)mem,1,0,sizeof(double)*4,s,0,0,0));
// (cl_mem)mem,1,0,sizeof(double)*4,s,0,0,0));
if
(
mask
.
empty
())
if
(
mask
.
empty
())
{
if
(
type
()
==
CV_8UC1
)
{
set_to_withoutmask_run
(
*
this
,
scalar
,
"set_to_without_mask_C1_D0"
);
}
else
{
{
set_to_withoutmask_run
(
*
this
,
scalar
,
"set_to_without_mask"
);
set_to_withoutmask_run
(
*
this
,
scalar
,
"set_to_without_mask"
);
}
}
}
else
{
if
(
type
()
==
CV_8UC1
)
{
set_to_withmask_run
(
*
this
,
scalar
,
mask
,
"set_to_with_mask_C1_D0"
);
}
else
else
{
{
set_to_withmask_run
(
*
this
,
scalar
,
mask
,
"set_to_with_mask"
);
set_to_withmask_run
(
*
this
,
scalar
,
mask
,
"set_to_with_mask"
);
}
}
}
return
*
this
;
return
*
this
;
}
}
...
...
modules/ocl/src/precomp.hpp
View file @
687d7639
...
@@ -97,7 +97,7 @@ namespace cv
...
@@ -97,7 +97,7 @@ namespace cv
size_t
widthInBytes
,
size_t
height
);
size_t
widthInBytes
,
size_t
height
);
void
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
void
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
const
void
*
src
,
size_t
spitch
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
);
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
,
int
channels
=-
1
);
void
openCLCopyBuffer2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
int
dst_offset
,
void
openCLCopyBuffer2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
int
dst_offset
,
const
void
*
src
,
size_t
spitch
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
int
src_offset
,
enum
openCLMemcpyKind
kind
);
size_t
width
,
size_t
height
,
int
src_offset
,
enum
openCLMemcpyKind
kind
);
...
@@ -126,8 +126,8 @@ namespace cv
...
@@ -126,8 +126,8 @@ namespace cv
cl_mem
openCLMalloc
(
cl_context
clCxt
,
size_t
size
,
cl_mem_flags
flags
,
void
*
host_ptr
);
cl_mem
openCLMalloc
(
cl_context
clCxt
,
size_t
size
,
cl_mem_flags
flags
,
void
*
host_ptr
);
void
openCLMemcpy2DWithNoPadding
(
cl_command_queue
command_queue
,
cl_mem
buffer
,
size_t
size
,
size_t
offset
,
void
*
ptr
,
//
void openCLMemcpy2DWithNoPadding(cl_command_queue command_queue, cl_mem buffer, size_t size, size_t offset, void *ptr,
enum
openCLMemcpyKind
kind
,
cl_bool
blocking_write
);
//
enum openCLMemcpyKind kind, cl_bool blocking_write);
int
savetofile
(
const
Context
*
clcxt
,
cl_program
&
program
,
const
char
*
fileName
);
int
savetofile
(
const
Context
*
clcxt
,
cl_program
&
program
,
const
char
*
fileName
);
struct
Context
::
Impl
struct
Context
::
Impl
{
{
...
...
modules/ocl/test/test_imgproc.cpp
View file @
687d7639
modules/ocl/test/test_matrix_operation.cpp
View file @
687d7639
...
@@ -396,6 +396,101 @@ TEST_P(SetTo, With_mask)
...
@@ -396,6 +396,101 @@ TEST_P(SetTo, With_mask)
}
}
}
}
//convertC3C4
PARAM_TEST_CASE
(
convertC3C4
,
MatType
,
cv
::
Size
)
{
int
type
;
cv
::
Size
ksize
;
//src mat
cv
::
Mat
mat1
;
cv
::
Mat
dst
;
// set up roi
int
roicols
;
int
roirows
;
int
src1x
;
int
src1y
;
int
dstx
;
int
dsty
;
//src mat with roi
cv
::
Mat
mat1_roi
;
cv
::
Mat
dst_roi
;
std
::
vector
<
cv
::
ocl
::
Info
>
oclinfo
;
//ocl dst mat for testing
cv
::
ocl
::
oclMat
gdst_whole
;
//ocl mat with roi
cv
::
ocl
::
oclMat
gmat1
;
cv
::
ocl
::
oclMat
gdst
;
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
ksize
=
GET_PARAM
(
1
);
//dst = randomMat(rng, size, type, 5, 16, false);
int
devnums
=
getDevice
(
oclinfo
);
CV_Assert
(
devnums
>
0
);
//if you want to use undefault device, set it here
//setDevice(oclinfo[1]);
}
void
random_roi
()
{
#ifdef RANDOMROI
//randomize ROI
cv
::
RNG
&
rng
=
TS
::
ptr
()
->
get_rng
();
roicols
=
rng
.
uniform
(
2
,
mat1
.
cols
);
roirows
=
rng
.
uniform
(
2
,
mat1
.
rows
);
src1x
=
rng
.
uniform
(
0
,
mat1
.
cols
-
roicols
);
src1y
=
rng
.
uniform
(
0
,
mat1
.
rows
-
roirows
);
dstx
=
rng
.
uniform
(
0
,
dst
.
cols
-
roicols
);
dsty
=
rng
.
uniform
(
0
,
dst
.
rows
-
roirows
);
#else
roicols
=
mat1
.
cols
;
roirows
=
mat1
.
rows
;
src1x
=
0
;
src1y
=
0
;
dstx
=
0
;
dsty
=
0
;
#endif
mat1_roi
=
mat1
(
Rect
(
src1x
,
src1y
,
roicols
,
roirows
));
dst_roi
=
dst
(
Rect
(
dstx
,
dsty
,
roicols
,
roirows
));
gdst_whole
=
dst
;
gdst
=
gdst_whole
(
Rect
(
dstx
,
dsty
,
roicols
,
roirows
));
gmat1
=
mat1_roi
;
}
};
TEST_P
(
convertC3C4
,
Accuracy
)
{
cv
::
RNG
&
rng
=
TS
::
ptr
()
->
get_rng
();
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
//random_roi();
int
width
=
rng
.
uniform
(
2
,
MWIDTH
);
int
height
=
rng
.
uniform
(
2
,
MHEIGHT
);
cv
::
Size
size
(
width
,
height
);
mat1
=
randomMat
(
rng
,
size
,
type
,
0
,
40
,
false
);
gmat1
=
mat1
;
cv
::
Mat
cpu_dst
;
gmat1
.
download
(
cpu_dst
);
char
sss
[
1024
];
sprintf
(
sss
,
"cols=%d,rows=%d"
,
mat1
.
cols
,
mat1
.
rows
);
EXPECT_MAT_NEAR
(
mat1
,
cpu_dst
,
0.0
,
sss
);
}
}
INSTANTIATE_TEST_CASE_P
(
MatrixOperation
,
ConvertTo
,
Combine
(
INSTANTIATE_TEST_CASE_P
(
MatrixOperation
,
ConvertTo
,
Combine
(
Values
(
CV_8UC1
,
CV_8UC4
,
CV_32SC1
,
CV_32SC4
,
CV_32FC1
,
CV_32FC4
),
Values
(
CV_8UC1
,
CV_8UC4
,
CV_32SC1
,
CV_32SC4
,
CV_32FC1
,
CV_32FC4
),
...
@@ -409,4 +504,7 @@ INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
...
@@ -409,4 +504,7 @@ INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
Values
(
CV_8UC1
,
CV_8UC4
,
CV_32SC1
,
CV_32SC4
,
CV_32FC1
,
CV_32FC4
),
Values
(
CV_8UC1
,
CV_8UC4
,
CV_32SC1
,
CV_32SC4
,
CV_32FC1
,
CV_32FC4
),
Values
(
false
)));
// Values(false) is the reserved parameter
Values
(
false
)));
// Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P
(
MatrixOperation
,
convertC3C4
,
Combine
(
Values
(
CV_8UC3
,
CV_32SC3
,
CV_32FC3
),
Values
(
cv
::
Size
())));
#endif
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment