Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7ee1d5f6
Commit
7ee1d5f6
authored
Apr 22, 2014
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ipp: added LUT optimization
parent
8114e071
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
241 additions
and
3 deletions
+241
-3
private.hpp
modules/core/include/opencv2/core/private.hpp
+2
-0
convert.cpp
modules/core/src/convert.cpp
+235
-2
morph.cpp
modules/imgproc/src/morph.cpp
+4
-1
No files found.
modules/core/include/opencv2/core/private.hpp
View file @
7ee1d5f6
...
@@ -218,6 +218,8 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
...
@@ -218,6 +218,8 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
# endif
# endif
# define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR)
# define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR)
#define IPP_ALIGN 32 // required for AVX optimization
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
static
inline
IppiSize
ippiSize
(
int
width
,
int
height
)
static
inline
IppiSize
ippiSize
(
int
width
,
int
height
)
...
...
modules/core/src/convert.cpp
View file @
7ee1d5f6
...
@@ -1543,10 +1543,10 @@ static LUTFunc lutTab[] =
...
@@ -1543,10 +1543,10 @@ static LUTFunc lutTab[] =
static
bool
ocl_LUT
(
InputArray
_src
,
InputArray
_lut
,
OutputArray
_dst
)
static
bool
ocl_LUT
(
InputArray
_src
,
InputArray
_lut
,
OutputArray
_dst
)
{
{
int
dtype
=
_dst
.
type
(),
lcn
=
_lut
.
channels
(),
dcn
=
CV_MAT_CN
(
dtype
),
ddepth
=
CV_MAT_DEPTH
(
dtype
);
int
lcn
=
_lut
.
channels
(),
dcn
=
_src
.
channels
(),
ddepth
=
_lut
.
depth
(
);
UMat
src
=
_src
.
getUMat
(),
lut
=
_lut
.
getUMat
();
UMat
src
=
_src
.
getUMat
(),
lut
=
_lut
.
getUMat
();
_dst
.
create
(
src
.
size
(),
dtype
);
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
dcn
)
);
UMat
dst
=
_dst
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
ocl
::
Kernel
k
(
"LUT"
,
ocl
::
core
::
lut_oclsrc
,
ocl
::
Kernel
k
(
"LUT"
,
ocl
::
core
::
lut_oclsrc
,
...
@@ -1564,6 +1564,201 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
...
@@ -1564,6 +1564,201 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
#endif
#endif
#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
namespace
ipp
{
#if 0 // there are no performance benefits (PR #2653)
// Single-channel IPP lookup-table loop body.
// Kept for reference only: the surrounding "#if 0" disables it because it
// showed no performance benefit (PR #2653).
class IppLUTParallelBody_LUTC1 : public ParallelLoopBody
{
public:
    bool* ok;          // shared status flag owned by the caller
    const Mat& src_;
    const Mat& lut_;
    Mat& dst_;

    // Common signature used to call either palette function through one pointer.
    typedef IppStatus (*IppFn)(const Ipp8u* pSrc, int srcStep, void* pDst, int dstStep,
                               IppiSize roiSize, const void* pTable, int nBitSize);
    IppFn fn;

    int width;         // row width in elements: cols * channels

    IppLUTParallelBody_LUTC1(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
        : ok(_ok), src_(src), lut_(lut), dst_(dst)
    {
        width = dst.cols * dst.channels();

        // Select the IPP routine from the byte size of one destination element.
        const size_t depthBytes = CV_ELEM_SIZE1(dst.depth());
        if (depthBytes == 1)
            fn = (IppFn)ippiLUTPalette_8u_C1R;
        else if (depthBytes == 4)
            fn = (IppFn)ippiLUTPalette_8u32u_C1R;
        else
            fn = NULL;

        *ok = (fn != NULL);
    }

    // Processes rows [range.start, range.end); does nothing once *ok is false.
    void operator()( const cv::Range& range ) const
    {
        if (!*ok)
            return;

        Mat srcRows = src_.rowRange(range.start, range.end);
        Mat dstRows = dst_.rowRange(range.start, range.end);

        IppiSize roi = { width, dstRows.rows };

        CV_DbgAssert(fn != NULL);
        const IppStatus status = fn(srcRows.data, (int)srcRows.step[0],
                                    dstRows.data, (int)dstRows.step[0],
                                    roi, lut_.data, 8);
        if (status < 0)
        {
            // Record the IPP failure and tell the caller to fall back.
            setIppErrorStatus();
            *ok = false;
        }
    }

private:
    // Non-copyable: declared but intentionally not defined.
    IppLUTParallelBody_LUTC1(const IppLUTParallelBody_LUTC1&);
    IppLUTParallelBody_LUTC1& operator=(const IppLUTParallelBody_LUTC1&);
};
#endif
class
IppLUTParallelBody_LUTCN
:
public
ParallelLoopBody
{
public
:
bool
*
ok
;
const
Mat
&
src_
;
const
Mat
&
lut_
;
Mat
&
dst_
;
int
lutcn
;
uchar
*
lutBuffer
;
uchar
*
lutTable
[
4
];
IppLUTParallelBody_LUTCN
(
const
Mat
&
src
,
const
Mat
&
lut
,
Mat
&
dst
,
bool
*
_ok
)
:
ok
(
_ok
),
src_
(
src
),
lut_
(
lut
),
dst_
(
dst
),
lutBuffer
(
NULL
)
{
lutcn
=
lut
.
channels
();
IppiSize
sz256
=
{
256
,
1
};
size_t
elemSize1
=
dst
.
elemSize1
();
CV_DbgAssert
(
elemSize1
==
1
);
lutBuffer
=
(
uchar
*
)
ippMalloc
(
256
*
(
int
)
elemSize1
*
4
);
lutTable
[
0
]
=
lutBuffer
+
0
;
lutTable
[
1
]
=
lutBuffer
+
1
*
256
*
elemSize1
;
lutTable
[
2
]
=
lutBuffer
+
2
*
256
*
elemSize1
;
lutTable
[
3
]
=
lutBuffer
+
3
*
256
*
elemSize1
;
CV_DbgAssert
(
lutcn
==
3
||
lutcn
==
4
);
if
(
lutcn
==
3
)
{
IppStatus
status
=
ippiCopy_8u_C3P3R
(
lut
.
data
,
(
int
)
lut
.
step
[
0
],
lutTable
,
(
int
)
lut
.
step
[
0
],
sz256
);
if
(
status
<
0
)
{
setIppErrorStatus
();
return
;
}
}
else
if
(
lutcn
==
4
)
{
IppStatus
status
=
ippiCopy_8u_C4P4R
(
lut
.
data
,
(
int
)
lut
.
step
[
0
],
lutTable
,
(
int
)
lut
.
step
[
0
],
sz256
);
if
(
status
<
0
)
{
setIppErrorStatus
();
return
;
}
}
*
ok
=
true
;
}
~
IppLUTParallelBody_LUTCN
()
{
if
(
lutBuffer
!=
NULL
)
ippFree
(
lutBuffer
);
lutBuffer
=
NULL
;
lutTable
[
0
]
=
NULL
;
}
void
operator
()(
const
cv
::
Range
&
range
)
const
{
if
(
!*
ok
)
return
;
const
int
row0
=
range
.
start
;
const
int
row1
=
range
.
end
;
Mat
src
=
src_
.
rowRange
(
row0
,
row1
);
Mat
dst
=
dst_
.
rowRange
(
row0
,
row1
);
if
(
lutcn
==
3
)
{
if
(
ippiLUTPalette_8u_C3R
(
src
.
data
,
(
int
)
src
.
step
[
0
],
dst
.
data
,
(
int
)
dst
.
step
[
0
],
ippiSize
(
dst
.
size
()),
lutTable
,
8
)
>=
0
)
return
;
}
else
if
(
lutcn
==
4
)
{
if
(
ippiLUTPalette_8u_C4R
(
src
.
data
,
(
int
)
src
.
step
[
0
],
dst
.
data
,
(
int
)
dst
.
step
[
0
],
ippiSize
(
dst
.
size
()),
lutTable
,
8
)
>=
0
)
return
;
}
setIppErrorStatus
();
*
ok
=
false
;
}
private
:
IppLUTParallelBody_LUTCN
(
const
IppLUTParallelBody_LUTCN
&
);
IppLUTParallelBody_LUTCN
&
operator
=
(
const
IppLUTParallelBody_LUTCN
&
);
};
}
// namespace ipp
#endif // IPP
class
LUTParallelBody
:
public
ParallelLoopBody
{
public
:
bool
*
ok
;
const
Mat
&
src_
;
const
Mat
&
lut_
;
Mat
&
dst_
;
LUTFunc
func
;
LUTParallelBody
(
const
Mat
&
src
,
const
Mat
&
lut
,
Mat
&
dst
,
bool
*
_ok
)
:
ok
(
_ok
),
src_
(
src
),
lut_
(
lut
),
dst_
(
dst
)
{
func
=
lutTab
[
lut
.
depth
()];
*
ok
=
(
func
!=
NULL
);
}
void
operator
()(
const
cv
::
Range
&
range
)
const
{
CV_DbgAssert
(
*
ok
);
const
int
row0
=
range
.
start
;
const
int
row1
=
range
.
end
;
Mat
src
=
src_
.
rowRange
(
row0
,
row1
);
Mat
dst
=
dst_
.
rowRange
(
row0
,
row1
);
int
cn
=
src
.
channels
();
int
lutcn
=
lut_
.
channels
();
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
,
0
};
uchar
*
ptrs
[
2
];
NAryMatIterator
it
(
arrays
,
ptrs
);
int
len
=
(
int
)
it
.
size
;
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
func
(
ptrs
[
0
],
lut_
.
data
,
ptrs
[
1
],
len
,
cn
,
lutcn
);
}
private
:
LUTParallelBody
(
const
LUTParallelBody
&
);
LUTParallelBody
&
operator
=
(
const
LUTParallelBody
&
);
};
}
}
void
cv
::
LUT
(
InputArray
_src
,
InputArray
_lut
,
OutputArray
_dst
)
void
cv
::
LUT
(
InputArray
_src
,
InputArray
_lut
,
OutputArray
_dst
)
...
@@ -1582,6 +1777,44 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
...
@@ -1582,6 +1777,44 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
_dst
.
create
(
src
.
dims
,
src
.
size
,
CV_MAKETYPE
(
_lut
.
depth
(),
cn
));
_dst
.
create
(
src
.
dims
,
src
.
size
,
CV_MAKETYPE
(
_lut
.
depth
(),
cn
));
Mat
dst
=
_dst
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
if
(
_src
.
dims
()
<=
2
)
{
bool
ok
=
false
;
Ptr
<
ParallelLoopBody
>
body
;
#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
size_t
elemSize1
=
CV_ELEM_SIZE1
(
dst
.
depth
());
#if 0 // there are no performance benefits (PR #2653)
if (lutcn == 1)
{
ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTC1(src, lut, dst, &ok);
body.reset(p);
}
else
#endif
if
((
lutcn
==
3
||
lutcn
==
4
)
&&
elemSize1
==
1
)
{
ParallelLoopBody
*
p
=
new
ipp
::
IppLUTParallelBody_LUTCN
(
src
,
lut
,
dst
,
&
ok
);
body
.
reset
(
p
);
}
#endif
if
(
body
==
NULL
||
ok
==
false
)
{
ok
=
false
;
ParallelLoopBody
*
p
=
new
LUTParallelBody
(
src
,
lut
,
dst
,
&
ok
);
body
.
reset
(
p
);
}
if
(
body
!=
NULL
&&
ok
)
{
Range
all
(
0
,
dst
.
rows
);
if
(
dst
.
total
()
>>
18
)
parallel_for_
(
all
,
*
body
,
(
double
)
std
::
max
((
size_t
)
1
,
dst
.
total
()
>>
16
));
else
(
*
body
)(
all
);
if
(
ok
)
return
;
}
}
LUTFunc
func
=
lutTab
[
lut
.
depth
()];
LUTFunc
func
=
lutTab
[
lut
.
depth
()];
CV_Assert
(
func
!=
0
);
CV_Assert
(
func
!=
0
);
...
...
modules/imgproc/src/morph.cpp
View file @
7ee1d5f6
...
@@ -1258,8 +1258,11 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
...
@@ -1258,8 +1258,11 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
default:
default:
return
false
;
return
false
;
}
}
#undef IPP_MORPH_CASE
#undef IPP_MORPH_CASE
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8
return
false
;
/// It disables false positive warning in GCC 4.8.2
#endif
#endif
#endif
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment