submodule / opencv / Commits / 2057f2c4

Commit 2057f2c4, authored Apr 18, 2011 by Vladimir Dudnik
parent 8e776837

fixed build issues related to changes in IPP calls.

Showing 4 changed files with 404 additions and 302 deletions (+404 / -302):

  modules/core/src/arithm.cpp    +326  -226
  modules/core/src/matmul.cpp     +69   -68
  modules/core/src/precomp.hpp     +7    -7
  modules/core/src/system.cpp      +2    -1
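Most of the arithm.cpp hunks below swap one form of an IPP call for another inside the IF_IPP dispatch: the older call sites passed only the ROI size, while the updated ones also pass the destination step (in bytes), matching the current ippi* signatures; a few functions (the max*/min* family) instead replace IF_IPP with an explicit #if ARITHM_USE_IPP block that calls the row-wise ipps* functions. As a rough orientation, the call-site pattern is sketched below. The sketch reuses names that appear in this diff, but the try-IPP-then-fall-back structure is an assumption for illustration only; the actual IF_IPP macro is defined elsewhere in the module (likely the also-modified precomp.hpp) and is not reproduced here.

// Illustrative sketch only: mirrors the call sites in this diff, but the
// dispatch structure shown here is assumed, not the real IF_IPP expansion.
static void add8u_sketch( const uchar* src1, size_t step1,
                          const uchar* src2, size_t step2,
                          uchar* dst, size_t step, Size sz )
{
#if ARITHM_USE_IPP
    // single-row inputs get packed steps so IPP sees one contiguous buffer
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
    // updated signature: the destination step is passed explicitly before the ROI size
    if( ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2,
                          dst, (int)step, (IppiSize&)sz, 0) >= 0 )
        return;
#endif
    // generic/SSE2 path used when IPP is unavailable (or, in this sketch, fails)
    vBinOp8<uchar, OpAdd<uchar>, IF_SIMD(_VAdd8u)>(src1, step1, src2, step2, dst, step, sz);
}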
modules/core/src/arithm.cpp

@@ -56,7 +56,7 @@ struct IPPArithmInitializer
{
    IPPArithmInitializer(void)
    {
        IppStatus status = ippStaticInit();
        ippStaticInit();
    }
};

@@ -64,19 +64,19 @@ IPPArithmInitializer ippArithmInitializer;
#endif

struct NOP {};

template<typename T, class Op, class Op8>
void vBinOp8(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, Size sz)
{
    Op8 op8;
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
                        src2 += step2/sizeof(src2[0]),
                        dst += step/sizeof(dst[0]) )
    {
        int x = 0;
    #if CV_SSE2
        if( USE_SSE2 )
        {

@@ -97,7 +97,7 @@ void vBinOp8(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, s
            }
        }
    #endif
        for( ; x <= sz.width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);

@@ -107,7 +107,7 @@ void vBinOp8(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, s
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
@@ -119,13 +119,13 @@ void vBinOp16(const T* src1, size_t step1, const T* src2, size_t step2,
{
    Op16 op16;
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
                        src2 += step2/sizeof(src2[0]),
                        dst += step/sizeof(dst[0]) )
    {
        int x = 0;
    #if CV_SSE2
        if( USE_SSE2 )
        {

@@ -147,7 +147,7 @@ void vBinOp16(const T* src1, size_t step1, const T* src2, size_t step2,
        }
        else
    #endif
        for( ; x <= sz.width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);

@@ -157,26 +157,26 @@ void vBinOp16(const T* src1, size_t step1, const T* src2, size_t step2,
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}

template<class Op, class Op32>
void vBinOp32s(const int* src1, size_t step1, const int* src2, size_t step2,
               int* dst, size_t step, Size sz)
{
    Op32 op32;
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
                        src2 += step2/sizeof(src2[0]),
                        dst += step/sizeof(dst[0]) )
    {
        int x = 0;
    #if CV_SSE2
        if( USE_SSE2 )
        {

@@ -202,7 +202,7 @@ void vBinOp32s(const int* src1, size_t step1, const int* src2, size_t step2,
            }
        }
    #endif
        for( ; x <= sz.width - 4; x += 4 )
        {
            int v0 = op(src1[x], src2[x]);

@@ -212,26 +212,26 @@ void vBinOp32s(const int* src1, size_t step1, const int* src2, size_t step2,
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}

template<class Op, class Op32>
void vBinOp32f(const float* src1, size_t step1, const float* src2, size_t step2,
               float* dst, size_t step, Size sz)
{
    Op32 op32;
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
                        src2 += step2/sizeof(src2[0]),
                        dst += step/sizeof(dst[0]) )
    {
        int x = 0;
    #if CV_SSE2
        if( USE_SSE2 )
        {

@@ -266,7 +266,7 @@ void vBinOp32f(const float* src1, size_t step1, const float* src2, size_t step2,
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
@@ -278,13 +278,13 @@ void vBinOp64f(const double* src1, size_t step1, const double* src2, size_t step
{
    Op64 op64;
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
                        src2 += step2/sizeof(src2[0]),
                        dst += step/sizeof(dst[0]) )
    {
        int x = 0;
    #if CV_SSE2
        if( USE_SSE2 && (((size_t)src1 | (size_t)src2 | (size_t)dst) & 15) == 0 )
            for( ; x <= sz.width - 4; x += 4 )

@@ -307,14 +307,14 @@ void vBinOp64f(const double* src1, size_t step1, const double* src2, size_t step
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}

#if CV_SSE2

struct _VAdd8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_adds_epu8(a, b); }};
struct _VSub8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_subs_epu8(a, b); }};
struct _VMin8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_min_epu8(a, b); }};

@@ -410,7 +410,7 @@ struct _VAbsDiff32s
        __m128i m = _mm_cmpgt_epi32(b, a);
        return _mm_sub_epi32(_mm_xor_si128(d, m), m);
    }
};

struct _VAdd32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_add_ps(a, b); }};
struct _VSub32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_sub_ps(a, b); }};

@@ -429,7 +429,7 @@ struct _VAdd64f { __m128d operator()(const __m128d& a, const __m128d& b) const {
struct _VSub64f { __m128d operator()(const __m128d& a, const __m128d& b) const { return _mm_sub_pd(a, b); }};
struct _VMin64f { __m128d operator()(const __m128d& a, const __m128d& b) const { return _mm_min_pd(a, b); }};
struct _VMax64f { __m128d operator()(const __m128d& a, const __m128d& b) const { return _mm_max_pd(a, b); }};

static int CV_DECL_ALIGNED(16) v64f_absmask[] = { 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff };

struct _VAbsDiff64f
{

@@ -437,13 +437,13 @@ struct _VAbsDiff64f
        return _mm_and_pd(_mm_sub_pd(a, b), *(const __m128d*)v64f_absmask);
    }
};

struct _VAnd8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_and_si128(a, b); }};
struct _VOr8u  { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_or_si128(a, b); }};
struct _VXor8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_xor_si128(a, b); }};
struct _VNot8u { __m128i operator()(const __m128i& a, const __m128i&) const { return _mm_andnot_si128(_mm_setzero_si128(), a); }};

#endif

#if CV_SSE2
@@ -451,12 +451,12 @@ struct _VNot8u { __m128i operator()(const __m128i& a, const __m128i&) const { re
#else
#define IF_SIMD(op) NOP
#endif

template<> inline uchar OpAdd<uchar>::operator ()(uchar a, uchar b) const
{ return CV_FAST_CAST_8U(a + b); }
template<> inline uchar OpSub<uchar>::operator ()(uchar a, uchar b) const
{ return CV_FAST_CAST_8U(a - b); }

template<typename T> struct OpAbsDiff
{
    typedef T type1;

@@ -470,7 +470,7 @@ template<> inline short OpAbsDiff<short>::operator ()(short a, short b) const
template<> inline schar OpAbsDiff<schar>::operator ()(schar a, schar b) const
{ return saturate_cast<schar>(std::abs(a - b)); }

template<typename T, typename WT=T> struct OpAbsDiffS
{
    typedef T type1;

@@ -510,19 +510,19 @@ template<typename T> struct OpNot
    typedef T rtype;
    T operator()( T a, T ) const { return ~a; }
};

static inline void fixSteps(Size sz, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
{
    if( sz.height == 1 )
        step1 = step2 = step = sz.width*elemSize;
}

static void add8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz, 0),
           ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp8<uchar, OpAdd<uchar>, IF_SIMD(_VAdd8u)>(src1, step1, src2, step2, dst, step, sz)));
}
@@ -538,7 +538,7 @@ static void add16u( const ushort* src1, size_t step1,
                    ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz, 0),
           ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<ushort, OpAdd<ushort>, IF_SIMD(_VAdd16u)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -547,7 +547,7 @@ static void add16s( const short* src1, size_t step1,
                    short* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz, 0),
           ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<short, OpAdd<short>, IF_SIMD(_VAdd16s)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -563,7 +563,7 @@ static void add32f( const float* src1, size_t step1,
                    float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_32f_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz, 0),
           ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp32f<OpAdd<float>, IF_SIMD(_VAdd32f)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -579,7 +579,7 @@ static void sub8u( const uchar* src1, size_t step1,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (IppiSize&)sz, 0),
           ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp8<uchar, OpSub<uchar>, IF_SIMD(_VSub8u)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -595,7 +595,7 @@ static void sub16u( const ushort* src1, size_t step1,
                    ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (IppiSize&)sz, 0),
           ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<ushort, OpSub<ushort>, IF_SIMD(_VSub16u)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -604,7 +604,7 @@ static void sub16s( const short* src1, size_t step1,
                    short* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (IppiSize&)sz, 0),
           ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<short, OpSub<short>, IF_SIMD(_VSub16s)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -620,7 +620,7 @@ static void sub32f( const float* src1, size_t step1,
                    float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_32f_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (IppiSize&)sz, 0),
           ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz),
           (vBinOp32f<OpSub<float>, IF_SIMD(_VSub32f)>(src1, step1, src2, step2, dst, step, sz)));
}
@@ -629,18 +629,36 @@ static void sub64f( const double* src1, size_t step1,
                    double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpSub<double>, IF_SIMD(_VSub64f)>(src1, step1, src2, step2, dst, step, sz);
}
}

template<> inline uchar OpMin<uchar>::operator ()(uchar a, uchar b) const { return CV_MIN_8U(a, b); }
template<> inline uchar OpMax<uchar>::operator ()(uchar a, uchar b) const { return CV_MAX_8U(a, b); }

static void max8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMaxEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp8<uchar, OpMax<uchar>, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz)));
#if (ARITHM_USE_IPP == 1)
    {
        uchar* s1 = (uchar*)src1;
        uchar* s2 = (uchar*)src2;
        uchar* d  = dst;
        fixSteps(sz, sizeof(dst[0]), step1, step2, step);
        for(int i = 0; i < sz.height; i++)
        {
            ippsMaxEvery_8u(s1, s2, d, sz.width);
            s1 += step1;
            s2 += step2;
            d  += step;
        }
    }
#else
    vBinOp8<uchar, OpMax<uchar>, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMaxEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp8<uchar, OpMax<uchar>, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void max8s( const schar* src1, size_t step1,

@@ -654,18 +672,34 @@ static void max16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMaxEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp16<ushort, OpMax<ushort>, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz)));
#if (ARITHM_USE_IPP == 1)
    {
        ushort* s1 = (ushort*)src1;
        ushort* s2 = (ushort*)src2;
        ushort* d  = dst;
        fixSteps(sz, sizeof(dst[0]), step1, step2, step);
        for(int i = 0; i < sz.height; i++)
        {
            ippsMaxEvery_16u(s1, s2, d, sz.width);
            s1 = (ushort*)((uchar*)s1 + step1);
            s2 = (ushort*)((uchar*)s2 + step2);
            d  = (ushort*)((uchar*)d + step);
        }
    }
#else
    vBinOp16<ushort, OpMax<ushort>, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMaxEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp16<ushort, OpMax<ushort>, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void max16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMaxEvery_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp16<short, OpMax<short>, IF_SIMD(_VMax16s)>(src1, step1, src2, step2, dst, step, sz)));
    vBinOp16<short, OpMax<short>, IF_SIMD(_VMax16s)>(src1, step1, src2, step2, dst, step, sz);
}

static void max32s( const int* src1, size_t step1,

@@ -679,9 +713,26 @@ static void max32f( const float* src1, size_t step1,
                    const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMaxEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp32f<OpMax<float>, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz)));
#if (ARITHM_USE_IPP == 1)
    {
        float* s1 = (float*)src1;
        float* s2 = (float*)src2;
        float* d  = dst;
        fixSteps(sz, sizeof(dst[0]), step1, step2, step);
        for(int i = 0; i < sz.height; i++)
        {
            ippsMaxEvery_32f(s1, s2, d, sz.width);
            s1 = (float*)((uchar*)s1 + step1);
            s2 = (float*)((uchar*)s2 + step2);
            d  = (float*)((uchar*)d + step);
        }
    }
#else
    vBinOp32f<OpMax<float>, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMaxEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp32f<OpMax<float>, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz)));
}

static void max64f( const double* src1, size_t step1,
@@ -695,9 +746,27 @@ static void min8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMinEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp8<uchar, OpMin<uchar>, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz)));
#if (ARITHM_USE_IPP == 1)
    {
        uchar* s1 = (uchar*)src1;
        uchar* s2 = (uchar*)src2;
        uchar* d  = dst;
        fixSteps(sz, sizeof(dst[0]), step1, step2, step);
        for(int i = 0; i < sz.height; i++)
        {
            ippsMinEvery_8u(s1, s2, d, sz.width);
            s1 += step1;
            s2 += step2;
            d  += step;
        }
    }
#else
    vBinOp8<uchar, OpMin<uchar>, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMinEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp8<uchar, OpMin<uchar>, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void min8s( const schar* src1, size_t step1,

@@ -711,18 +780,34 @@ static void min16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMinEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp16<ushort, OpMin<ushort>, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz)));
#if (ARITHM_USE_IPP == 1)
    {
        ushort* s1 = (ushort*)src1;
        ushort* s2 = (ushort*)src2;
        ushort* d  = dst;
        fixSteps(sz, sizeof(dst[0]), step1, step2, step);
        for(int i = 0; i < sz.height; i++)
        {
            ippsMinEvery_16u(s1, s2, d, sz.width);
            s1 = (ushort*)((uchar*)s1 + step1);
            s2 = (ushort*)((uchar*)s2 + step2);
            d  = (ushort*)((uchar*)d + step);
        }
    }
#else
    vBinOp16<ushort, OpMin<ushort>, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMinEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp16<ushort, OpMin<ushort>, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void min16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMinEvery_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp16<short, OpMin<short>, IF_SIMD(_VMin16s)>(src1, step1, src2, step2, dst, step, sz)));
    vBinOp16<short, OpMin<short>, IF_SIMD(_VMin16s)>(src1, step1, src2, step2, dst, step, sz);
}

static void min32s( const int* src1, size_t step1,

@@ -736,9 +821,26 @@ static void min32f( const float* src1, size_t step1,
                    const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiMinEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp32f<OpMin<float>, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz)));
#if (ARITHM_USE_IPP == 1)
    {
        float* s1 = (float*)src1;
        float* s2 = (float*)src2;
        float* d  = dst;
        fixSteps(sz, sizeof(dst[0]), step1, step2, step);
        for(int i = 0; i < sz.height; i++)
        {
            ippsMinEvery_32f(s1, s2, d, sz.width);
            s1 = (float*)((uchar*)s1 + step1);
            s2 = (float*)((uchar*)s2 + step2);
            d  = (float*)((uchar*)d + step);
        }
    }
#else
    vBinOp32f<OpMin<float>, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMinEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp32f<OpMin<float>, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz)));
}

static void min64f( const double* src1, size_t step1,
@@ -746,14 +848,14 @@ static void min64f( const double* src1, size_t step1,
                    double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpMin<double>, IF_SIMD(_VMin64f)>(src1, step1, src2, step2, dst, step, sz);
}
}

static void absdiff8u( const uchar* src1, size_t step1,
                       const uchar* src2, size_t step2,
                       uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpAbsDiff<uchar>, IF_SIMD(_VAbsDiff8u)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -769,7 +871,7 @@ static void absdiff16u( const ushort* src1, size_t step1,
                        ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp16<ushort, OpAbsDiff<ushort>, IF_SIMD(_VAbsDiff16u)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -777,9 +879,7 @@ static void absdiff16s( const short* src1, size_t step1,
                        const short* src2, size_t step2,
                        short* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAbsDiff_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           (vBinOp16<short, OpAbsDiff<short>, IF_SIMD(_VAbsDiff16s)>(src1, step1, src2, step2, dst, step, sz)));
    vBinOp16<short, OpAbsDiff<short>, IF_SIMD(_VAbsDiff16s)>(src1, step1, src2, step2, dst, step, sz);
}

static void absdiff32s( const int* src1, size_t step1,

@@ -794,7 +894,7 @@ static void absdiff32f( const float* src1, size_t step1,
                        float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp32f<OpAbsDiff<float>, IF_SIMD(_VAbsDiff32f)>(src1, step1, src2, step2, dst, step, sz)));
}
@@ -803,15 +903,15 @@ static void absdiff64f( const double* src1, size_t step1,
                        double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpAbsDiff<double>, IF_SIMD(_VAbsDiff64f)>(src1, step1, src2, step2, dst, step, sz);
}
}

static void and8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpAnd<uchar>, IF_SIMD(_VAnd8u)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -820,7 +920,7 @@ static void or8u( const uchar* src1, size_t step1,
                  uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpOr<uchar>, IF_SIMD(_VOr8u)>(src1, step1, src2, step2, dst, step, sz)));
}

@@ -829,23 +929,23 @@ static void xor8u( const uchar* src1, size_t step1,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
           ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpXor<uchar>, IF_SIMD(_VXor8u)>(src1, step1, src2, step2, dst, step, sz)));
}
}

static void not8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiNot_8u_C1R(src1, (int)step1, dst, (IppiSize&)sz),
           ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpNot<uchar>, IF_SIMD(_VNot8u)>(src1, step1, src2, step2, dst, step, sz)));
}

/****************************************************************************************\
*                                   logical operations                                  *
\****************************************************************************************/

static inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind)
{
    if( sc.dims > 2 || (sc.cols != 1 && sc.rows != 1) || !sc.isContinuous() )
@@ -856,7 +956,7 @@ static inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind)
    return sc.size() == Size(1,1) || sc.size() == Size(1,cn) ||
           sc.size() == Size(cn,1) ||
           (sc.size() == Size(1,4) && sc.type() == CV_64F && cn <= 4);
}

static void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
{
    int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);

@@ -872,9 +972,9 @@ static void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, si
    }
    for( size_t i = esz; i < blocksize*esz; i++ )
        scbuf[i] = scbuf[i - esz];
}

void binary_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _dst,
               const InputArray& _mask, const BinaryFunc* tab, bool bitwise)
{

@@ -883,7 +983,7 @@ void binary_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
    bool haveMask = !_mask.empty(), haveScalar = false;
    BinaryFunc func;
    int c;

    if( src1.dims <= 2 && src2.dims <= 2 && kind1 == kind2 &&
        src1.size() == src2.size() && src1.type() == src2.type() && !haveMask )
    {

@@ -899,12 +999,12 @@ void binary_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
            func = tab[src1.depth()];
            c = src1.channels();
        }

        Size sz = getContinuousSize(src1, src2, dst, c);
        func(src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, 0);
        return;
    }

    if( (kind1 == InputArray::MATX) + (kind2 == InputArray::MATX) == 1 ||
        src1.size != src2.size || src1.type() != src2.type() )
    {

@@ -917,13 +1017,13 @@ void binary_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
                     "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
    }

    size_t esz = src1.elemSize();
    size_t blocksize0 = (BLOCK_SIZE + esz - 1)/esz;
    int cn = src1.channels();
    BinaryFunc copymask = 0;
    Mat mask;

    if( haveMask )
    {
        mask = _mask.getMat();

@@ -931,13 +1031,13 @@ void binary_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        CV_Assert( mask.size == src1.size );
        copymask = getCopyMaskFunc(esz);
    }

    AutoBuffer<uchar> _buf;
    uchar *scbuf = 0, *maskbuf = 0;

    _dst.create(src1.dims, src1.size, src1.type());
    Mat dst = _dst.getMat();

    if( bitwise )
    {
        func = *tab;

@@ -948,35 +1048,35 @@ void binary_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        func = tab[src1.depth()];
        c = cn;
    }

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
        uchar* ptrs[4];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = total;

        if( haveMask )
        {
            blocksize = std::min(blocksize, blocksize0);
            _buf.allocate(blocksize*esz);
            maskbuf = _buf;
        }

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)std::min(total - j, blocksize);

                func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*c, 1), 0 );
                if( haveMask )
                {
                    copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
                    ptrs[3] += bsz;
                }

                bsz *= (int)esz;
                ptrs[0] += bsz; ptrs[1] += bsz; ptrs[2] += bsz;
            }
@@ -986,41 +1086,41 @@ void binary_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
    {
        const Mat* arrays[] = { &src1, &dst, &mask, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32);
        scbuf = _buf;
        maskbuf = alignPtr(scbuf + blocksize*esz, 16);

        convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)std::min(total - j, blocksize);

                func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*c, 1), 0 );
                if( haveMask )
                {
                    copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
                    ptrs[2] += bsz;
                }

                bsz *= (int)esz;
                ptrs[0] += bsz; ptrs[1] += bsz;
            }
        }
    }
}

static BinaryFunc maxTab[] =
{
    (BinaryFunc)max8u, (BinaryFunc)max8s, (BinaryFunc)max16u, (BinaryFunc)max16s,
    (BinaryFunc)max32s, (BinaryFunc)max32f, (BinaryFunc)max64f, 0
};

static BinaryFunc minTab[] =
{

@@ -1029,7 +1129,7 @@ static BinaryFunc minTab[] =
};

}

void cv::bitwise_and(const InputArray& a, const InputArray& b, OutputArray c, const InputArray& mask)
{
    BinaryFunc f = and8u;

@@ -1068,26 +1168,26 @@ void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, InputArray(), maxTab, false);
}
}

void cv::min( const Mat& src1, const Mat& src2, Mat& dst )
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, InputArray(), minTab, false);
}

void cv::max( const Mat& src1, double src2, Mat& dst )
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, InputArray(), maxTab, false);
}
}

void cv::min( const Mat& src1, double src2, Mat& dst )
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, InputArray(), minTab, false);
}

/****************************************************************************************\
*                                      add/subtract                                     *
\****************************************************************************************/

@@ -1101,7 +1201,7 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
    int kind1 = _src1.kind(), kind2 = _src2.kind();
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
    bool haveMask = !_mask.empty();

    if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 &&
        src1.size() == src2.size() && src1.type() == src2.type() && !haveMask &&
        ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == src1.depth())) ||

@@ -1113,9 +1213,9 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        tab[src1.depth()](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata);
        return;
    }

    bool haveScalar = false, swapped12 = false;
    if( (kind1 == InputArray::MATX) + (kind2 == InputArray::MATX) == 1 ||
        src1.size != src2.size || src1.channels() != src2.channels() )
    {
@@ -1131,10 +1231,10 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
                     "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
    }

    int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth(), wtype;
    BinaryFunc cvtsrc1 = 0, cvtsrc2 = 0, cvtdst = 0;

    if( dtype < 0 )
    {
        if( _dst.fixedType() )

@@ -1149,7 +1249,7 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        }
    }
    dtype = CV_MAT_DEPTH(dtype);

    if( depth1 == depth2 && dtype == depth1 )
        wtype = dtype;
    else if( !muldiv )

@@ -1157,7 +1257,7 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S :
                depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2);
        wtype = std::max(wtype, dtype);

        // when the result of addition should be converted to an integer type,
        // and just one of the input arrays is floating-point, it makes sense to convert that input to integer type before the operation,
        // instead of converting the other input to floating-point and then converting the operation result back to integers.

@@ -1169,20 +1269,20 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        wtype = std::max(depth1, std::max(depth2, CV_32F));
        wtype = std::max(wtype, dtype);
    }

    cvtsrc1 = depth1 == wtype ? 0 : getConvertFunc(depth1, wtype);
    cvtsrc2 = depth2 == depth1 ? cvtsrc1 : depth2 == wtype ? 0 : getConvertFunc(depth2, wtype);
    cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);

    dtype = CV_MAKETYPE(dtype, cn);
    wtype = CV_MAKETYPE(wtype, cn);

    size_t esz1 = src1.elemSize(), esz2 = src2.elemSize();
    size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
    size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz - 1)/wsz;
    BinaryFunc copymask = 0;
    Mat mask;

    if( haveMask )
    {
        mask = _mask.getMat();

@@ -1190,23 +1290,23 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        CV_Assert( mask.size == src1.size );
        copymask = getCopyMaskFunc(dsz);
    }

    AutoBuffer<uchar> _buf;
    uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
    size_t bufesz = (cvtsrc1 ? wsz : 0) + (cvtsrc2 || haveScalar ? wsz : 0) +
                    (cvtdst ? wsz : 0) + (haveMask ? dsz : 0);

    _dst.create(src1.dims, src1.size, src1.type());
    Mat dst = _dst.getMat();
    BinaryFunc func = tab[CV_MAT_DEPTH(wtype)];

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
        uchar* ptrs[4];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = total;

        if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst )
            blocksize = std::min(blocksize, blocksize0);

@@ -1221,7 +1321,7 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        buf = alignPtr(buf + blocksize*wsz, 16);
        if( haveMask )
            maskbuf = buf;

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )

@@ -1242,7 +1342,7 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
                    cvtsrc2( sptr2, 0, 0, 0, buf2, 0, bszn, 0 );
                    sptr2 = buf2;
                }

                if( !haveMask && !cvtdst )
                    func( sptr1, 0, sptr2, 0, dptr, 0, bszn, usrdata );
                else

@@ -1270,10 +1370,10 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
    {
        const Mat* arrays[] = { &src1, &dst, &mask, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(bufesz*blocksize + 64);
        buf = _buf;
        if( cvtsrc1 )

@@ -1284,9 +1384,9 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
        buf = alignPtr(buf + blocksize*wsz, 16);
        if( haveMask )
            maskbuf = buf;

        convertAndUnrollScalar( src2, wtype, buf2, blocksize);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )

@@ -1296,16 +1396,16 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
                const uchar* sptr1 = ptrs[0];
                const uchar* sptr2 = buf2;
                uchar* dptr = ptrs[1];

                if( cvtsrc1 )
                {
                    cvtsrc1( sptr1, 0, 0, 0, buf1, 0, bszn, 0 );
                    sptr1 = buf1;
                }

                if( swapped12 )
                    std::swap(sptr1, sptr2);

                if( !haveMask && !cvtdst )
                    func( sptr1, 0, sptr2, 0, dptr, 0, bszn, usrdata );
                else
@@ -1330,13 +1430,13 @@ void arithm_op(const InputArray& _src1, const InputArray& _src2, OutputArray& _d
            }
        }
    }
}

static BinaryFunc addTab[] =
{
    (BinaryFunc)add8u, (BinaryFunc)add8s, (BinaryFunc)add16u, (BinaryFunc)add16s,
    (BinaryFunc)add32s, (BinaryFunc)add32f, (BinaryFunc)add64f, 0
};

static BinaryFunc subTab[] =
{
    (BinaryFunc)sub8u, (BinaryFunc)sub8s, (BinaryFunc)sub16u, (BinaryFunc)sub16s,

@@ -1348,10 +1448,10 @@ static BinaryFunc absdiffTab[] =
    (BinaryFunc)absdiff8u, (BinaryFunc)absdiff8s, (BinaryFunc)absdiff16u, (BinaryFunc)absdiff16s,
    (BinaryFunc)absdiff32s, (BinaryFunc)absdiff32f, (BinaryFunc)absdiff64f, 0
};

}

void cv::add( const InputArray& src1, const InputArray& src2, OutputArray dst,
              const InputArray& mask, int dtype )
{

@@ -1367,7 +1467,7 @@ void cv::subtract( const InputArray& src1, const InputArray& src2, OutputArray d
void cv::absdiff( const InputArray& src1, const InputArray& src2, OutputArray dst )
{
    arithm_op(src1, src2, dst, InputArray(), -1, absdiffTab);
}
}

/****************************************************************************************\
*                                    multiply/divide                                    *

@@ -1437,7 +1537,7 @@ div_( const T* src1, size_t step1, const T* src2, size_t step2,
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    step /= sizeof(dst[0]);

    for( ; size.height--; src1 += step1, src2 += step2, dst += step )
    {
        int i = 0;

@@ -1450,12 +1550,12 @@ div_( const T* src1, size_t step1, const T* src2, size_t step2,
                double d = scale/(a*b);
                b *= d; a *= d;

                T z0 = saturate_cast<T>(src2[i+1] * ((double)src1[i] * b));
                T z1 = saturate_cast<T>(src2[i] * ((double)src1[i+1] * b));
                T z2 = saturate_cast<T>(src2[i+3] * ((double)src1[i+2] * a));
                T z3 = saturate_cast<T>(src2[i+2] * ((double)src1[i+3] * a));

                dst[i] = z0; dst[i+1] = z1;
                dst[i+2] = z2; dst[i+3] = z3;
            }

@@ -1465,12 +1565,12 @@ div_( const T* src1, size_t step1, const T* src2, size_t step2,
                T z1 = src2[i+1] != 0 ? saturate_cast<T>(src1[i+1]*scale/src2[i+1]) : 0;
                T z2 = src2[i+2] != 0 ? saturate_cast<T>(src1[i+2]*scale/src2[i+2]) : 0;
                T z3 = src2[i+3] != 0 ? saturate_cast<T>(src1[i+3]*scale/src2[i+3]) : 0;

                dst[i] = z0; dst[i+1] = z1;
                dst[i+2] = z2; dst[i+3] = z3;
            }
        }

        for( ; i < size.width; i++ )
            dst[i] = src2[i] != 0 ? saturate_cast<T>(src1[i]*scale/src2[i]) : 0;
    }

@@ -1482,7 +1582,7 @@ recip_( const T*, size_t, const T* src2, size_t step2,
{
    step2 /= sizeof(src2[0]);
    step /= sizeof(dst[0]);

    for( ; size.height--; src2 += step2, dst += step )
    {
        int i = 0;

@@ -1495,12 +1595,12 @@ recip_( const T*, size_t, const T* src2, size_t step2,
                double d = scale/(a*b);
                b *= d; a *= d;

                T z0 = saturate_cast<T>(src2[i+1] * b);
                T z1 = saturate_cast<T>(src2[i] * b);
                T z2 = saturate_cast<T>(src2[i+3] * a);
                T z3 = saturate_cast<T>(src2[i+2] * a);

                dst[i] = z0; dst[i+1] = z1;
                dst[i+2] = z2; dst[i+3] = z3;
            }

@@ -1515,13 +1615,13 @@ recip_( const T*, size_t, const T* src2, size_t step2,
                dst[i+2] = z2; dst[i+3] = z3;
            }
        }

        for( ; i < size.width; i++ )
            dst[i] = src2[i] != 0 ? saturate_cast<T>(scale/src2[i]) : 0;
    }
}

static void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* scale )
{

@@ -1551,7 +1651,7 @@ static void mul32s( const int* src1, size_t step1, const int* src2, size_t step2
{
    mul_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* scale )
{

@@ -1563,7 +1663,7 @@ static void mul64f( const double* src1, size_t step1, const double* src2, size_t
{
    mul_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* scale )
{

@@ -1650,8 +1750,8 @@ static void recip64f( const double* src1, size_t step1, const double* src2, size
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static BinaryFunc mulTab[] =
{
    (BinaryFunc)mul8u, (BinaryFunc)mul8s, (BinaryFunc)mul16u,

@@ -1673,9 +1773,9 @@ static BinaryFunc recipTab[] =
    (BinaryFunc)recip64f, 0
};

}

void cv::multiply(const InputArray& src1, const InputArray& src2,
                  OutputArray dst, double scale, int dtype)
{

@@ -1692,8 +1792,8 @@ void cv::divide(double scale, const InputArray& src2,
                OutputArray dst, int dtype )
{
    arithm_op(src2, src2, dst, InputArray(), dtype, recipTab, true, &scale);
}
}

/****************************************************************************************\
*                                      addWeighted                                      *
@@ -1739,34 +1839,34 @@ addWeighted8u( const uchar* src1, size_t step1,
{
    const double* scalars = (const double*)_scalars;
    float alpha = (float)scalars[0], beta = (float)scalars[1], gamma = (float)scalars[2];

    for( ; size.height--; src1 += step1, src2 += step2, dst += step )
    {
        int x = 0;
#if CV_SSE2
        if( USE_SSE2 )
        {
            __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
            __m128i z = _mm_setzero_si128();

            for( ; x <= size.width - 8; x += 8 )
            {
                __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
                __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);

                __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
                __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
                __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
                __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));

                u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
                u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
                u0 = _mm_add_ps(u0, g4);
                u1 = _mm_add_ps(u1, g4);

                u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
                u = _mm_packus_epi16(u, u);
                _mm_storel_epi64((__m128i*)(dst + x), u);
            }
        }

@@ -1837,9 +1937,9 @@ static BinaryFunc addWeightedTab[] =
    (BinaryFunc)addWeighted16s, (BinaryFunc)addWeighted32s, (BinaryFunc)addWeighted32f,
    (BinaryFunc)addWeighted64f, 0
};

}

void cv::addWeighted( const InputArray& src1, double alpha, const InputArray& src2,
                      double beta, double gamma, OutputArray dst, int dtype )
{

@@ -1847,7 +1947,7 @@ void cv::addWeighted( const InputArray& src1, double alpha, const InputArray& sr
    arithm_op(src1, src2, dst, InputArray(), dtype, addWeightedTab, true, scalars);
}

/****************************************************************************************\
*                                        compare                                        *
\****************************************************************************************/

@@ -1867,7 +1967,7 @@ cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        int m = code == CMP_GT ? 0 : 255;

@@ -1884,7 +1984,7 @@ cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
            t1 = -(src1[x+3] > src2[x+3]) ^ m;
            dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
        }

        for( ; x < size.width; x++ )
            dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
    }

@@ -1905,14 +2005,14 @@ cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
            t1 = -(src1[x+3] == src2[x+3]) ^ m;
            dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
        }

        for( ; x < size.width; x++ )
            dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
    }
}
}

static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                  uchar* dst, size_t step, Size size, void* _cmpop)
{

@@ -1953,8 +2053,8 @@ static void cmp64f(const double* src1, size_t step1, const double* src2, size_t
                   uchar* dst, size_t step, Size size, void* _cmpop)
{
    cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
}
}

static BinaryFunc cmpTab[] =
{
    (BinaryFunc)cmp8u, (BinaryFunc)cmp8s, (BinaryFunc)cmp16u,

@@ -1962,7 +2062,7 @@ static BinaryFunc cmpTab[] =
    (BinaryFunc)cmp64f, 0
};

static double getMinVal(int depth)
{
    static const double tab[] = { 0, -128, 0, -32768, INT_MIN, -FLT_MAX, -DBL_MAX, 0 };
@@ -1973,18 +2073,18 @@ static double getMaxVal(int depth)
{
    static const double tab[] = { 255, 127, 65535, 32767, INT_MAX, FLT_MAX, DBL_MAX, 0 };
    return tab[depth];
}

}
}

void cv::compare(const InputArray& _src1, const InputArray& _src2, OutputArray _dst, int op)
{
    CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
               op == CMP_NE || op == CMP_GE || op == CMP_GT );

    int kind1 = _src1.kind(), kind2 = _src2.kind();
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();

    if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 &&
        src1.size() == src2.size() && src1.type() == src2.type() )
    {
        _dst.create(src1.size(), CV_8UC1);

@@ -1993,9 +2093,9 @@ void cv::compare(const InputArray& _src1, const InputArray& _src2, OutputArray _
        cmpTab[src1.depth()](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, &op);
        return;
    }

    bool haveScalar = false;

    if( (kind1 == InputArray::MATX) + (kind2 == InputArray::MATX) == 1 ||
        src1.size != src2.size || src1.type() != src2.type() )
    {

@@ -2012,26 +2112,26 @@ void cv::compare(const InputArray& _src1, const InputArray& _src2, OutputArray _
                     "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
    }

    int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth();
    if( cn != 1 )
        CV_Error( CV_StsUnsupportedFormat, "compare() can only process single-channel arrays" );

    size_t esz = src1.elemSize();
    size_t blocksize0 = (size_t)(BLOCK_SIZE + esz - 1)/esz;

    _dst.create(src1.dims, src1.size, CV_8U);
    Mat dst = _dst.getMat();
    BinaryFunc func = cmpTab[depth1];

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size;

        for( size_t i = 0; i < it.nplanes; i++, ++it )
            func( ptrs[0], 0, ptrs[1], 0, ptrs[2], 0, Size((int)total, 1), &op );
    }

@@ -2039,10 +2139,10 @@ void cv::compare(const InputArray& _src1, const InputArray& _src2, OutputArray _
    {
        const Mat* arrays[] = { &src1, &dst, 0 };
        uchar* ptrs[2];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        AutoBuffer<uchar> _buf(blocksize*esz);
        uchar *buf = _buf;

@@ -2057,13 +2157,13 @@ void cv::compare(const InputArray& _src1, const InputArray& _src2, OutputArray _
            dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0);
            return;
        }

        if( fval > getMaxVal(depth1) )
        {
            dst = Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0);
            return;
        }

        int ival = cvRound(fval);
        if( fval != ival )
        {

@@ -2079,7 +2179,7 @@ void cv::compare(const InputArray& _src1, const InputArray& _src2, OutputArray _
        }

        convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, buf, blocksize);
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( size_t j = 0; j < total; j += blocksize )

@@ -2092,7 +2192,7 @@ void cv::compare(const InputArray& _src1, const InputArray& _src2, OutputArray _
        }
    }
}

/****************************************************************************************\
*                                        inRange                                        *
\****************************************************************************************/

@@ -2108,7 +2208,7 @@ inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    step3 /= sizeof(src3[0]);

    for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step )
    {
        int x = 0;

@@ -2122,13 +2222,13 @@ inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
            t1 = src2[x+3] <= src1[x+3] && src1[x+3] <= src3[x+3];
            dst[x+2] = (uchar)-t0; dst[x+3] = (uchar)-t1;
        }

        for( ; x < size.width; x++ )
            dst[x] = (uchar)-(src2[x] <= src1[x] && src1[x] <= src3[x]);
    }
}

static void inRange8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                      const uchar* src3, size_t step3, uchar* dst, size_t step, Size size)
{
@@ -2169,7 +2269,7 @@ static void inRange64f(const double* src1, size_t step1, const double* src2, siz
                       const double* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}
}

static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn)
{

@@ -2187,14 +2287,14 @@ static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn)
    else
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j] & src[j+1] & src[j+2] & src[j+3];

    for( ; k < cn; k += 4 )
    {
        for( i = 0, j = k; i < len; i++, j += cn )
            dst[i] &= src[j] & src[j+1] & src[j+2] & src[j+3];
    }
}

typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                             const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz );

@@ -2204,7 +2304,7 @@ static InRangeFunc inRangeTab[] =
    (InRangeFunc)inRange16s, (InRangeFunc)inRange32s, (InRangeFunc)inRange32f,
    (InRangeFunc)inRange64f, 0
};

}

void cv::inRange(const InputArray& _src, const InputArray& _lowerb,

@@ -2212,9 +2312,9 @@ void cv::inRange(const InputArray& _src, const InputArray& _lowerb,
{
    int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
    Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat();

    bool lbScalar = false, ubScalar = false;

    if( (lkind == InputArray::MATX && skind != InputArray::MATX) ||
        src.size != lb.size || src.type() != lb.type() )
    {

@@ -2223,7 +2323,7 @@ void cv::inRange(const InputArray& _src, const InputArray& _lowerb,
                     "The lower bounary is neither an array of the same size and same type as src, nor a scalar" );
        lbScalar = true;
    }

    if( (ukind == InputArray::MATX && skind != InputArray::MATX) ||
        src.size != ub.size || src.type() != ub.type() )
    {

@@ -2232,47 +2332,47 @@ void cv::inRange(const InputArray& _src, const InputArray& _lowerb,
                     "The upper bounary is neither an array of the same size and same type as src, nor a scalar" );
        ubScalar = true;
    }

    CV_Assert( ((int)lbScalar ^ (int)ubScalar) == 0 );

    int cn = src.channels(), depth = src.depth();
    size_t esz = src.elemSize();
    size_t blocksize0 = (size_t)(BLOCK_SIZE + esz - 1)/esz;

    _dst.create(src.dims, src.size, CV_8U);
    Mat dst = _dst.getMat();
    InRangeFunc func = inRangeTab[depth];

    const Mat* arrays_sc[] = { &src, &dst, 0 };
    const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
    uchar* ptrs[4];

    NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs);
    size_t total = it.size, blocksize = std::min(total, blocksize0);

    AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
    uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0;
    buf = alignPtr(buf + blocksize*cn, 16);

    if( lbScalar && ubScalar )
    {
        lbuf = buf;
        ubuf = buf = alignPtr(buf + blocksize*esz, 16);

        CV_Assert( lb.type() == ub.type() );
        int scdepth = lb.depth();

        if( scdepth != depth && depth < CV_32S )
        {
            int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
            int* iubuf = ilbuf + cn;

            BinaryFunc sccvtfunc = getConvertFunc(scdepth, CV_32S);
            sccvtfunc(lb.data, 0, 0, 0, (uchar*)ilbuf, 0, Size(cn, 1), 0);
            sccvtfunc(ub.data, 0, 0, 0, (uchar*)iubuf, 0, Size(cn, 1), 0);
            int minval = cvRound(getMinVal(depth)), maxval = cvRound(getMaxVal(depth));

            for( int k = 0; k < cn; k++ )
            {
                if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )

@@ -2281,11 +2381,11 @@ void cv::inRange(const InputArray& _src, const InputArray& _lowerb,
            lb = Mat(cn, 1, CV_32S, ilbuf);
            ub = Mat(cn, 1, CV_32S, iubuf);
        }

        convertAndUnrollScalar( lb, src.type(), lbuf, blocksize );
        convertAndUnrollScalar( ub, src.type(), ubuf, blocksize );
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( size_t j = 0; j < total; j += blocksize )
modules/core/src/matmul.cpp
View file @
2057f2c4
...
...
@@ -646,8 +646,8 @@ static void GEMMBlockMul_64fc( const Complexd* a_data, size_t a_step,
{
GEMMBlockMul
(
a_data
,
a_step
,
b_data
,
b_step
,
d_data
,
d_step
,
a_size
,
d_size
,
flags
);
}
static
void
GEMMStore_32f
(
const
float
*
c_data
,
size_t
c_step
,
const
double
*
d_buf
,
size_t
d_buf_step
,
float
*
d_data
,
size_t
d_step
,
Size
d_size
,
...
...
@@ -664,7 +664,7 @@ static void GEMMStore_64f( const double* c_data, size_t c_step,
{
GEMMStore
(
c_data
,
c_step
,
d_buf
,
d_buf_step
,
d_data
,
d_step
,
d_size
,
alpha
,
beta
,
flags
);
}
static
void
GEMMStore_32fc
(
const
Complexf
*
c_data
,
size_t
c_step
,
const
Complexd
*
d_buf
,
size_t
d_buf_step
,
...
...
@@ -1130,7 +1130,7 @@ void cv::gemm( const InputArray& matA, const InputArray& matB, double alpha,
int
dm0
,
dn0
,
dk0
;
size_t
a_step0
,
a_step1
,
b_step0
,
b_step1
,
c_step0
,
c_step1
;
int
work_elem_size
=
elem_size
<<
(
CV_MAT_DEPTH
(
type
)
==
CV_32F
?
1
:
0
);
if
(
!
is_a_t
)
a_step0
=
A
.
step
,
a_step1
=
elem_size
;
else
...
...
@@ -1273,7 +1273,7 @@ template<typename T, typename WT> static void
transform_
(
const
T
*
src
,
T
*
dst
,
const
WT
*
m
,
int
len
,
int
scn
,
int
dcn
)
{
int
x
;
if
(
scn
==
2
&&
dcn
==
2
)
{
for
(
x
=
0
;
x
<
len
*
2
;
x
+=
2
)
...
...
@@ -1352,7 +1352,7 @@ load4x4Matrix( const float* m, __m128& m0, __m128& m1, __m128& m2, __m128& m3, _
}
#endif
static
void
transform_8u
(
const
uchar
*
src
,
uchar
*
dst
,
const
float
*
m
,
int
len
,
int
scn
,
int
dcn
)
{
...
...
@@ -1379,7 +1379,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in
        __m128i m2 = _mm_setr_epi16(0, m20, m21, m22, m20, m21, m22, 0);
        __m128i m3 = _mm_setr_epi32(m03, m13, m23, 0);
        int x = 0;

        for( ; x <= (len - 8)*3; x += 8*3 )
        {
            __m128i z = _mm_setzero_si128(), t0, t1, t2, r0, r1;
...
...
@@ -1470,14 +1470,14 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in
        return;
    }
#endif

    transform_(src, dst, m, len, scn, dcn);
}

static void
transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, int dcn )
{
#if CV_SSE2
    if( USE_SSE2 && scn == 3 && dcn == 3 )
    {
        __m128 m0, m1, m2, m3;
...
...
@@ -1536,11 +1536,11 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn,
        return;
    }
#endif

    transform_(src, dst, m, len, scn, dcn);
}

static void
transform_32f( const float* src, float* dst, const float* m, int len, int scn, int dcn )
{
...
...
@@ -1574,12 +1574,12 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i
            }
            return;
        }

        if( scn == 4 && dcn == 4 )
        {
            __m128 m0, m1, m2, m3, m4;
            load4x4Matrix(m, m0, m1, m2, m3, m4);

            for( ; x < len*4; x += 4 )
            {
                __m128 x0 = _mm_loadu_ps(src + x);
...
...
@@ -1616,18 +1616,18 @@ transform_32s(const int* src, int* dst, const double* m, int len, int scn, int d
{
    transform_(src, dst, m, len, scn, dcn);
}

static void
transform_64f( const double* src, double* dst, const double* m, int len, int scn, int dcn )
{
    transform_(src, dst, m, len, scn, dcn);
}

template<typename T, typename WT> static void
diagtransform_( const T* src, T* dst, const WT* m, int len, int cn, int )
{
    int x;

    if( cn == 2 )
    {
        for( x = 0; x < len*2; x += 2 )
...
...
@@ -1674,8 +1674,8 @@ static void
diagtransform_8u(const uchar* src, uchar* dst, const float* m, int len, int scn, int dcn)
{
    diagtransform_(src, dst, m, len, scn, dcn);
}

static void
diagtransform_8s(const schar* src, schar* dst, const float* m, int len, int scn, int dcn)
{
...
...
@@ -1686,8 +1686,8 @@ static void
diagtransform_16u(const ushort* src, ushort* dst, const float* m, int len, int scn, int dcn)
{
    diagtransform_(src, dst, m, len, scn, dcn);
}

static void
diagtransform_16s(const short* src, short* dst, const float* m, int len, int scn, int dcn)
{
...
...
@@ -1704,17 +1704,17 @@ static void
diagtransform_32f(const float* src, float* dst, const float* m, int len, int scn, int dcn)
{
    diagtransform_(src, dst, m, len, scn, dcn);
}

static void
diagtransform_64f(const double* src, double* dst, const double* m, int len, int scn, int dcn)
{
    diagtransform_(src, dst, m, len, scn, dcn);
}

typedef void (*TransformFunc)( const uchar* src, uchar* dst, const uchar* m, int, int, int );

static TransformFunc transformTab[] =
{
    (TransformFunc)transform_8u, (TransformFunc)transform_8s, (TransformFunc)transform_16u,
...
...
@@ -1728,23 +1728,23 @@ static TransformFunc diagTransformTab[] =
    (TransformFunc)diagtransform_16s, (TransformFunc)diagtransform_32s,
    (TransformFunc)diagtransform_32f, (TransformFunc)diagtransform_64f, 0
};

}

void cv::transform( const InputArray& _src, OutputArray _dst, const InputArray& _mtx )
{
    Mat src = _src.getMat(), m = _mtx.getMat();
    int depth = src.depth(), scn = src.channels(), dcn = m.rows;
    CV_Assert( scn == m.cols || scn + 1 == m.cols );
    bool isDiag = false;

    _dst.create( src.size(), CV_MAKETYPE(depth, dcn) );
    Mat dst = _dst.getMat();

    int mtype = depth == CV_32S || depth == CV_64F ? CV_64F : CV_32F;
    AutoBuffer<double> _mbuf;
    double* mbuf = _mbuf;

    if( !m.isContinuous() || m.type() != mtype || m.cols != scn + 1 )
    {
        _mbuf.allocate(dcn*(scn + 1));
...
...
@@ -1791,12 +1791,12 @@ void cv::transform( const InputArray& _src, OutputArray _dst, const InputArray&
    TransformFunc func = isDiag ? diagTransformTab[depth] : transformTab[depth];
    CV_Assert( func != 0 );

    const Mat* arrays[] = { &src, &dst, 0 };
    uchar* ptrs[2];
    NAryMatIterator it(arrays, ptrs);
    size_t i, total = it.size;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], (uchar*)mbuf, (int)total, scn, dcn );
}
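As a rough usage sketch (illustrative only, not part of the patch): cv::transform, reconstructed above, multiplies every element's channel vector by the matrix m, so a 1x3 float matrix of weights collapses a 3-channel image into a single channel.

#include <opencv2/core/core.hpp>
using namespace cv;

int main()
{
    Mat rgb(100, 100, CV_8UC3, Scalar(10, 20, 30));               // hypothetical input
    Mat weights = (Mat_<float>(1, 3) << 0.299f, 0.587f, 0.114f);  // per-channel weights
    Mat gray;
    transform(rgb, gray, weights);   // each output pixel = weights * (channel vector)
    // gray is single-channel with the same depth as rgb
    return 0;
}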
...
...
@@ -1813,7 +1813,7 @@ perspectiveTransform_( const T* src, T* dst, const double* m, int len, int scn,
{
    const double eps = FLT_EPSILON;
    int i;

    if( scn == 2 && dcn == 2 )
    {
        for( i = 0; i < len*2; i += 2 )
...
...
@@ -1837,7 +1837,7 @@ perspectiveTransform_( const T* src, T* dst, const double* m, int len, int scn,
            {
                T x = src[i], y = src[i + 1], z = src[i + 2];
                double w = x*m[12] + y*m[13] + z*m[14] + m[15];

                if( fabs(w) > eps )
                {
                    w = 1./w;
...
...
@@ -1855,7 +1855,7 @@ perspectiveTransform_( const T* src, T* dst, const double* m, int len, int scn,
            {
                T x = src[0], y = src[1], z = src[2];
                double w = x*m[8] + y*m[9] + z*m[10] + m[11];

                if( fabs(w) > eps )
                {
                    w = 1./w;
...
...
@@ -1893,7 +1893,7 @@ perspectiveTransform_( const T* src, T* dst, const double* m, int len, int scn,
    }
}

static void
perspectiveTransform_32f(const float* src, float* dst, const double* m, int len, int scn, int dcn)
{
...
...
@@ -1905,22 +1905,22 @@ perspectiveTransform_64f(const double* src, double* dst, const double* m, int le
{
    perspectiveTransform_(src, dst, m, len, scn, dcn);
}

}

void cv::perspectiveTransform( const InputArray& _src, OutputArray _dst, const InputArray& _mtx )
{
    Mat src = _src.getMat(), m = _mtx.getMat();
    int depth = src.depth(), scn = src.channels(), dcn = m.rows - 1;
    CV_Assert( scn + 1 == m.cols && (depth == CV_32F || depth == CV_64F));
    _dst.create( src.size(), CV_MAKETYPE(depth, dcn) );
    Mat dst = _dst.getMat();

    const int mtype = CV_64F;
    AutoBuffer<double> _mbuf;
    double* mbuf = _mbuf;

    if( !m.isContinuous() || m.type() != mtype )
    {
        _mbuf.allocate((dcn + 1)*(scn + 1));
...
...
@@ -1930,20 +1930,20 @@ void cv::perspectiveTransform( const InputArray& _src, OutputArray _dst, const I
    }
    else
        mbuf = (double*)m.data;

    TransformFunc func = depth == CV_32F ?
        (TransformFunc)perspectiveTransform_32f :
        (TransformFunc)perspectiveTransform_64f;
    CV_Assert( func != 0 );

    const Mat* arrays[] = { &src, &dst, 0 };
    uchar* ptrs[2];
    NAryMatIterator it(arrays, ptrs);
    size_t i, total = it.size;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], (uchar*)mbuf, (int)total, scn, dcn );
}
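A minimal sketch of how the function above is typically called (assuming a 3x3 homography H; this example is not part of the commit): each 2-D point is mapped through H and divided by the resulting homogeneous coordinate w.

#include <opencv2/core/core.hpp>
#include <vector>
using namespace cv;

int main()
{
    std::vector<Point2f> src(1, Point2f(10.f, 20.f)), dst;
    Mat H = Mat::eye(3, 3, CV_64F);      // hypothetical homography (identity here)
    perspectiveTransform(src, dst, H);   // dst[i] = project(H * [x y 1]^T)
    return 0;
}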
/****************************************************************************************\
* ScaleAdd *
...
...
@@ -2000,7 +2000,7 @@ static void scaleAdd_32f(const float* src1, const float* src2, float* dst,
        dst[i] = src1[i]*alpha + src2[i];
}

static void scaleAdd_64f(const double* src1, const double* src2, double* dst,
                         int len, double* _alpha)
{
...
...
@@ -2040,39 +2040,39 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst,
typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, int len, const void* alpha);

}

void cv::scaleAdd( const InputArray& _src1, double alpha, const InputArray& _src2, OutputArray _dst )
{
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
    int depth = src1.depth(), cn = src1.channels();
    CV_Assert( src1.type() == src2.type() );
    if( depth < CV_32F )
    {
        addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
        return;
    }

    _dst.create(src1.dims, src1.size, src1.type());
    Mat dst = _dst.getMat();

    float falpha = (float)alpha;
    void* palpha = depth == CV_32F ? (void*)&falpha : (void*)&alpha;

    ScaleAddFunc func = depth == CV_32F ? (ScaleAddFunc)scaleAdd_32f : (ScaleAddFunc)scaleAdd_64f;

    if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() )
    {
        size_t len = src1.total()*cn;
        func(src1.data, src2.data, dst.data, (int)len, palpha);
        return;
    }

    const Mat* arrays[] = { &src1, &src2, &dst, 0 };
    uchar* ptrs[3];
    NAryMatIterator it(arrays, ptrs);
    size_t i, len = it.size*cn;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], ptrs[2], (int)len, palpha );
}
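Usage sketch (illustrative, not from the diff): scaleAdd computes dst = alpha*src1 + src2 elementwise; as the code above shows, integer depths are simply forwarded to addWeighted.

#include <opencv2/core/core.hpp>
using namespace cv;

int main()
{
    Mat a = Mat::ones(4, 4, CV_32F), b = Mat::ones(4, 4, CV_32F), c;
    scaleAdd(a, 2.0, b, c);   // c(i,j) = 2*a(i,j) + b(i,j) = 3
    return 0;
}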
...
...
@@ -2243,7 +2243,7 @@ double cv::Mahalonobis( const InputArray& _v1, const InputArray& _v2, const Inpu
{
    return Mahalanobis(_v1, _v2, _icovar);
}
/****************************************************************************************\
* MulTransposed *
\****************************************************************************************/
...
...
@@ -2445,7 +2445,7 @@ MulTransposedL( const Mat& srcmat, Mat& dstmat, const Mat& deltamat, double scal
typedef void (*MulTransposedFunc)(const Mat& src, Mat& dst, const Mat& delta, double scale);

}

void cv::mulTransposed( const InputArray& _src, OutputArray _dst, bool ata,
                        const InputArray& _delta, double scale, int dtype )
{
...
...
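For orientation (sketch only, not part of the change): mulTransposed computes scale*(src - delta)^T*(src - delta) when ata is true, and the transposed-on-the-right product otherwise, e.g.:

#include <opencv2/core/core.hpp>
using namespace cv;

int main()
{
    Mat samples(10, 3, CV_32F);             // hypothetical 10 samples with 3 features
    randu(samples, Scalar(0), Scalar(1));
    Mat prod;
    mulTransposed(samples, prod, true);     // 3x3 result: samples^T * samples (ata == true)
    return 0;
}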
@@ -2578,7 +2578,7 @@ dotProd_(const T* src1, const T* src2, int len)
                  (double)src1[i + 2]*src2[i + 2] + (double)src1[i + 3]*src2[i + 3];

    for( ; i < len; i++ )
        result += (double)src1[i]*src2[i];

    return result;
}
...
...
@@ -2590,9 +2590,10 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
    ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
                          src2, (int)(len*sizeof(src2[0])),
                          ippiSize(len, 1), &r);
    return r;
#else
    int i = 0;

#if CV_SSE2
    if( USE_SSE2 )
    {
...
...
@@ -2616,7 +2617,7 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
                s = _mm_add_epi32(s, s0);
                s = _mm_add_epi32(s, s2);
            }

            for( ; j < blockSize; j += 4 )
            {
                __m128i s0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src1 + j)), z);
...
...
@@ -2627,7 +2628,7 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
            CV_DECL_ALIGNED(16) int buf[4];
            _mm_store_si128((__m128i*)buf, s);
            r += buf[0] + buf[1] + buf[2] + buf[3];

            src1 += blockSize;
            src2 += blockSize;
            i += blockSize;
...
...
@@ -2692,7 +2693,7 @@ static double dotProd_64f(const double* src1, const double* src2, int len)
typedef double (*DotProdFunc)(const uchar* src1, const uchar* src2, int len);

static DotProdFunc dotProdTab[] =
{
    (DotProdFunc)dotProd_8u, (DotProdFunc)dotProd_8s, (DotProdFunc)dotProd_16u,
...
...
@@ -2713,16 +2714,16 @@ double Mat::dot(const InputArray& _mat) const
        if( len == (size_t)(int)len )
            return func(data, mat.data, len);
    }

    const Mat* arrays[] = { this, &mat, 0 };
    uchar* ptrs[2];
    NAryMatIterator it(arrays, ptrs);
    int len = (int)(it.size*cn);
    double r = 0;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        r += func( ptrs[0], ptrs[1], len );

    return r;
}
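A small sketch of the public entry point implemented above (illustrative only): Mat::dot multiplies matching elements across all channels of both matrices and sums the products into a double.

#include <opencv2/core/core.hpp>
using namespace cv;

int main()
{
    Mat a = (Mat_<float>(1, 3) << 1, 2, 3);
    Mat b = (Mat_<float>(1, 3) << 4, 5, 6);
    double d = a.dot(b);   // 1*4 + 2*5 + 3*6 = 32
    return 0;
}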
...
...
@@ -3027,12 +3028,12 @@ cvCalcPCA( const CvArr* data_arr, CvArr* avg_arr, CvArr* eigenvals, CvArr* eigen
    evects = pca.eigenvectors;

    int ecount0 = evals0.cols + evals0.rows - 1;
    int ecount  = evals.cols + evals.rows - 1;
    CV_Assert( (evals0.cols == 1 || evals0.rows == 1) && ecount0 <= ecount &&
               evects0.cols == evects.cols && evects0.rows == ecount0 );

    cv::Mat temp = evals0;
    if( evals.rows == 1 )
        evals.colRange(0, ecount0).convertTo(temp, evals0.type());
...
...
modules/core/src/precomp.hpp
View file @ 2057f2c4
...
...
@@ -87,7 +87,7 @@ extern const uchar g_Saturate8u[];
void deleteThreadAllocData();
void deleteThreadRNGData();
#endif

template<typename T1, typename T2 = T1, typename T3 = T1> struct OpAdd
{
    typedef T1 type1;
...
...
@@ -176,24 +176,24 @@ typedef void (*BinaryFunc)(const uchar* src1, size_t step1,
                           void*);

BinaryFunc getConvertFunc(int sdepth, int ddepth);
BinaryFunc getConvertScaleFunc(int sdepth, int ddepth);
BinaryFunc getCopyMaskFunc(size_t esz);

enum { BLOCK_SIZE = 1024 };

#ifdef HAVE_IPP
static inline IppiSize ippiSize(int width, int height) { IppiSize sz = { width, height }; return sz; }
static inline IppiSize ippiSize(Size _sz) { reIppiSize sz = { _sz.width, _sz.height }; return sz; }
static inline IppiSize ippiSize(int width, int height) { IppiSize sz = { width, height }; return sz; }
static inline IppiSize ippiSize(Size _sz) { IppiSize sz = { _sz.width, _sz.height }; return sz; }
#endif
#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
#define ARITHM_USE_IPP 1
#define IF_IPP(then_call, else_call) then_call
#else
#define ARITHM_USE_IPP 0
#define IF_IPP(then_call, else_call) else_call
#endif
#endif
}
#endif /*_CXCORE_INTERNAL_H_*/
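The ARITHM_USE_IPP / IF_IPP macros above pick between an IPP-backed call and a plain fallback at compile time. A self-contained sketch of the same pattern follows; the macro and function names here are hypothetical stand-ins, not OpenCV's.

#include <cstdio>

#define MY_USE_IPP 0                              // pretend IPP is unavailable
#if MY_USE_IPP
#define MY_IF_IPP(then_call, else_call) then_call
#else
#define MY_IF_IPP(then_call, else_call) else_call
#endif

static void fastPath()  { std::printf("IPP path\n"); }     // hypothetical IPP-backed helper
static void plainPath() { std::printf("generic path\n"); } // hypothetical scalar fallback

int main()
{
    MY_IF_IPP(fastPath(), plainPath());           // expands to plainPath() here
    return 0;
}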
modules/core/src/system.cpp
View file @ 2057f2c4
...
...
@@ -170,9 +170,10 @@ struct IPPInitializer
IPPInitializer ippInitializer;
#else
volatile bool useOptimizedFlag = false;
volatile bool USE_SSE2 = false;
#endif

volatile bool USE_SSE2 = false;

void setUseOptimized( bool flag )
{
    useOptimizedFlag = flag;
...
...
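Sketch of how the flag defined above is driven from user code (illustrative only): setUseOptimized toggles useOptimizedFlag, and useOptimized() reports its current value.

#include <opencv2/core/core.hpp>
#include <cstdio>

int main()
{
    cv::setUseOptimized(false);                  // disable optimized (IPP/SSE2) code paths
    std::printf("optimized: %d\n", (int)cv::useOptimized());
    cv::setUseOptimized(true);                   // re-enable them
    return 0;
}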