Commit 93a36b0d (opencv)
Authored Feb 23, 2019 by Alexander Alekhin
Parents: 4e12febe, 6eabe6bc

    core: keep history of merge.cpp

Showing 1 changed file with 438 additions and 0 deletions:

    modules/core/src/merge.dispatch.cpp (new file, mode 100644): +438, -0

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html

#include "precomp.hpp"
#include "opencl_kernels_core.hpp"

namespace cv { namespace hal {

#if CV_SIMD
/*
  The trick with STORE_UNALIGNED/STORE_ALIGNED_NOCACHE is the following:
  on IA there are instructions movntps and such to which
  v_store_interleave(...., STORE_ALIGNED_NOCACHE) is mapped.
  Those instructions write directly into memory w/o touching cache
  that results in dramatic speed improvements, especially on
  large arrays (FullHD, 4K etc.).

  Those intrinsics require the destination address to be aligned
  by 16/32 bytes (with SSE2 and AVX2, respectively).
  So we potentially split the processing into 3 stages:
  1) the optional prefix part [0:i0), where we use simple unaligned stores.
  2) the optional main part [i0:len - VECSZ], where we use "nocache" mode.
     But in some cases we have to use unaligned stores in this part.
  3) the optional suffix part (the tail) (len - VECSZ:len) where we switch back to
     "unaligned" mode to process the remaining elements.
  In principle there can be very poorly aligned data where there is no main part.
  For that we set i0=0 and use unaligned stores for the whole array.
*/
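// Illustrative example of the prologue computation below (hypothetical numbers,
// assuming an AVX2 build where v_uint8::nlanes == 32): for T=uchar and cn=3,
// VECSZ*sizeof(T) == 32. If dst happens to be 24 bytes past a 32-byte boundary,
// r == 24, which is divisible by dstElemSize == 3, so (provided len > 2*VECSZ)
// i0 = 32 - 24/3 = 24. The prefix [0:i0) then covers 24 pixels (72 bytes), so
// dst + i0*cn is 32-byte aligned (24 + 72 = 96, a multiple of 32) and the main
// loop can switch to STORE_ALIGNED_NOCACHE.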
template<typename T, typename VecT> static void
vecmerge_( const T** src, T* dst, int len, int cn )
{
    const int VECSZ = VecT::nlanes;
    int i, i0 = 0;
    const T* src0 = src[0];
    const T* src1 = src[1];

    const int dstElemSize = cn * sizeof(T);
    int r = (int)((size_t)(void*)dst % (VECSZ*sizeof(T)));
    hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE;
    if( r != 0 )
    {
        mode = hal::STORE_UNALIGNED;
        if( r % dstElemSize == 0 && len > VECSZ*2 )
            i0 = VECSZ - (r / dstElemSize);
    }

    if( cn == 2 )
    {
        for( i = 0; i < len; i += VECSZ )
        {
            if( i > len - VECSZ )
            {
                i = len - VECSZ;
                mode = hal::STORE_UNALIGNED;
            }
            VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
            v_store_interleave(dst + i*cn, a, b, mode);
            if( i < i0 )
            {
                i = i0 - VECSZ;
                mode = hal::STORE_ALIGNED_NOCACHE;
            }
        }
    }
    else if( cn == 3 )
    {
        const T* src2 = src[2];
        for( i = 0; i < len; i += VECSZ )
        {
            if( i > len - VECSZ )
            {
                i = len - VECSZ;
                mode = hal::STORE_UNALIGNED;
            }
            VecT a = vx_load(src0 + i), b = vx_load(src1 + i), c = vx_load(src2 + i);
            v_store_interleave(dst + i*cn, a, b, c, mode);
            if( i < i0 )
            {
                i = i0 - VECSZ;
                mode = hal::STORE_ALIGNED_NOCACHE;
            }
        }
    }
    else
    {
        CV_Assert( cn == 4 );
        const T* src2 = src[2];
        const T* src3 = src[3];
        for( i = 0; i < len; i += VECSZ )
        {
            if( i > len - VECSZ )
            {
                i = len - VECSZ;
                mode = hal::STORE_UNALIGNED;
            }
            VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
            VecT c = vx_load(src2 + i), d = vx_load(src3 + i);
            v_store_interleave(dst + i*cn, a, b, c, d, mode);
            if( i < i0 )
            {
                i = i0 - VECSZ;
                mode = hal::STORE_ALIGNED_NOCACHE;
            }
        }
    }
    vx_cleanup();
}
#endif
template<typename T> static void
merge_( const T** src, T* dst, int len, int cn )
{
    int k = cn % 4 ? cn % 4 : 4;
    int i, j;
    if( k == 1 )
    {
        const T* src0 = src[0];
        for( i = j = 0; i < len; i++, j += cn )
            dst[j] = src0[i];
    }
    else if( k == 2 )
    {
        const T *src0 = src[0], *src1 = src[1];
        i = j = 0;
        for( ; i < len; i++, j += cn )
        {
            dst[j] = src0[i];
            dst[j+1] = src1[i];
        }
    }
    else if( k == 3 )
    {
        const T *src0 = src[0], *src1 = src[1], *src2 = src[2];
        i = j = 0;
        for( ; i < len; i++, j += cn )
        {
            dst[j] = src0[i];
            dst[j+1] = src1[i];
            dst[j+2] = src2[i];
        }
    }
    else
    {
        const T *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
        i = j = 0;
        for( ; i < len; i++, j += cn )
        {
            dst[j] = src0[i];
            dst[j+1] = src1[i];
            dst[j+2] = src2[i];
            dst[j+3] = src3[i];
        }
    }

    for( ; k < cn; k += 4 )
    {
        const T *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
        for( i = 0, j = k; i < len; i++, j += cn )
        {
            dst[j] = src0[i];
            dst[j+1] = src1[i];
            dst[j+2] = src2[i];
            dst[j+3] = src3[i];
        }
    }
}
void merge8u(const uchar** src, uchar* dst, int len, int cn )
{
    CALL_HAL(merge8u, cv_hal_merge8u, src, dst, len, cn)
#if CV_SIMD
    if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 )
        vecmerge_<uchar, v_uint8>(src, dst, len, cn);
    else
#endif
        merge_(src, dst, len, cn);
}

void merge16u(const ushort** src, ushort* dst, int len, int cn )
{
    CALL_HAL(merge16u, cv_hal_merge16u, src, dst, len, cn)
#if CV_SIMD
    if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 )
        vecmerge_<ushort, v_uint16>(src, dst, len, cn);
    else
#endif
        merge_(src, dst, len, cn);
}

void merge32s(const int** src, int* dst, int len, int cn )
{
    CALL_HAL(merge32s, cv_hal_merge32s, src, dst, len, cn)
#if CV_SIMD
    if( len >= v_int32::nlanes && 2 <= cn && cn <= 4 )
        vecmerge_<int, v_int32>(src, dst, len, cn);
    else
#endif
        merge_(src, dst, len, cn);
}

void merge64s(const int64** src, int64* dst, int len, int cn )
{
    CALL_HAL(merge64s, cv_hal_merge64s, src, dst, len, cn)
#if CV_SIMD
    if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 )
        vecmerge_<int64, v_int64>(src, dst, len, cn);
    else
#endif
        merge_(src, dst, len, cn);
}

}} // cv::hal::
typedef void (*MergeFunc)(const uchar** src, uchar* dst, int len, int cn);

static MergeFunc getMergeFunc(int depth)
{
    static MergeFunc mergeTab[] =
    {
        (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u),
        (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u),
        (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s),
        (MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), 0
    };

    return mergeTab[depth];
}
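// Note: mergeTab above is indexed by the matrix depth constant (CV_8U=0, CV_8S=1,
// CV_16U=2, CV_16S=3, CV_32S=4, CV_32F=5, CV_64F=6), so CV_32F reuses merge32s and
// CV_64F reuses merge64s: merging only copies element bits of the given size and
// never interprets their values.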
#ifdef HAVE_IPP

namespace cv {
static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
{
#ifdef HAVE_IPP_IW_LL
    CV_INSTRUMENT_REGION_IPP();

    if(channels != 3 && channels != 4)
        return false;

    if(mv[0].dims <= 2)
    {
        IppiSize    size       = ippiSize(mv[0].size());
        const void *srcPtrs[4] = {NULL};
        size_t      srcStep    = mv[0].step;
        for(int i = 0; i < channels; i++)
        {
            srcPtrs[i] = mv[i].ptr();
            if(srcStep != mv[i].step)
                return false;
        }

        return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size,
                                     (int)mv[0].elemSize1(), channels, 0) >= 0;
    }
    else
    {
        const Mat *arrays[5] = {NULL};
        uchar     *ptrs[5]   = {NULL};

        arrays[0] = &dst;
        for(int i = 1; i < channels; i++)
        {
            arrays[i] = &mv[i-1];
        }

        NAryMatIterator it(arrays, ptrs);
        IppiSize size = { (int)it.size, 1 };

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size,
                                     (int)mv[0].elemSize1(), channels, 0) < 0)
                return false;
        }
        return true;
    }
#else
    CV_UNUSED(dst); CV_UNUSED(mv); CV_UNUSED(channels);
    return false;
#endif
}
}
#endif
void cv::merge(const Mat* mv, size_t n, OutputArray _dst)
{
    CV_INSTRUMENT_REGION();

    CV_Assert( mv && n > 0 );

    int depth = mv[0].depth();
    bool allch1 = true;
    int k, cn = 0;
    size_t i;

    for( i = 0; i < n; i++ )
    {
        CV_Assert(mv[i].size == mv[0].size && mv[i].depth() == depth);
        allch1 = allch1 && mv[i].channels() == 1;
        cn += mv[i].channels();
    }

    CV_Assert( 0 < cn && cn <= CV_CN_MAX );
    _dst.create(mv[0].dims, mv[0].size, CV_MAKETYPE(depth, cn));
    Mat dst = _dst.getMat();

    if( n == 1 )
    {
        mv[0].copyTo(dst);
        return;
    }

    CV_IPP_RUN(allch1, ipp_merge(mv, dst, (int)n));

    if( !allch1 )
    {
        AutoBuffer<int> pairs(cn*2);
        int j, ni=0;

        for( i = 0, j = 0; i < n; i++, j += ni )
        {
            ni = mv[i].channels();
            for( k = 0; k < ni; k++ )
            {
                pairs[(j+k)*2] = j + k;
                pairs[(j+k)*2+1] = j + k;
            }
        }
        mixChannels( mv, n, &dst, 1, &pairs[0], cn );
        return;
    }

    MergeFunc func = getMergeFunc(depth);
    CV_Assert( func != 0 );

    size_t esz = dst.elemSize(), esz1 = dst.elemSize1();
    size_t blocksize0 = (int)((BLOCK_SIZE + esz-1)/esz);
    AutoBuffer<uchar> _buf((cn+1)*(sizeof(Mat*) + sizeof(uchar*)) + 16);
    const Mat** arrays = (const Mat**)_buf.data();
    uchar** ptrs = (uchar**)alignPtr(arrays + cn + 1, 16);

    arrays[0] = &dst;
    for( k = 0; k < cn; k++ )
        arrays[k+1] = &mv[k];

    NAryMatIterator it(arrays, ptrs, cn+1);
    size_t total = (int)it.size;
    size_t blocksize = std::min((size_t)CV_SPLIT_MERGE_MAX_BLOCK_SIZE(cn),
                                cn <= 4 ? total : std::min(total, blocksize0));

    for( i = 0; i < it.nplanes; i++, ++it )
    {
        for( size_t j = 0; j < total; j += blocksize )
        {
            size_t bsz = std::min(total - j, blocksize);
            func( (const uchar**)&ptrs[1], ptrs[0], (int)bsz, cn );

            if( j + blocksize < total )
            {
                ptrs[0] += bsz*esz;
                for( int t = 0; t < cn; t++ )
                    ptrs[t+1] += bsz*esz1;
            }
        }
    }
}
#ifdef HAVE_OPENCL

namespace cv {

static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst )
{
    std::vector<UMat> src, ksrc;
    _mv.getUMatVector(src);
    CV_Assert(!src.empty());

    int type = src[0].type(), depth = CV_MAT_DEPTH(type),
            rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
    Size size = src[0].size();

    for (size_t i = 0, srcsize = src.size(); i < srcsize; ++i)
    {
        int itype = src[i].type(), icn = CV_MAT_CN(itype), idepth = CV_MAT_DEPTH(itype),
                esz1 = CV_ELEM_SIZE1(idepth);
        if (src[i].dims > 2)
            return false;

        CV_Assert(size == src[i].size() && depth == idepth);

        for (int cn = 0; cn < icn; ++cn)
        {
            UMat tsrc = src[i];
            tsrc.offset += cn * esz1;
            ksrc.push_back(tsrc);
        }
    }
    int dcn = (int)ksrc.size();

    String srcargs, processelem, cndecl, indexdecl;
    for (int i = 0; i < dcn; ++i)
    {
        srcargs += format("DECLARE_SRC_PARAM(%d)", i);
        processelem += format("PROCESS_ELEM(%d)", i);
        indexdecl += format("DECLARE_INDEX(%d)", i);
        cndecl += format(" -D scn%d=%d", i, ksrc[i].channels());
    }

    ocl::Kernel k("merge", ocl::core::split_merge_oclsrc,
                  format("-D OP_MERGE -D cn=%d -D T=%s -D DECLARE_SRC_PARAMS_N=%s"
                         " -D DECLARE_INDEX_N=%s -D PROCESS_ELEMS_N=%s%s",
                         dcn, ocl::memopTypeToStr(depth), srcargs.c_str(),
                         indexdecl.c_str(), processelem.c_str(), cndecl.c_str()));
    if (k.empty())
        return false;

    _dst.create(size, CV_MAKE_TYPE(depth, dcn));
    UMat dst = _dst.getUMat();

    int argidx = 0;
    for (int i = 0; i < dcn; ++i)
        argidx = k.set(argidx, ocl::KernelArg::ReadOnlyNoSize(ksrc[i]));
    argidx = k.set(argidx, ocl::KernelArg::WriteOnly(dst));
    k.set(argidx, rowsPerWI);

    size_t globalsize[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}

}

#endif
void cv::merge(InputArrayOfArrays _mv, OutputArray _dst)
{
    CV_INSTRUMENT_REGION();

    CV_OCL_RUN(_mv.isUMatVector() && _dst.isUMat(),
               ocl_merge(_mv, _dst))

    std::vector<Mat> mv;
    _mv.getMatVector(mv);
    merge(!mv.empty() ? &mv[0] : 0, mv.size(), _dst);
}
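
For context, the two cv::merge overloads defined above are the public entry points that interleave single-channel planes into one multi-channel Mat. A minimal usage sketch, assuming a standard OpenCV build (illustrative only, not part of the committed file):

#include <opencv2/core.hpp>
#include <vector>

int main()
{
    // Three single-channel planes of the same size and depth (8-bit here).
    cv::Mat b(480, 640, CV_8UC1, cv::Scalar(255));
    cv::Mat g(480, 640, CV_8UC1, cv::Scalar(128));
    cv::Mat r(480, 640, CV_8UC1, cv::Scalar(0));

    // Interleave the planes into a single 3-channel matrix. This dispatches
    // through cv::merge(const Mat*, size_t, OutputArray) above and, depending
    // on the build, through the IPP/OpenCL paths or cv::hal::merge8u.
    std::vector<cv::Mat> planes = { b, g, r };
    cv::Mat bgr;
    cv::merge(planes, bgr);   // bgr is now CV_8UC3

    return 0;
}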