Commit fcec053d authored by Alexander Alekhin

Merge remote-tracking branch 'upstream/3.4' into merge-3.4

parents d073215f a63f66c9
...@@ -2993,7 +2993,11 @@ int Kernel::set(int i, const KernelArg& arg) ...@@ -2993,7 +2993,11 @@ int Kernel::set(int i, const KernelArg& arg)
if( !p || !p->handle ) if( !p || !p->handle )
return -1; return -1;
if (i < 0) if (i < 0)
{
CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
p->name.c_str(), (int)i));
return i; return i;
}
if( i == 0 ) if( i == 0 )
p->cleanupUMats(); p->cleanupUMats();
cl_int status = 0; cl_int status = 0;
...@@ -3002,10 +3006,19 @@ int Kernel::set(int i, const KernelArg& arg) ...@@ -3002,10 +3006,19 @@ int Kernel::set(int i, const KernelArg& arg)
AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) | AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) |
((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0)); ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0));
bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0; bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
if (ptronly && arg.m->empty())
{
cl_mem h_null = (cl_mem)NULL;
status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
return i + 1;
}
cl_mem h = (cl_mem)arg.m->handle(accessFlags); cl_mem h = (cl_mem)arg.m->handle(accessFlags);
if (!h) if (!h)
{ {
CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
p->name.c_str(), (int)i, (int)arg.flags, arg.m));
p->release(); p->release();
p = 0; p = 0;
return -1; return -1;
......
...@@ -138,9 +138,12 @@ public: ...@@ -138,9 +138,12 @@ public:
UMat& bnorm_weight = umat_scale; UMat& bnorm_weight = umat_scale;
UMat& bnorm_bias = umat_shift; UMat& bnorm_bias = umat_shift;
const unsigned LOCAL_SIZE = 128;
bool use_half = (inputs[0].depth() == CV_16S); bool use_half = (inputs[0].depth() == CV_16S);
String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s", use_half ? "half" : "float", String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u", use_half ? "half" : "float",
use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4"); use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4",
LOCAL_SIZE
);
int splitDim = (acrossChannels) ? 1 : 2; int splitDim = (acrossChannels) ? 1 : 2;
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++) for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
...@@ -155,8 +158,8 @@ public: ...@@ -155,8 +158,8 @@ public:
float alpha = 1.0f / s[1]; float alpha = 1.0f / s[1];
String buildopt = "-DNUM=4" + opts; String buildopt = "-DNUM=4" + opts;
ocl::Kernel k("mean_fuse4", ocl::dnn::mvn_oclsrc, buildopt); ocl::Kernel k("mean_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN_FUSE");
size_t localsize[] = { 128 }; size_t localsize[] = { LOCAL_SIZE };
size_t globalsize[] = { (size_t)s[0] / 4 * localsize[0] }; size_t globalsize[] = { (size_t)s[0] / 4 * localsize[0] };
int argId = 0; int argId = 0;
...@@ -165,7 +168,6 @@ public: ...@@ -165,7 +168,6 @@ public:
k.set(argId++, alpha); k.set(argId++, alpha);
k.set(argId++, ocl::KernelArg::PtrWriteOnly(meanMat)); k.set(argId++, ocl::KernelArg::PtrWriteOnly(meanMat));
k.set(argId++, ocl::KernelArg::PtrWriteOnly(tmpMat)); k.set(argId++, ocl::KernelArg::PtrWriteOnly(tmpMat));
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
bool ret = k.run(1, globalsize, localsize, false); bool ret = k.run(1, globalsize, localsize, false);
if (!ret) if (!ret)
return false; return false;
...@@ -173,7 +175,7 @@ public: ...@@ -173,7 +175,7 @@ public:
buildopt += format(" %s %s", (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "", buildopt += format(" %s %s", (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "",
(fuse_relu) ? "-DFUSE_RELU" : ""); (fuse_relu) ? "-DFUSE_RELU" : "");
ocl::Kernel k1("mvn_fuse4", ocl::dnn::mvn_oclsrc, buildopt); ocl::Kernel k1("mvn_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MVN_FUSE");
argId = 0; argId = 0;
k1.set(argId++, ocl::KernelArg::PtrReadOnly(tmpMat)); k1.set(argId++, ocl::KernelArg::PtrReadOnly(tmpMat));
k1.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat)); k1.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat));
...@@ -185,7 +187,6 @@ public: ...@@ -185,7 +187,6 @@ public:
k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_weight)); k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_weight));
k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_bias)); k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_bias));
k1.set(argId++, ocl::KernelArg::PtrWriteOnly(outMat)); k1.set(argId++, ocl::KernelArg::PtrWriteOnly(outMat));
k1.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
ret = k1.run(1, globalsize, localsize, false); ret = k1.run(1, globalsize, localsize, false);
if (!ret) if (!ret)
return false; return false;
...@@ -243,7 +244,7 @@ public: ...@@ -243,7 +244,7 @@ public:
if (normVariance) if (normVariance)
{ {
String kname = format("calc_mean%d", number); String kname = format("calc_mean%d", number);
ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt); ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN");
if (kernel.empty()) if (kernel.empty())
return false; return false;
...@@ -263,7 +264,7 @@ public: ...@@ -263,7 +264,7 @@ public:
} }
String kname = format("mvn%d", number); String kname = format("mvn%d", number);
buildopt += format("%s%s%s", (normVariance) ? " -DNORM_VARIANCE" : "", buildopt += format("%s%s%s -DKERNEL_MVN", (normVariance) ? " -DNORM_VARIANCE" : "",
(fuse_batch_norm) ? " -DFUSE_BATCH_NORM" : "", (fuse_batch_norm) ? " -DFUSE_BATCH_NORM" : "",
(fuse_relu) ? " -DFUSE_RELU" : ""); (fuse_relu) ? " -DFUSE_RELU" : "");
ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt); ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
......
...@@ -74,6 +74,8 @@ ...@@ -74,6 +74,8 @@
#define MVN_FUSE mvn_fuse1 #define MVN_FUSE mvn_fuse1
#endif #endif
#ifdef KERNEL_MEAN
__kernel void CALC_MEAN(__global const Dtype* src, __kernel void CALC_MEAN(__global const Dtype* src,
const int rows, const int rows,
const int cols, const int cols,
...@@ -94,6 +96,8 @@ __kernel void CALC_MEAN(__global const Dtype* src, ...@@ -94,6 +96,8 @@ __kernel void CALC_MEAN(__global const Dtype* src,
store(dst_vec, dst, index); store(dst_vec, dst, index);
} }
#elif defined KERNEL_MVN
__kernel void MVN(__global const Dtype* src, __kernel void MVN(__global const Dtype* src,
const int rows, const int rows,
const int cols, const int cols,
...@@ -140,12 +144,13 @@ __kernel void MVN(__global const Dtype* src, ...@@ -140,12 +144,13 @@ __kernel void MVN(__global const Dtype* src,
store(dst_vec, dst, index); store(dst_vec, dst, index);
} }
#elif defined KERNEL_MEAN_FUSE
__kernel void MEAN_FUSE(__global const T * A, __kernel void MEAN_FUSE(__global const T * A,
unsigned int A_col_size, unsigned int A_col_size,
float alpha, float alpha,
__global T4 * mean, __global T4 * mean,
__global Dtype * tmp, __global Dtype * tmp)
__local Dtype4 * work)
{ {
unsigned int row_gid = get_group_id(0); unsigned int row_gid = get_group_id(0);
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
...@@ -168,15 +173,16 @@ __kernel void MEAN_FUSE(__global const T * A, ...@@ -168,15 +173,16 @@ __kernel void MEAN_FUSE(__global const T * A,
dot2 += convert_float4(a2); dot2 += convert_float4(a2);
dot3 += convert_float4(a3); dot3 += convert_float4(a3);
i += get_local_size(0); i += LOCAL_SIZE;
} }
__local Dtype4 work[LOCAL_SIZE];
work[lid].s0 = dot(dot0, b0); work[lid].s0 = dot(dot0, b0);
work[lid].s1 = dot(dot1, b0); work[lid].s1 = dot(dot1, b0);
work[lid].s2 = dot(dot2, b0); work[lid].s2 = dot(dot2, b0);
work[lid].s3 = dot(dot3, b0); work[lid].s3 = dot(dot3, b0);
for(unsigned int stride=get_local_size(0)/2 ; stride>0 ; stride>>=1) for(unsigned int stride=LOCAL_SIZE/2 ; stride>0 ; stride>>=1)
{ {
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < stride) if(lid < stride)
...@@ -212,10 +218,12 @@ __kernel void MEAN_FUSE(__global const T * A, ...@@ -212,10 +218,12 @@ __kernel void MEAN_FUSE(__global const T * A,
vstore4(dot2, i, dst0_read + 2 * A_col_size); vstore4(dot2, i, dst0_read + 2 * A_col_size);
vstore4(dot3, i, dst0_read + 3 * A_col_size); vstore4(dot3, i, dst0_read + 3 * A_col_size);
i += get_local_size(0); i += LOCAL_SIZE;
} }
} }
#elif defined KERNEL_MVN_FUSE
__kernel void MVN_FUSE(__global const Dtype * tmp, __kernel void MVN_FUSE(__global const Dtype * tmp,
__global const T * A, __global const T * A,
__global const T4 * mean, __global const T4 * mean,
...@@ -225,8 +233,7 @@ __kernel void MVN_FUSE(__global const Dtype * tmp, ...@@ -225,8 +233,7 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
const float relu_slope, const float relu_slope,
__global const Dtype4 * bnorm_weight, __global const Dtype4 * bnorm_weight,
__global const Dtype4 * bnorm_bias, __global const Dtype4 * bnorm_bias,
__global T * B, __global T * B)
__local Dtype4 * work)
{ {
unsigned int row_gid = get_group_id(0); unsigned int row_gid = get_group_id(0);
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
...@@ -250,15 +257,16 @@ __kernel void MVN_FUSE(__global const Dtype * tmp, ...@@ -250,15 +257,16 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
dot2 += a2; dot2 += a2;
dot3 += a3; dot3 += a3;
i += get_local_size(0); i += LOCAL_SIZE;
} }
__local Dtype4 work[LOCAL_SIZE];
work[lid].s0 = dot(dot0, b0); work[lid].s0 = dot(dot0, b0);
work[lid].s1 = dot(dot1, b0); work[lid].s1 = dot(dot1, b0);
work[lid].s2 = dot(dot2, b0); work[lid].s2 = dot(dot2, b0);
work[lid].s3 = dot(dot3, b0); work[lid].s3 = dot(dot3, b0);
for(unsigned int stride=get_local_size(0)/2 ; stride>0 ; stride>>=1) for(unsigned int stride=LOCAL_SIZE/2 ; stride>0 ; stride>>=1)
{ {
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < stride) if(lid < stride)
...@@ -314,6 +322,10 @@ __kernel void MVN_FUSE(__global const Dtype * tmp, ...@@ -314,6 +322,10 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
vstore4(convert_T(dot2), i, dst0_read + 2 * A_col_size); vstore4(convert_T(dot2), i, dst0_read + 2 * A_col_size);
vstore4(convert_T(dot3), i, dst0_read + 3 * A_col_size); vstore4(convert_T(dot3), i, dst0_read + 3 * A_col_size);
i += get_local_size(0); i += LOCAL_SIZE;
} }
} }
#else
#error "Configuration error!"
#endif
...@@ -963,22 +963,22 @@ struct YCrCb2RGB_i<ushort> ...@@ -963,22 +963,22 @@ struct YCrCb2RGB_i<ushort>
///////////////////////////////////// YUV420 -> RGB ///////////////////////////////////// ///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
const int ITUR_BT_601_CY = 1220542; static const int ITUR_BT_601_CY = 1220542;
const int ITUR_BT_601_CUB = 2116026; static const int ITUR_BT_601_CUB = 2116026;
const int ITUR_BT_601_CUG = -409993; static const int ITUR_BT_601_CUG = -409993;
const int ITUR_BT_601_CVG = -852492; static const int ITUR_BT_601_CVG = -852492;
const int ITUR_BT_601_CVR = 1673527; static const int ITUR_BT_601_CVR = 1673527;
const int ITUR_BT_601_SHIFT = 20; static const int ITUR_BT_601_SHIFT = 20;
// Coefficients for RGB to YUV420p conversion // Coefficients for RGB to YUV420p conversion
const int ITUR_BT_601_CRY = 269484; static const int ITUR_BT_601_CRY = 269484;
const int ITUR_BT_601_CGY = 528482; static const int ITUR_BT_601_CGY = 528482;
const int ITUR_BT_601_CBY = 102760; static const int ITUR_BT_601_CBY = 102760;
const int ITUR_BT_601_CRU = -155188; static const int ITUR_BT_601_CRU = -155188;
const int ITUR_BT_601_CGU = -305135; static const int ITUR_BT_601_CGU = -305135;
const int ITUR_BT_601_CBU = 460324; static const int ITUR_BT_601_CBU = 460324;
const int ITUR_BT_601_CGV = -385875; static const int ITUR_BT_601_CGV = -385875;
const int ITUR_BT_601_CBV = -74448; static const int ITUR_BT_601_CBV = -74448;
//R = 1.164(Y - 16) + 1.596(V - 128) //R = 1.164(Y - 16) + 1.596(V - 128)
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
...@@ -988,49 +988,146 @@ const int ITUR_BT_601_CBV = -74448; ...@@ -988,49 +988,146 @@ const int ITUR_BT_601_CBV = -74448;
//G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20 //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
//B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20 //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20
// Precomputes the chroma-dependent terms of the fixed-point BT.601
// YUV -> RGB formulas for a single (u, v) pair.
// Each output already includes the rounding term
// (1 << (ITUR_BT_601_SHIFT - 1)), so a caller only has to add the scaled
// luma and shift right by ITUR_BT_601_SHIFT.
//   u, v          - chroma samples
//   ruv, guv, buv - [out] chroma contributions for the R, G and B channels
static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv)
{
    const int rounding = 1 << (ITUR_BT_601_SHIFT - 1);
    // chroma samples are re-centred around zero before scaling
    const int du = int(u) - 128;
    const int dv = int(v) - 128;

    ruv = rounding + ITUR_BT_601_CVR * dv;
    guv = rounding + ITUR_BT_601_CVG * dv + ITUR_BT_601_CUG * du;
    buv = rounding + ITUR_BT_601_CUB * du;
}
static inline void uvToRGBuv(const v_uint8& u, const v_uint8& v,
v_int32 (&ruv)[4],
v_int32 (&guv)[4],
v_int32 (&buv)[4])
{
v_uint8 v128 = vx_setall_u8(128);
v_int8 su = v_reinterpret_as_s8(v_sub_wrap(u, v128));
v_int8 sv = v_reinterpret_as_s8(v_sub_wrap(v, v128));
v_int16 uu0, uu1, vv0, vv1;
v_expand(su, uu0, uu1);
v_expand(sv, vv0, vv1);
v_int32 uu[4], vv[4];
v_expand(uu0, uu[0], uu[1]); v_expand(uu1, uu[2], uu[3]);
v_expand(vv0, vv[0], vv[1]); v_expand(vv1, vv[2], vv[3]);
v_int32 vshift = vx_setall_s32(1 << (ITUR_BT_601_SHIFT - 1));
v_int32 vr = vx_setall_s32(ITUR_BT_601_CVR);
v_int32 vg = vx_setall_s32(ITUR_BT_601_CVG);
v_int32 ug = vx_setall_s32(ITUR_BT_601_CUG);
v_int32 ub = vx_setall_s32(ITUR_BT_601_CUB);
for (int k = 0; k < 4; k++)
{
ruv[k] = vshift + vr * vv[k];
guv[k] = vshift + vg * vv[k] + ug * uu[k];
buv[k] = vshift + ub * uu[k];
}
}
// Combines one luma sample with chroma terms precomputed by uvToRGBuv()
// and produces a single RGBA pixel.
//   vy            - luma sample; values below 16 are clamped to 0 after the offset
//   ruv, guv, buv - chroma contributions (rounding term already included)
//   r, g, b       - [out] saturated 8-bit colour channels
//   a             - [out] always set to 0xff
static inline void yRGBuvToRGBA(const uchar vy, const int ruv, const int guv, const int buv,
                                uchar& r, uchar& g, uchar& b, uchar& a)
{
    const int luma = std::max(0, int(vy) - 16) * ITUR_BT_601_CY;

    r = saturate_cast<uchar>((luma + ruv) >> ITUR_BT_601_SHIFT);
    g = saturate_cast<uchar>((luma + guv) >> ITUR_BT_601_SHIFT);
    b = saturate_cast<uchar>((luma + buv) >> ITUR_BT_601_SHIFT);
    a = uchar(0xff);
}
// SIMD counterpart of the scalar yRGBuvToRGBA(): combines one v_uint8 register
// of luma samples with the chroma terms produced by the SIMD uvToRGBuv().
//   vy            - luma samples
//   ruv, guv, buv - chroma contributions, four v_int32 registers per channel
//   rr, gg, bb    - [out] packed 8-bit colour channels
// No alpha output is produced here.
static inline void yRGBuvToRGBA(const v_uint8& vy,
                                const v_int32 (&ruv)[4],
                                const v_int32 (&guv)[4],
                                const v_int32 (&buv)[4],
                                v_uint8& rr, v_uint8& gg, v_uint8& bb)
{
    // NOTE(review): to match std::max(0, y - 16) of the scalar path this relies
    // on operator- for v_uint8 being a saturating subtraction - confirm against
    // the universal intrinsics documentation.
    v_uint8 v16 = vx_setall_u8(16);
    v_uint8 posY = vy - v16;

    // widen luma 8 -> 16 -> 32 bits
    v_uint16 yy0, yy1;
    v_expand(posY, yy0, yy1);
    v_int32 yy[4];
    v_expand(v_reinterpret_as_s16(yy0), yy[0], yy[1]);
    v_expand(v_reinterpret_as_s16(yy1), yy[2], yy[3]);

    v_int32 vcy = vx_setall_s32(ITUR_BT_601_CY);

    v_int32 y[4], r[4], g[4], b[4];
    for (int k = 0; k < 4; k++)
    {
        y[k] = yy[k]*vcy;
        r[k] = (y[k] + ruv[k]) >> ITUR_BT_601_SHIFT;
        g[k] = (y[k] + guv[k]) >> ITUR_BT_601_SHIFT;
        b[k] = (y[k] + buv[k]) >> ITUR_BT_601_SHIFT;
    }

    // narrow 32 -> 16 bits, then 16 -> unsigned 8 bits
    v_int16 r0, r1, g0, g1, b0, b1;
    r0 = v_pack(r[0], r[1]);
    r1 = v_pack(r[2], r[3]);
    g0 = v_pack(g[0], g[1]);
    g1 = v_pack(g[2], g[3]);
    b0 = v_pack(b[0], b[1]);
    b1 = v_pack(b[2], b[3]);

    rr = v_pack_u(r0, r1);
    gg = v_pack_u(g0, g1);
    bb = v_pack_u(b0, b1);
}
template<int bIdx, int dcn, bool is420> template<int bIdx, int dcn, bool is420>
static inline void cvtYuv42xxp2RGB8(int u, int v, int vy01, int vy11, int vy02, int vy12, static inline void cvtYuv42xxp2RGB8(const uchar u, const uchar v,
const uchar vy01, const uchar vy11, const uchar vy02, const uchar vy12,
uchar* row1, uchar* row2) uchar* row1, uchar* row2)
{ {
u = u - 128; int ruv, guv, buv;
v = v - 128; uvToRGBuv(u, v, ruv, guv, buv);
int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v; uchar r00, g00, b00, a00;
int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u; uchar r01, g01, b01, a01;
int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
int y00 = std::max(0, vy01 - 16) * ITUR_BT_601_CY; yRGBuvToRGBA(vy01, ruv, guv, buv, r00, g00, b00, a00);
row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT); yRGBuvToRGBA(vy11, ruv, guv, buv, r01, g01, b01, a01);
row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT); row1[2-bIdx] = r00;
row1[1] = g00;
row1[bIdx] = b00;
if(dcn == 4) if(dcn == 4)
row1[3] = uchar(0xff); row1[3] = a00;
int y01 = std::max(0, vy11 - 16) * ITUR_BT_601_CY; row1[dcn+2-bIdx] = r01;
row1[dcn+2-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT); row1[dcn+1] = g01;
row1[dcn+1] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT); row1[dcn+0+bIdx] = b01;
row1[dcn+0+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
if(dcn == 4) if(dcn == 4)
row1[7] = uchar(0xff); row1[7] = a01;
if(is420) if(is420)
{ {
int y10 = std::max(0, vy02 - 16) * ITUR_BT_601_CY; uchar r10, g10, b10, a10;
row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT); uchar r11, g11, b11, a11;
row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT); yRGBuvToRGBA(vy02, ruv, guv, buv, r10, g10, b10, a10);
yRGBuvToRGBA(vy12, ruv, guv, buv, r11, g11, b11, a11);
row2[2-bIdx] = r10;
row2[1] = g10;
row2[bIdx] = b10;
if(dcn == 4) if(dcn == 4)
row2[3] = uchar(0xff); row2[3] = a10;
int y11 = std::max(0, vy12 - 16) * ITUR_BT_601_CY; row2[dcn+2-bIdx] = r11;
row2[dcn+2-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT); row2[dcn+1] = g11;
row2[dcn+1] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT); row2[dcn+0+bIdx] = b11;
row2[dcn+0+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
if(dcn == 4) if(dcn == 4)
row2[7] = uchar(0xff); row2[7] = a11;
} }
} }
// bIdx is 0 or 2, uIdx is 0 or 1, dcn is 3 or 4
template<int bIdx, int uIdx, int dcn> template<int bIdx, int uIdx, int dcn>
struct YUV420sp2RGB8Invoker : ParallelLoopBody struct YUV420sp2RGB8Invoker : ParallelLoopBody
{ {
...@@ -1056,15 +1153,80 @@ struct YUV420sp2RGB8Invoker : ParallelLoopBody ...@@ -1056,15 +1153,80 @@ struct YUV420sp2RGB8Invoker : ParallelLoopBody
uchar* row2 = dst_data + dst_step * (j + 1); uchar* row2 = dst_data + dst_step * (j + 1);
const uchar* y2 = y1 + stride; const uchar* y2 = y1 + stride;
for (int i = 0; i < width; i += 2, row1 += dcn*2, row2 += dcn*2) int i = 0;
#if CV_SIMD
const int vsize = v_uint8::nlanes;
v_uint8 a = vx_setall_u8(uchar(0xff));
for( ; i <= width - 2*vsize;
i += 2*vsize, row1 += vsize*dcn*2, row2 += vsize*dcn*2)
{
v_uint8 u, v;
v_load_deinterleave(uv + i, u, v);
if(uIdx)
{
swap(u, v);
}
v_uint8 vy[4];
v_load_deinterleave(y1 + i, vy[0], vy[1]);
v_load_deinterleave(y2 + i, vy[2], vy[3]);
v_int32 ruv[4], guv[4], buv[4];
uvToRGBuv(u, v, ruv, guv, buv);
v_uint8 r[4], g[4], b[4];
for(int k = 0; k < 4; k++)
{
yRGBuvToRGBA(vy[k], ruv, guv, buv, r[k], g[k], b[k]);
}
if(bIdx)
{
for(int k = 0; k < 4; k++)
swap(r[k], b[k]);
}
// [r0...], [r1...] => [r0, r1, r0, r1...], [r0, r1, r0, r1...]
v_uint8 r0_0, r0_1, r1_0, r1_1;
v_zip(r[0], r[1], r0_0, r0_1);
v_zip(r[2], r[3], r1_0, r1_1);
v_uint8 g0_0, g0_1, g1_0, g1_1;
v_zip(g[0], g[1], g0_0, g0_1);
v_zip(g[2], g[3], g1_0, g1_1);
v_uint8 b0_0, b0_1, b1_0, b1_1;
v_zip(b[0], b[1], b0_0, b0_1);
v_zip(b[2], b[3], b1_0, b1_1);
if(dcn == 4)
{
v_store_interleave(row1 + 0*vsize, b0_0, g0_0, r0_0, a);
v_store_interleave(row1 + 4*vsize, b0_1, g0_1, r0_1, a);
v_store_interleave(row2 + 0*vsize, b1_0, g1_0, r1_0, a);
v_store_interleave(row2 + 4*vsize, b1_1, g1_1, r1_1, a);
}
else //dcn == 3
{
v_store_interleave(row1 + 0*vsize, b0_0, g0_0, r0_0);
v_store_interleave(row1 + 3*vsize, b0_1, g0_1, r0_1);
v_store_interleave(row2 + 0*vsize, b1_0, g1_0, r1_0);
v_store_interleave(row2 + 3*vsize, b1_1, g1_1, r1_1);
}
}
vx_cleanup();
#endif
for ( ; i < width; i += 2, row1 += dcn*2, row2 += dcn*2)
{ {
int u = int(uv[i + 0 + uIdx]); uchar u = uv[i + 0 + uIdx];
int v = int(uv[i + 1 - uIdx]); uchar v = uv[i + 1 - uIdx];
int vy01 = int(y1[i]); uchar vy01 = y1[i];
int vy11 = int(y1[i + 1]); uchar vy11 = y1[i + 1];
int vy02 = int(y2[i]); uchar vy02 = y2[i];
int vy12 = int(y2[i + 1]); uchar vy12 = y2[i + 1];
cvtYuv42xxp2RGB8<bIdx, dcn, true>(u, v, vy01, vy11, vy02, vy12, row1, row2); cvtYuv42xxp2RGB8<bIdx, dcn, true>(u, v, vy01, vy11, vy02, vy12, row1, row2);
} }
...@@ -1108,16 +1270,77 @@ struct YUV420p2RGB8Invoker : ParallelLoopBody ...@@ -1108,16 +1270,77 @@ struct YUV420p2RGB8Invoker : ParallelLoopBody
uchar* row1 = dst_data + dst_step * j; uchar* row1 = dst_data + dst_step * j;
uchar* row2 = dst_data + dst_step * (j + 1); uchar* row2 = dst_data + dst_step * (j + 1);
const uchar* y2 = y1 + stride; const uchar* y2 = y1 + stride;
int i = 0;
for (int i = 0; i < width / 2; i += 1, row1 += dcn*2, row2 += dcn*2) #if CV_SIMD
const int vsize = v_uint8::nlanes;
v_uint8 a = vx_setall_u8(uchar(0xff));
for( ; i <= width/2 - vsize;
i += vsize, row1 += vsize*dcn*2, row2 += vsize*dcn*2)
{ {
int u = int(u1[i]); v_uint8 u, v;
int v = int(v1[i]); u = vx_load(u1 + i);
v = vx_load(v1 + i);
v_uint8 vy[4];
v_load_deinterleave(y1 + 2*i, vy[0], vy[1]);
v_load_deinterleave(y2 + 2*i, vy[2], vy[3]);
v_int32 ruv[4], guv[4], buv[4];
uvToRGBuv(u, v, ruv, guv, buv);
int vy01 = int(y1[2 * i]); v_uint8 r[4], g[4], b[4];
int vy11 = int(y1[2 * i + 1]);
int vy02 = int(y2[2 * i]); for(int k = 0; k < 4; k++)
int vy12 = int(y2[2 * i + 1]); {
yRGBuvToRGBA(vy[k], ruv, guv, buv, r[k], g[k], b[k]);
}
if(bIdx)
{
for(int k = 0; k < 4; k++)
swap(r[k], b[k]);
}
// [r0...], [r1...] => [r0, r1, r0, r1...], [r0, r1, r0, r1...]
v_uint8 r0_0, r0_1, r1_0, r1_1;
v_zip(r[0], r[1], r0_0, r0_1);
v_zip(r[2], r[3], r1_0, r1_1);
v_uint8 g0_0, g0_1, g1_0, g1_1;
v_zip(g[0], g[1], g0_0, g0_1);
v_zip(g[2], g[3], g1_0, g1_1);
v_uint8 b0_0, b0_1, b1_0, b1_1;
v_zip(b[0], b[1], b0_0, b0_1);
v_zip(b[2], b[3], b1_0, b1_1);
if(dcn == 4)
{
v_store_interleave(row1 + 0*vsize, b0_0, g0_0, r0_0, a);
v_store_interleave(row1 + 4*vsize, b0_1, g0_1, r0_1, a);
v_store_interleave(row2 + 0*vsize, b1_0, g1_0, r1_0, a);
v_store_interleave(row2 + 4*vsize, b1_1, g1_1, r1_1, a);
}
else //dcn == 3
{
v_store_interleave(row1 + 0*vsize, b0_0, g0_0, r0_0);
v_store_interleave(row1 + 3*vsize, b0_1, g0_1, r0_1);
v_store_interleave(row2 + 0*vsize, b1_0, g1_0, r1_0);
v_store_interleave(row2 + 3*vsize, b1_1, g1_1, r1_1);
}
}
vx_cleanup();
#endif
for (; i < width / 2; i += 1, row1 += dcn*2, row2 += dcn*2)
{
uchar u = u1[i];
uchar v = v1[i];
uchar vy01 = y1[2 * i];
uchar vy11 = y1[2 * i + 1];
uchar vy02 = y2[2 * i];
uchar vy12 = y2[2 * i + 1];
cvtYuv42xxp2RGB8<bIdx, dcn, true>(u, v, vy01, vy11, vy02, vy12, row1, row2); cvtYuv42xxp2RGB8<bIdx, dcn, true>(u, v, vy01, vy11, vy02, vy12, row1, row2);
} }
...@@ -1150,106 +1373,258 @@ inline void cvtYUV420p2RGB(uchar * dst_data, size_t dst_step, int dst_width, int ...@@ -1150,106 +1373,258 @@ inline void cvtYUV420p2RGB(uchar * dst_data, size_t dst_step, int dst_width, int
///////////////////////////////////// RGB -> YUV420p ///////////////////////////////////// ///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
// Computes the luma (Y) sample of one RGB pixel with the fixed-point
// ITUR_BT_601_C?Y coefficients; the result is saturated to 8 bits.
static inline uchar rgbToY42x(uchar r, uchar g, uchar b)
{
    // constant term: rounding half plus the +16 luma offset, both in fixed point
    const int bias = (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
    const int weighted = ITUR_BT_601_CRY * r + ITUR_BT_601_CGY * g + ITUR_BT_601_CBY * b;

    return saturate_cast<uchar>((weighted + bias) >> ITUR_BT_601_SHIFT);
}
// SIMD counterpart of the scalar rgbToY42x(): computes luma for one v_uint8
// register of pixels. Channels are widened to 32 bits, weighted with the
// ITUR_BT_601_C?Y coefficients and packed back to 8 bits.
static inline v_uint8 rgbToY42x(const v_uint8& r, const v_uint8& g, const v_uint8& b)
{
    // constant term: rounding half plus the +16 luma offset, both in fixed point
    const int bias = (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);

    // 8 -> 16 bit widening
    v_uint16 rLo, rHi, gLo, gHi, bLo, bHi;
    v_expand(r, rLo, rHi);
    v_expand(g, gLo, gHi);
    v_expand(b, bLo, bHi);

    // 16 -> 32 bit widening
    v_uint32 red[4], green[4], blue[4];
    v_expand(rLo, red[0], red[1]);
    v_expand(rHi, red[2], red[3]);
    v_expand(gLo, green[0], green[1]);
    v_expand(gHi, green[2], green[3]);
    v_expand(bLo, blue[0], blue[1]);
    v_expand(bHi, blue[2], blue[3]);

    const v_uint32 coefR = vx_setall_u32(ITUR_BT_601_CRY);
    const v_uint32 coefG = vx_setall_u32(ITUR_BT_601_CGY);
    const v_uint32 coefB = vx_setall_u32(ITUR_BT_601_CBY);
    const v_uint32 offset = vx_setall_u32(bias);

    v_uint32 luma[4];
    for (int q = 0; q < 4; ++q)
    {
        luma[q] = (red[q]*coefR + green[q]*coefG + blue[q]*coefB + offset) >> ITUR_BT_601_SHIFT;
    }

    // narrow 32 -> 16 -> 8 bits
    const v_uint16 lumaLo = v_pack(luma[0], luma[1]);
    const v_uint16 lumaHi = v_pack(luma[2], luma[3]);
    return v_pack(lumaLo, lumaHi);
}
// Computes the chroma (U, V) samples of one RGB pixel in fixed point;
// both results are saturated to 8 bits.
//   r, g, b - source colour channels
//   u, v    - [out] chroma samples
static inline void rgbToUV42x(uchar r, uchar g, uchar b, uchar& u, uchar& v)
{
    // constant term: rounding half plus the +128 chroma offset, both in fixed point
    const int bias = (1 << (ITUR_BT_601_SHIFT - 1)) + (128 << ITUR_BT_601_SHIFT);

    const int uAcc = ITUR_BT_601_CRU * r + ITUR_BT_601_CGU * g + ITUR_BT_601_CBU * b + bias;
    // NOTE(review): the R weight of V reuses ITUR_BT_601_CBU - presumably the
    // R->V and B->U coefficients are equal; confirm against the BT.601 matrix.
    const int vAcc = ITUR_BT_601_CBU * r + ITUR_BT_601_CGV * g + ITUR_BT_601_CBV * b + bias;

    u = saturate_cast<uchar>(uAcc >> ITUR_BT_601_SHIFT);
    v = saturate_cast<uchar>(vAcc >> ITUR_BT_601_SHIFT);
}
static inline void rgbToUV42x(const v_uint8& r0, const v_uint8& r1, const v_uint8& g0, const v_uint8& g1,
const v_uint8& b0, const v_uint8& b1, v_uint8& u, v_uint8& v)
{
// [r0, r1, r2, r3,..] => [r0, 0, r2, 0,..]
v_int16 vlowByte = vx_setall_s16(0x00ff);
v_int16 rd0, rd1, gd0, gd1, bd0, bd1;
rd0 = v_reinterpret_as_s16(r0) & vlowByte;
rd1 = v_reinterpret_as_s16(r1) & vlowByte;
gd0 = v_reinterpret_as_s16(g0) & vlowByte;
gd1 = v_reinterpret_as_s16(g1) & vlowByte;
bd0 = v_reinterpret_as_s16(b0) & vlowByte;
bd1 = v_reinterpret_as_s16(b1) & vlowByte;
v_int32 rq[4], gq[4], bq[4];
v_expand(rd0, rq[0], rq[1]);
v_expand(rd1, rq[2], rq[3]);
v_expand(gd0, gq[0], gq[1]);
v_expand(gd1, gq[2], gq[3]);
v_expand(bd0, bq[0], bq[1]);
v_expand(bd1, bq[2], bq[3]);
const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));
const int shifted128 = (128 << ITUR_BT_601_SHIFT);
v_int32 shift = vx_setall_s32(halfShift + shifted128);
v_int32 ru, gu, bu, gv, bv;
ru = vx_setall_s32(ITUR_BT_601_CRU);
gu = vx_setall_s32(ITUR_BT_601_CGU);
gv = vx_setall_s32(ITUR_BT_601_CGV);
bu = vx_setall_s32(ITUR_BT_601_CBU);
bv = vx_setall_s32(ITUR_BT_601_CBV);
v_int32 uq[4], vq[4];
for(int k = 0; k < 4; k++)
{
uq[k] = (ru*rq[k] + gu*gq[k] + bu*bq[k] + shift) >> ITUR_BT_601_SHIFT;
vq[k] = (bu*rq[k] + gv*gq[k] + bv*bq[k] + shift) >> ITUR_BT_601_SHIFT;
}
v_int16 u0, u1, v0, v1;
u0 = v_pack(uq[0], uq[1]);
u1 = v_pack(uq[2], uq[3]);
v0 = v_pack(vq[0], vq[1]);
v1 = v_pack(vq[2], vq[3]);
u = v_pack_u(u0, u1);
v = v_pack_u(v0, v1);
}
struct RGB8toYUV420pInvoker: public ParallelLoopBody struct RGB8toYUV420pInvoker: public ParallelLoopBody
{ {
RGB8toYUV420pInvoker(const uchar * _src_data, size_t _src_step, RGB8toYUV420pInvoker(const uchar * _srcData, size_t _srcStep,
uchar * _y_data, uchar * _uv_data, size_t _dst_step, uchar * _yData, uchar * _uvData, size_t _dstStep,
int _src_width, int _src_height, int _scn, bool swapBlue_, bool swapUV_, bool interleaved_) int _srcWidth, int _srcHeight, int _scn, bool _swapBlue, bool _swapUV, bool _interleave)
: src_data(_src_data), src_step(_src_step), : srcData(_srcData), srcStep(_srcStep),
y_data(_y_data), uv_data(_uv_data), dst_step(_dst_step), yData(_yData), uvData(_uvData), dstStep(_dstStep),
src_width(_src_width), src_height(_src_height), srcWidth(_srcWidth), srcHeight(_srcHeight),
scn(_scn), swapBlue(swapBlue_), swapUV(swapUV_), interleaved(interleaved_) { } srcCn(_scn), swapBlue(_swapBlue), swapUV(_swapUV), interleave(_interleave) { }
void operator()(const Range& rowRange) const CV_OVERRIDE void operator()(const Range& rowRange) const CV_OVERRIDE
{ {
const int w = src_width; const int w = srcWidth;
const int h = src_height; const int h = srcHeight;
const int cn = scn; const int scn = srcCn;
for( int i = rowRange.start; i < rowRange.end; i++ ) const uchar* srcRow = (uchar*)0;
uchar* yRow = (uchar*)0, *uRow = (uchar*)0, *vRow = (uchar*)0, *uvRow = (uchar*)0;
for( int sRow = rowRange.start*2; sRow < rowRange.end*2; sRow++)
{
srcRow = srcData + srcStep*sRow;
yRow = yData + dstStep * sRow;
bool evenRow = (sRow % 2) == 0;
if(evenRow)
{ {
const uchar* brow0 = src_data + src_step * (2 * i); if (interleave)
const uchar* grow0 = brow0 + 1; {
const uchar* rrow0 = brow0 + 2; uvRow = uvData + dstStep*(sRow/2);
const uchar* brow1 = src_data + src_step * (2 * i + 1); }
const uchar* grow1 = brow1 + 1; else
const uchar* rrow1 = brow1 + 2;
if (swapBlue)
{ {
std::swap(brow0, rrow0); uRow = uvData + dstStep * (sRow/4) + ((sRow/2) % 2) * (w/2);
std::swap(brow1, rrow1); vRow = uvData + dstStep * ((sRow + h)/4) + (((sRow + h)/2) % 2) * (w/2);
} }
}
int i = 0;
#if CV_SIMD
const int vsize = v_uint8::nlanes;
uchar* y = y_data + dst_step * (2*i); for( ; i <= w/2 - vsize;
uchar* u; i += vsize)
uchar* v; {
if (interleaved) // processing (2*vsize) pixels at once
v_uint8 b0, b1, g0, g1, r0, r1, a0, a1;
if(scn == 4)
{ {
u = uv_data + dst_step * i; v_load_deinterleave(srcRow + 2*4*i + 0*vsize, b0, g0, r0, a0);
v = uv_data + dst_step * i + 1; v_load_deinterleave(srcRow + 2*4*i + 4*vsize, b1, g1, r1, a1);
} }
else else // scn == 3
{ {
u = uv_data + dst_step * (i/2) + (i % 2) * (w/2); v_load_deinterleave(srcRow + 2*3*i + 0*vsize, b0, g0, r0);
v = uv_data + dst_step * ((i + h/2)/2) + ((i + h/2) % 2) * (w/2); v_load_deinterleave(srcRow + 2*3*i + 3*vsize, b1, g1, r1);
} }
if (swapUV) if(swapBlue)
{ {
std::swap(u, v); swap(b0, r0); swap(b1, r1);
} }
for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ ) v_uint8 y0, y1;
y0 = rgbToY42x(r0, g0, b0);
y1 = rgbToY42x(r1, g1, b1);
v_store(yRow + 2*i + 0*vsize, y0);
v_store(yRow + 2*i + 1*vsize, y1);
if(evenRow)
{ {
int r00 = rrow0[j]; int g00 = grow0[j]; int b00 = brow0[j]; v_uint8 u, v;
int r01 = rrow0[cn + j]; int g01 = grow0[cn + j]; int b01 = brow0[cn + j]; rgbToUV42x(r0, r1, g0, g1, b0, b1, u, v);
int r10 = rrow1[j]; int g10 = grow1[j]; int b10 = brow1[j];
int r11 = rrow1[cn + j]; int g11 = grow1[cn + j]; int b11 = brow1[cn + j];
const int shifted16 = (16 << ITUR_BT_601_SHIFT); if(swapUV)
const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1)); {
int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16; swap(u, v);
int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16; }
int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16;
int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16; if(interleave)
{
v_store_interleave(uvRow + 2*i, u, v);
}
else
{
v_store(uRow + i, u);
v_store(vRow + i, v);
}
}
}
vx_cleanup();
#endif
// processing two pixels at once
for( ; i < w/2; i++)
{
uchar b0, g0, r0;
uchar b1, g1, r1;
b0 = srcRow[(2*i+0)*scn + 0];
g0 = srcRow[(2*i+0)*scn + 1];
r0 = srcRow[(2*i+0)*scn + 2];
b1 = srcRow[(2*i+1)*scn + 0];
g1 = srcRow[(2*i+1)*scn + 1];
r1 = srcRow[(2*i+1)*scn + 2];
if(swapBlue)
{
swap(b0, r0); swap(b1, r1);
}
y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT); uchar y0 = rgbToY42x(r0, g0, b0);
y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT); uchar y1 = rgbToY42x(r1, g1, b1);
y[2*k + dst_step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
y[2*k + dst_step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);
const int shifted128 = (128 << ITUR_BT_601_SHIFT); yRow[2*i+0] = y0;
int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128; yRow[2*i+1] = y1;
int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128;
if (interleaved) if(evenRow)
{ {
u[k*2] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT); uchar uu, vv;
v[k*2] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT); rgbToUV42x(r0, g0, b0, uu, vv);
if(swapUV)
{
swap(uu, vv);
}
if(interleave)
{
uvRow[2*i+0] = uu;
uvRow[2*i+1] = vv;
} }
else else
{ {
u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT); uRow[i] = uu;
v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT); vRow[i] = vv;
}
} }
} }
} }
} }
const uchar * src_data; const uchar * srcData;
size_t src_step; size_t srcStep;
uchar *y_data, *uv_data; uchar *yData, *uvData;
size_t dst_step; size_t dstStep;
int src_width; int srcWidth;
int src_height; int srcHeight;
const int scn; const int srcCn;
bool swapBlue; bool swapBlue;
bool swapUV; bool swapUV;
bool interleaved; bool interleave;
}; };
///////////////////////////////////// YUV422 -> RGB ///////////////////////////////////// ///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
// bIdx is 0 or 2; [uIdx, yIdx] is [0, 0], [0, 1], [1, 0]; dcn is 3 or 4
template<int bIdx, int uIdx, int yIdx, int dcn> template<int bIdx, int uIdx, int yIdx, int dcn>
struct YUV422toRGB8Invoker : ParallelLoopBody struct YUV422toRGB8Invoker : ParallelLoopBody
{ {
...@@ -1269,6 +1644,10 @@ struct YUV422toRGB8Invoker : ParallelLoopBody ...@@ -1269,6 +1644,10 @@ struct YUV422toRGB8Invoker : ParallelLoopBody
int rangeBegin = range.start; int rangeBegin = range.start;
int rangeEnd = range.end; int rangeEnd = range.end;
// [yIdx, uIdx] | [uidx, vidx]:
// 0, 0 | 1, 3
// 0, 1 | 3, 1
// 1, 0 | 0, 2
const int uidx = 1 - yIdx + uIdx * 2; const int uidx = 1 - yIdx + uIdx * 2;
const int vidx = (2 + uidx) % 4; const int vidx = (2 + uidx) % 4;
const uchar* yuv_src = src_data + rangeBegin * src_step; const uchar* yuv_src = src_data + rangeBegin * src_step;
...@@ -1276,14 +1655,69 @@ struct YUV422toRGB8Invoker : ParallelLoopBody ...@@ -1276,14 +1655,69 @@ struct YUV422toRGB8Invoker : ParallelLoopBody
for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += src_step) for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += src_step)
{ {
uchar* row = dst_data + dst_step * j; uchar* row = dst_data + dst_step * j;
int i = 0;
#if CV_SIMD
const int vsize = v_uint8::nlanes;
v_uint8 a = vx_setall_u8(uchar(0xff));
for(; i <= 2*width - 4*vsize;
i += 4*vsize, row += vsize*dcn*2)
{
v_uint8 u, v, vy[2];
if(yIdx == 1) // UYVY
{
v_load_deinterleave(yuv_src + i, u, vy[0], v, vy[1]);
}
else // YUYV or YVYU
{
v_load_deinterleave(yuv_src + i, vy[0], u, vy[1], v);
if(uIdx == 1) // YVYU
{
swap(u, v);
}
}
v_int32 ruv[4], guv[4], buv[4];
uvToRGBuv(u, v, ruv, guv, buv);
v_uint8 r[2], g[2], b[2];
yRGBuvToRGBA(vy[0], ruv, guv, buv, r[0], g[0], b[0]);
yRGBuvToRGBA(vy[1], ruv, guv, buv, r[1], g[1], b[1]);
if(bIdx)
{
swap(r[0], b[0]);
swap(r[1], b[1]);
}
// [r0...], [r1...] => [r0, r1, r0, r1...], [r0, r1, r0, r1...]
v_uint8 r0_0, r0_1;
v_zip(r[0], r[1], r0_0, r0_1);
v_uint8 g0_0, g0_1;
v_zip(g[0], g[1], g0_0, g0_1);
v_uint8 b0_0, b0_1;
v_zip(b[0], b[1], b0_0, b0_1);
for (int i = 0; i < 2 * width; i += 4, row += dcn*2) if(dcn == 4)
{
v_store_interleave(row + 0*vsize, b0_0, g0_0, r0_0, a);
v_store_interleave(row + 4*vsize, b0_1, g0_1, r0_1, a);
}
else //dcn == 3
{
v_store_interleave(row + 0*vsize, b0_0, g0_0, r0_0);
v_store_interleave(row + 3*vsize, b0_1, g0_1, r0_1);
}
}
vx_cleanup();
#endif
for (; i < 2 * width; i += 4, row += dcn*2)
{ {
int u = int(yuv_src[i + uidx]); uchar u = yuv_src[i + uidx];
int v = int(yuv_src[i + vidx]); uchar v = yuv_src[i + vidx];
int vy0 = int(yuv_src[i + yIdx]); uchar vy0 = yuv_src[i + yIdx];
int vy1 = int(yuv_src[i + yIdx + 2]); uchar vy1 = yuv_src[i + yIdx + 2];
cvtYuv42xxp2RGB8<bIdx, dcn, false>(u, v, vy0, vy1, 0, 0, row, (uchar*)(0)); cvtYuv42xxp2RGB8<bIdx, dcn, false>(u, v, vy0, vy1, 0, 0, row, (uchar*)(0));
} }
......
...@@ -289,13 +289,16 @@ namespace binding_utils ...@@ -289,13 +289,16 @@ namespace binding_utils
float radius; float radius;
}; };
#ifdef HAVE_OPENCV_IMGPROC
Circle minEnclosingCircle(const cv::Mat& points) Circle minEnclosingCircle(const cv::Mat& points)
{ {
Circle circle; Circle circle;
cv::minEnclosingCircle(points, circle.center, circle.radius); cv::minEnclosingCircle(points, circle.center, circle.radius);
return circle; return circle;
} }
#endif
#ifdef HAVE_OPENCV_VIDEO
emscripten::val CamShiftWrapper(const cv::Mat& arg1, Rect& arg2, TermCriteria arg3) emscripten::val CamShiftWrapper(const cv::Mat& arg1, Rect& arg2, TermCriteria arg3)
{ {
RotatedRect rotatedRect = cv::CamShift(arg1, arg2, arg3); RotatedRect rotatedRect = cv::CamShift(arg1, arg2, arg3);
...@@ -313,6 +316,7 @@ namespace binding_utils ...@@ -313,6 +316,7 @@ namespace binding_utils
result.call<void>("push", arg2); result.call<void>("push", arg2);
return result; return result;
} }
#endif // HAVE_OPENCV_VIDEO
std::string getExceptionMsg(const cv::Exception& e) { std::string getExceptionMsg(const cv::Exception& e) {
return e.msg; return e.msg;
...@@ -551,19 +555,25 @@ EMSCRIPTEN_BINDINGS(binding_utils) ...@@ -551,19 +555,25 @@ EMSCRIPTEN_BINDINGS(binding_utils)
function("exceptionFromPtr", &binding_utils::exceptionFromPtr, allow_raw_pointers()); function("exceptionFromPtr", &binding_utils::exceptionFromPtr, allow_raw_pointers());
#ifdef HAVE_OPENCV_IMGPROC
function("minEnclosingCircle", select_overload<binding_utils::Circle(const cv::Mat&)>(&binding_utils::minEnclosingCircle)); function("minEnclosingCircle", select_overload<binding_utils::Circle(const cv::Mat&)>(&binding_utils::minEnclosingCircle));
#endif
function("minMaxLoc", select_overload<binding_utils::MinMaxLoc(const cv::Mat&, const cv::Mat&)>(&binding_utils::minMaxLoc)); function("minMaxLoc", select_overload<binding_utils::MinMaxLoc(const cv::Mat&, const cv::Mat&)>(&binding_utils::minMaxLoc));
function("minMaxLoc", select_overload<binding_utils::MinMaxLoc(const cv::Mat&)>(&binding_utils::minMaxLoc_1)); function("minMaxLoc", select_overload<binding_utils::MinMaxLoc(const cv::Mat&)>(&binding_utils::minMaxLoc_1));
#ifdef HAVE_OPENCV_IMGPROC
function("morphologyDefaultBorderValue", &cv::morphologyDefaultBorderValue); function("morphologyDefaultBorderValue", &cv::morphologyDefaultBorderValue);
#endif
function("CV_MAT_DEPTH", &binding_utils::cvMatDepth); function("CV_MAT_DEPTH", &binding_utils::cvMatDepth);
#ifdef HAVE_OPENCV_VIDEO
function("CamShift", select_overload<emscripten::val(const cv::Mat&, Rect&, TermCriteria)>(&binding_utils::CamShiftWrapper)); function("CamShift", select_overload<emscripten::val(const cv::Mat&, Rect&, TermCriteria)>(&binding_utils::CamShiftWrapper));
function("meanShift", select_overload<emscripten::val(const cv::Mat&, Rect&, TermCriteria)>(&binding_utils::meanShiftWrapper)); function("meanShift", select_overload<emscripten::val(const cv::Mat&, Rect&, TermCriteria)>(&binding_utils::meanShiftWrapper));
#endif
function("getBuildInformation", &binding_utils::getBuildInformation); function("getBuildInformation", &binding_utils::getBuildInformation);
......
...@@ -140,7 +140,7 @@ features2d = {'Feature2D': ['detect', 'compute', 'detectAndCompute', 'descriptor ...@@ -140,7 +140,7 @@ features2d = {'Feature2D': ['detect', 'compute', 'detectAndCompute', 'descriptor
'AKAZE': ['create', 'setDescriptorType', 'getDescriptorType', 'setDescriptorSize', 'getDescriptorSize', 'setDescriptorChannels', 'getDescriptorChannels', 'setThreshold', 'getThreshold', 'setNOctaves', 'getNOctaves', 'setNOctaveLayers', 'getNOctaveLayers', 'setDiffusivity', 'getDiffusivity', 'getDefaultName'], 'AKAZE': ['create', 'setDescriptorType', 'getDescriptorType', 'setDescriptorSize', 'getDescriptorSize', 'setDescriptorChannels', 'getDescriptorChannels', 'setThreshold', 'getThreshold', 'setNOctaves', 'getNOctaves', 'setNOctaveLayers', 'getNOctaveLayers', 'setDiffusivity', 'getDiffusivity', 'getDefaultName'],
'DescriptorMatcher': ['add', 'clear', 'empty', 'isMaskSupported', 'train', 'match', 'knnMatch', 'radiusMatch', 'clone', 'create'], 'DescriptorMatcher': ['add', 'clear', 'empty', 'isMaskSupported', 'train', 'match', 'knnMatch', 'radiusMatch', 'clone', 'create'],
'BFMatcher': ['isMaskSupported', 'create'], 'BFMatcher': ['isMaskSupported', 'create'],
'': ['FAST', 'AGAST', 'drawKeypoints', 'drawMatches']} '': ['drawKeypoints', 'drawMatches']}
photo = {'': ['createAlignMTB', 'createCalibrateDebevec', 'createCalibrateRobertson', \ photo = {'': ['createAlignMTB', 'createCalibrateDebevec', 'createCalibrateRobertson', \
'createMergeDebevec', 'createMergeMertens', 'createMergeRobertson', \ 'createMergeDebevec', 'createMergeMertens', 'createMergeRobertson', \
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
if (typeof module !== 'undefined' && module.exports) {
// The environment is Node.js
var cv = require('./opencv.js'); // eslint-disable-line no-var
}
/**
 * Draws a deterministic synthetic frame used as input for the feature tests.
 *
 * The frame is a cv.CV_8UC1 matrix initialised to zero, onto which two filled
 * regions, two rectangle outlines and two lines are drawn using the values
 * 255 and 128.
 *
 * @param {number} [width] frame width in pixels; falls back to 200 when falsy.
 * @param {number} [height] frame height in pixels; falls back to 200 when falsy.
 * @return {cv.Mat} the generated frame.
 */
function generateTestFrame(width, height) {
  const cols = width || 200;
  const rows = height || 200;

  const frame = new cv.Mat(rows, cols, cv.CV_8UC1, new cv.Scalar(0, 0, 0, 0));
  const white = new cv.Scalar(255, 255, 255, 255);
  const gray = new cv.Scalar(128, 128, 128, 128);

  // Filled regions: a white area with a smaller gray area written over it.
  const outerRegion = new cv.Rect(cols / 4, rows / 4, cols / 2, rows / 2);
  frame.roi(outerRegion).setTo(white);
  const innerRegion = new cv.Rect(cols / 2 - cols / 8, rows / 2 - rows / 8, cols / 4, rows / 4);
  frame.roi(innerRegion).setTo(gray);

  // Rectangle outlines: white (thickness 5), then gray (thickness 3).
  cv.rectangle(
      frame,
      new cv.Point(cols / 8, rows / 8),
      new cv.Point(cols - cols / 8, rows - rows / 8),
      white,
      5);
  cv.rectangle(
      frame,
      new cv.Point(cols / 5, rows / 5),
      new cv.Point(cols - cols / 5, rows - rows / 5),
      gray,
      3);

  // Two lines ending at the frame centre; their start points have x = -cols and x = 2 * cols.
  cv.line(frame, new cv.Point(-cols, 0), new cv.Point(cols / 2, rows / 2), gray, 5);
  cv.line(frame, new cv.Point(2*cols, 0), new cv.Point(cols / 2, rows / 2), white, 5);

  return frame;
}
QUnit.module('Features2D', {});
// Runs each detector on the synthetic frame and checks how many key points it reports.
QUnit.test('Detectors', function(assert) {
  const frame = generateTestFrame();
  const keypoints = new cv.KeyPointVector();

  // [detector class name on `cv`, expected key point count].
  // The class name doubles as the assertion message.
  const expectations = [
    ['ORB', 67],
    ['MSER', 7],
    ['BRISK', 191],
    ['FastFeatureDetector', 12],
    ['AgastFeatureDetector', 67],
    ['GFTTDetector', 168],
    ['KAZE', 159],
    ['AKAZE', 52],
  ];

  // The same key point vector is passed to every detector, in the listed order.
  expectations.forEach(function(entry) {
    const detectorName = entry[0];
    const expectedCount = entry[1];
    const detector = new cv[detectorName]();
    detector.detect(frame, keypoints);
    assert.equal(keypoints.size(), expectedCount, detectorName);
  });
});
// Matches ORB descriptors of the synthetic frame against themselves and
// checks the number of key points and the number of matches.
QUnit.test('BFMatcher', function(assert) {
  // Detect key points and compute their descriptors; an empty Mat is passed as the mask.
  const frame = generateTestFrame();
  const keypoints = new cv.KeyPointVector();
  const descs = new cv.Mat();
  const detector = new cv.ORB();
  const emptyMask = new cv.Mat();
  detector.detectAndCompute(frame, emptyMask, keypoints, descs);

  assert.equal(keypoints.size(), 67);

  // Match the descriptor set against itself.
  const matches = new cv.DMatchVector();
  const bf = new cv.BFMatcher();
  bf.match(descs, descs, matches);

  assert.equal(matches.size(), 67);
});
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
<script type="application/javascript" src="test_objdetect.js"></script> <script type="application/javascript" src="test_objdetect.js"></script>
<script type="application/javascript" src="test_video.js"></script> <script type="application/javascript" src="test_video.js"></script>
<script type="application/javascript" src="test_photo.js"></script> <script type="application/javascript" src="test_photo.js"></script>
<script type="application/javascript" src="test_features2d.js"></script>
<script type='text/javascript'> <script type='text/javascript'>
QUnit.config.autostart = false; QUnit.config.autostart = false;
...@@ -69,11 +70,5 @@ ...@@ -69,11 +70,5 @@
}; };
}; };
</script> </script>
<!--
TODO
<script type="application/javascript" src="test_features2d.js"></script>
-->
</body> </body>
</html> </html>
...@@ -45,7 +45,8 @@ testrunner.run( ...@@ -45,7 +45,8 @@ testrunner.run(
{ {
code: 'opencv.js', code: 'opencv.js',
tests: ['test_mat.js', 'test_utils.js', 'test_imgproc.js', tests: ['test_mat.js', 'test_utils.js', 'test_imgproc.js',
'test_objdetect.js', 'test_video.js', 'test_photo.js'], 'test_objdetect.js', 'test_video.js', 'test_features2d.js',
'test_photo.js'],
}, },
function(err, report) { function(err, report) {
console.log(report.failed + ' failed, ' + report.passed + ' passed'); console.log(report.failed + ' failed, ' + report.passed + ' passed');
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencl_kernels_video.hpp" #include "opencl_kernels_video.hpp"
#include "opencv2/core/hal/intrin.hpp"
#if defined __APPLE__ || defined __ANDROID__ #if defined __APPLE__ || defined __ANDROID__
#define SMALL_LOCALSIZE #define SMALL_LOCALSIZE
...@@ -433,13 +434,11 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1, ...@@ -433,13 +434,11 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
for( i = 0; i <= m; i++ ) for( i = 0; i <= m; i++ )
kernel[i] = (float)(kernel[i]*s); kernel[i] = (float)(kernel[i]*s);
#if CV_SSE2 #if CV_SIMD128
float* simd_kernel = alignPtr(kernel + m+1, 16); float* simd_kernel = alignPtr(kernel + m+1, 16);
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE);
if( useSIMD )
{ {
for( i = 0; i <= m; i++ ) for( i = 0; i <= m; i++ )
_mm_store_ps(simd_kernel + i*4, _mm_set1_ps(kernel[i])); v_store(simd_kernel + i*4, v_setall_f32(kernel[i]));
} }
#endif #endif
...@@ -457,54 +456,53 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1, ...@@ -457,54 +456,53 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
} }
x = 0; x = 0;
#if CV_SSE2 #if CV_SIMD128
if( useSIMD )
{ {
for( ; x <= width*5 - 16; x += 16 ) for( ; x <= width*5 - 16; x += 16 )
{ {
const float *sptr0 = srow[m], *sptr1; const float *sptr0 = srow[m], *sptr1;
__m128 g4 = _mm_load_ps(simd_kernel); v_float32x4 g4 = v_load(simd_kernel);
__m128 s0, s1, s2, s3; v_float32x4 s0, s1, s2, s3;
s0 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x), g4); s0 = v_load(sptr0 + x) * g4;
s1 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 4), g4); s1 = v_load(sptr0 + x + 4) * g4;
s2 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 8), g4); s2 = v_load(sptr0 + x + 8) * g4;
s3 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 12), g4); s3 = v_load(sptr0 + x + 12) * g4;
for( i = 1; i <= m; i++ ) for( i = 1; i <= m; i++ )
{ {
__m128 x0, x1; v_float32x4 x0, x1;
sptr0 = srow[m+i], sptr1 = srow[m-i]; sptr0 = srow[m+i], sptr1 = srow[m-i];
g4 = _mm_load_ps(simd_kernel + i*4); g4 = v_load(simd_kernel + i*4);
x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x), _mm_loadu_ps(sptr1 + x)); x0 = v_load(sptr0 + x) + v_load(sptr1 + x);
x1 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 4), _mm_loadu_ps(sptr1 + x + 4)); x1 = v_load(sptr0 + x + 4) + v_load(sptr1 + x + 4);
s0 = _mm_add_ps(s0, _mm_mul_ps(x0, g4)); s0 = v_muladd(x0, g4, s0);
s1 = _mm_add_ps(s1, _mm_mul_ps(x1, g4)); s1 = v_muladd(x1, g4, s1);
x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 8), _mm_loadu_ps(sptr1 + x + 8)); x0 = v_load(sptr0 + x + 8) + v_load(sptr1 + x + 8);
x1 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 12), _mm_loadu_ps(sptr1 + x + 12)); x1 = v_load(sptr0 + x + 12) + v_load(sptr1 + x + 12);
s2 = _mm_add_ps(s2, _mm_mul_ps(x0, g4)); s2 = v_muladd(x0, g4, s2);
s3 = _mm_add_ps(s3, _mm_mul_ps(x1, g4)); s3 = v_muladd(x1, g4, s3);
} }
_mm_store_ps(vsum + x, s0); v_store(vsum + x, s0);
_mm_store_ps(vsum + x + 4, s1); v_store(vsum + x + 4, s1);
_mm_store_ps(vsum + x + 8, s2); v_store(vsum + x + 8, s2);
_mm_store_ps(vsum + x + 12, s3); v_store(vsum + x + 12, s3);
} }
for( ; x <= width*5 - 4; x += 4 ) for( ; x <= width*5 - 4; x += 4 )
{ {
const float *sptr0 = srow[m], *sptr1; const float *sptr0 = srow[m], *sptr1;
__m128 g4 = _mm_load_ps(simd_kernel); v_float32x4 g4 = v_load(simd_kernel);
__m128 s0 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x), g4); v_float32x4 s0 = v_load(sptr0 + x) * g4;
for( i = 1; i <= m; i++ ) for( i = 1; i <= m; i++ )
{ {
sptr0 = srow[m+i], sptr1 = srow[m-i]; sptr0 = srow[m+i], sptr1 = srow[m-i];
g4 = _mm_load_ps(simd_kernel + i*4); g4 = v_load(simd_kernel + i*4);
__m128 x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x), _mm_loadu_ps(sptr1 + x)); v_float32x4 x0 = v_load(sptr0 + x) + v_load(sptr1 + x);
s0 = _mm_add_ps(s0, _mm_mul_ps(x0, g4)); s0 = v_muladd(x0, g4, s0);
} }
_mm_store_ps(vsum + x, s0); v_store(vsum + x, s0);
} }
} }
#endif #endif
...@@ -525,28 +523,25 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1, ...@@ -525,28 +523,25 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
// horizontal blur // horizontal blur
x = 0; x = 0;
#if CV_SSE2 #if CV_SIMD128
if( useSIMD )
{ {
for( ; x <= width*5 - 8; x += 8 ) for( ; x <= width*5 - 8; x += 8 )
{ {
__m128 g4 = _mm_load_ps(simd_kernel); v_float32x4 g4 = v_load(simd_kernel);
__m128 s0 = _mm_mul_ps(_mm_loadu_ps(vsum + x), g4); v_float32x4 s0 = v_load(vsum + x) * g4;
__m128 s1 = _mm_mul_ps(_mm_loadu_ps(vsum + x + 4), g4); v_float32x4 s1 = v_load(vsum + x + 4) * g4;
for( i = 1; i <= m; i++ ) for( i = 1; i <= m; i++ )
{ {
g4 = _mm_load_ps(simd_kernel + i*4); g4 = v_load(simd_kernel + i*4);
__m128 x0 = _mm_add_ps(_mm_loadu_ps(vsum + x - i*5), v_float32x4 x0 = v_load(vsum + x - i*5) + v_load(vsum + x+ i*5);
_mm_loadu_ps(vsum + x + i*5)); v_float32x4 x1 = v_load(vsum + x - i*5 + 4) + v_load(vsum + x+ i*5 + 4);
__m128 x1 = _mm_add_ps(_mm_loadu_ps(vsum + x - i*5 + 4), s0 = v_muladd(x0, g4, s0);
_mm_loadu_ps(vsum + x + i*5 + 4)); s1 = v_muladd(x1, g4, s1);
s0 = _mm_add_ps(s0, _mm_mul_ps(x0, g4));
s1 = _mm_add_ps(s1, _mm_mul_ps(x1, g4));
} }
_mm_store_ps(hsum + x, s0); v_store(hsum + x, s0);
_mm_store_ps(hsum + x + 4, s1); v_store(hsum + x + 4, s1);
} }
} }
#endif #endif
......
...@@ -113,6 +113,7 @@ class Builder: ...@@ -113,6 +113,7 @@ class Builder:
"-DWITH_GPHOTO2=OFF", "-DWITH_GPHOTO2=OFF",
"-DWITH_LAPACK=OFF", "-DWITH_LAPACK=OFF",
"-DWITH_ITT=OFF", "-DWITH_ITT=OFF",
"-DWITH_QUIRC=OFF",
"-DBUILD_ZLIB=ON", "-DBUILD_ZLIB=ON",
"-DBUILD_opencv_apps=OFF", "-DBUILD_opencv_apps=OFF",
"-DBUILD_opencv_calib3d=ON", # No bindings provided. This module is used as a dependency for other modules. "-DBUILD_opencv_calib3d=ON", # No bindings provided. This module is used as a dependency for other modules.
...@@ -130,9 +131,11 @@ class Builder: ...@@ -130,9 +131,11 @@ class Builder:
"-DBUILD_opencv_superres=OFF", "-DBUILD_opencv_superres=OFF",
"-DBUILD_opencv_stitching=OFF", "-DBUILD_opencv_stitching=OFF",
"-DBUILD_opencv_java=OFF", "-DBUILD_opencv_java=OFF",
"-DBUILD_opencv_java_bindings_generator=OFF",
"-DBUILD_opencv_js=ON", "-DBUILD_opencv_js=ON",
"-DBUILD_opencv_python2=OFF", "-DBUILD_opencv_python2=OFF",
"-DBUILD_opencv_python3=OFF", "-DBUILD_opencv_python3=OFF",
"-DBUILD_opencv_python_bindings_generator=OFF",
"-DBUILD_EXAMPLES=OFF", "-DBUILD_EXAMPLES=OFF",
"-DBUILD_PACKAGE=OFF", "-DBUILD_PACKAGE=OFF",
"-DBUILD_TESTS=OFF", "-DBUILD_TESTS=OFF",
......
...@@ -153,14 +153,16 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net) ...@@ -153,14 +153,16 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
std::vector<int> classIds; std::vector<int> classIds;
std::vector<float> confidences; std::vector<float> confidences;
std::vector<Rect> boxes; std::vector<Rect> boxes;
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN if (outLayerType == "DetectionOutput")
{ {
// Network produces output blob with a shape 1x1xNx7 where N is a number of // Network produces output blob with a shape 1x1xNx7 where N is a number of
// detections and an every detection is a vector of values // detections and an every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom] // [batchId, classId, confidence, left, top, right, bottom]
CV_Assert(outs.size() == 1); CV_Assert(outs.size() > 0);
float* data = (float*)outs[0].data; for (size_t k = 0; k < outs.size(); k++)
for (size_t i = 0; i < outs[0].total(); i += 7) {
float* data = (float*)outs[k].data;
for (size_t i = 0; i < outs[k].total(); i += 7)
{ {
float confidence = data[i + 2]; float confidence = data[i + 2];
if (confidence > confThreshold) if (confidence > confThreshold)
...@@ -171,36 +173,22 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net) ...@@ -171,36 +173,22 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
int bottom = (int)data[i + 6]; int bottom = (int)data[i + 6];
int width = right - left + 1; int width = right - left + 1;
int height = bottom - top + 1; int height = bottom - top + 1;
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. if (width * height <= 1)
boxes.push_back(Rect(left, top, width, height));
confidences.push_back(confidence);
}
}
}
else if (outLayerType == "DetectionOutput")
{ {
// Network produces output blob with a shape 1x1xNx7 where N is a number of left = (int)(data[i + 3] * frame.cols);
// detections and an every detection is a vector of values top = (int)(data[i + 4] * frame.rows);
// [batchId, classId, confidence, left, top, right, bottom] right = (int)(data[i + 5] * frame.cols);
CV_Assert(outs.size() == 1); bottom = (int)(data[i + 6] * frame.rows);
float* data = (float*)outs[0].data; width = right - left + 1;
for (size_t i = 0; i < outs[0].total(); i += 7) height = bottom - top + 1;
{ }
float confidence = data[i + 2];
if (confidence > confThreshold)
{
int left = (int)(data[i + 3] * frame.cols);
int top = (int)(data[i + 4] * frame.rows);
int right = (int)(data[i + 5] * frame.cols);
int bottom = (int)(data[i + 6] * frame.rows);
int width = right - left + 1;
int height = bottom - top + 1;
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
boxes.push_back(Rect(left, top, width, height)); boxes.push_back(Rect(left, top, width, height));
confidences.push_back(confidence); confidences.push_back(confidence);
} }
} }
} }
}
else if (outLayerType == "Region") else if (outLayerType == "Region")
{ {
for (size_t i = 0; i < outs.size(); ++i) for (size_t i = 0; i < outs.size(); ++i)
......
...@@ -102,7 +102,7 @@ def postprocess(frame, outs): ...@@ -102,7 +102,7 @@ def postprocess(frame, outs):
classIds = [] classIds = []
confidences = [] confidences = []
boxes = [] boxes = []
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN if lastLayer.type == 'DetectionOutput':
# Network produces output blob with a shape 1x1xNx7 where N is a number of # Network produces output blob with a shape 1x1xNx7 where N is a number of
# detections and an every detection is a vector of values # detections and an every detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom] # [batchId, classId, confidence, left, top, right, bottom]
...@@ -116,17 +116,7 @@ def postprocess(frame, outs): ...@@ -116,17 +116,7 @@ def postprocess(frame, outs):
bottom = int(detection[6]) bottom = int(detection[6])
width = right - left + 1 width = right - left + 1
height = bottom - top + 1 height = bottom - top + 1
classIds.append(int(detection[1]) - 1) # Skip background label if width * height <= 1:
confidences.append(float(confidence))
boxes.append([left, top, width, height])
elif lastLayer.type == 'DetectionOutput':
# Network produces output blob with a shape 1x1xNx7 where N is a number of
# detections and an every detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom]
for out in outs:
for detection in out[0, 0]:
confidence = detection[2]
if confidence > confThreshold:
left = int(detection[3] * frameWidth) left = int(detection[3] * frameWidth)
top = int(detection[4] * frameHeight) top = int(detection[4] * frameHeight)
right = int(detection[5] * frameWidth) right = int(detection[5] * frameWidth)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment