Commit f5035150 authored by Dmitry Kurtaev

JavaScript bindings for dnn module

parent 6185f720
...
@@ -224,8 +224,7 @@ static const uint64 kuint64max = GOOGLE_ULONGLONG(0xFFFFFFFFFFFFFFFF);
 #if defined(__clang__) && defined(__has_cpp_attribute) \
     && !defined(GOOGLE_PROTOBUF_OS_APPLE)
-# if defined(GOOGLE_PROTOBUF_OS_NACL) || defined(EMSCRIPTEN) || \
-    __has_cpp_attribute(clang::fallthrough)
+# if defined(GOOGLE_PROTOBUF_OS_NACL) || __has_cpp_attribute(clang::fallthrough)
 #  define GOOGLE_FALLTHROUGH_INTENDED [[clang::fallthrough]]
 # endif
 #endif
...
# How to run deep networks in browser {#tutorial_dnn_javascript}
## Introduction
This tutorial shows how to run deep learning models using OpenCV.js right
in a browser. It walks through a sample pipeline built from face detection and
face recognition models.
## Face detection
The face detection network takes a BGR image as input and produces a set of bounding
boxes that might contain faces. All we need to do is select the boxes with a
sufficiently high confidence.
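As a preview of that selection step (the complete version appears in the sample below),
here is a minimal sketch. It assumes the detector's output blob packs one detection per
7 floats, `[batchId, classId, confidence, left, top, right, bottom]`, with normalized
coordinates; `out` and `img` are placeholder names for the output blob and input image:
@code{.js}
// Keep only confident detections and rescale normalized coordinates to pixels.
var faces = [];
for (var i = 0; i < out.data32F.length; i += 7) {
  var confidence = out.data32F[i + 2];
  if (confidence > 0.5) {
    faces.push({x: out.data32F[i + 3] * img.cols,
                y: out.data32F[i + 4] * img.rows,
                width: (out.data32F[i + 5] - out.data32F[i + 3]) * img.cols,
                height: (out.data32F[i + 6] - out.data32F[i + 4]) * img.rows});
  }
}
@endcode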
## Face recognition
The network is called OpenFace (project: https://github.com/cmusatyalab/openface).
The face recognition model receives an RGB face image of size `96x96` and returns a
`128`-dimensional unit vector that represents the input face as a point on a
multidimensional unit sphere. So the difference between two faces is the angle between
the two output vectors.
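Because both vectors have unit length, their dot product equals the cosine of that
angle, so a single `dot` call yields a similarity score in `[-1, 1]`. This is exactly
how the sample's `recognize` function matches faces; `vecA` and `vecB` below are
placeholder names, and the `0.5` threshold is the sample's choice:
@code{.js}
// Compare two 128-dimensional unit vectors produced by the network.
// 1 means the same direction (same face), -1 means opposite directions.
var similarity = vecA.dot(vecB);  // cos(angle between vecA and vecB)
if (similarity > 0.5) {
  // Treat both images as the same person.
}
@endcode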
## Sample
The whole sample is an HTML page with JavaScript code that uses OpenCV.js functionality.
The page is embedded below. Press the `Start` button to begin the demo.
Press `Add a person` to name a person who is currently recognized as unknown.
Next we'll discuss the main parts of the code.
@htmlinclude js_face_recognition.html
-# Run the face detection network to detect faces on the input image.
@snippet dnn/js_face_recognition.html Run face detection model
You may play with the input blob size to balance detection quality against efficiency:
the bigger the input blob, the smaller the faces that can be detected.
-# Run the face recognition network to obtain a `128`-dimensional unit feature vector from an input face image.
@snippet dnn/js_face_recognition.html Get 128 floating points feature vector
-# Perform the recognition.
@snippet dnn/js_face_recognition.html Recognize
Match the new feature vector against the registered ones and return the name of the best-matched person.
-# The main loop.
@snippet dnn/js_face_recognition.html Define frames processing
The main loop of our application receives frames from a camera and recognizes
every detected face in the frame. We start this function once OpenCV.js has been
initialized and the deep learning models have been downloaded.
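The loop schedules itself with `setTimeout`, subtracting the time spent on the current
frame from the per-frame budget. Stripped of the recognition work, the scheduling
pattern used in the sample looks like this:
@code{.js}
const FPS = 30;  // Target number of processed frames per second.
function captureFrame() {
  var begin = Date.now();
  // ... read a frame, detect and recognize faces, draw the results ...
  // Schedule the next iteration so a whole cycle takes about 1000 / FPS ms.
  var delay = 1000 / FPS - (Date.now() - begin);
  setTimeout(captureFrame, delay);
}
@endcode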
...
@@ -25,6 +25,14 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
     In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module.

+- @subpage tutorial_dnn_android
+
+    *Compatibility:* \> OpenCV 3.3
+
+    *Author:* Dmitry Kurtaev
+
+    This tutorial will show you how to run deep learning models using OpenCV on an Android device.
+
 - @subpage tutorial_dnn_yolo

     *Compatibility:* \> OpenCV 3.3.1
@@ -32,3 +40,11 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
     *Author:* Alessandro de Oliveira Faria

     In this tutorial you will learn how to use opencv_dnn module using yolo_object_detection with device capture, video file or image.
+
+- @subpage tutorial_dnn_javascript
+
+    *Compatibility:* \> OpenCV 3.3.1
+
+    *Author:* Dmitry Kurtaev
+
+    In this tutorial we'll run deep learning models in a browser using OpenCV.js.
...
...
@@ -15,7 +15,7 @@ set(the_description "Deep neural network module. It allows to load models from d
 ocv_add_dispatched_file("layers/layers_common" AVX AVX2)
-ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java)
+ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js)
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
                      -Wmissing-declarations -Wmissing-prototypes
 )
...
...
@@ -221,11 +221,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS LRNLayer : public Layer
     {
     public:
-        enum Type
-        {
-            CHANNEL_NRM,
-            SPATIAL_NRM
-        };
         int type;

         int size;
@@ -238,14 +233,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS PoolingLayer : public Layer
     {
     public:
-        enum Type
-        {
-            MAX,
-            AVE,
-            STOCHASTIC,
-            ROI
-        };
         int type;
         Size kernel, stride, pad;
         bool globalPooling;
@@ -474,13 +461,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS EltwiseLayer : public Layer
     {
     public:
-        enum EltwiseOp
-        {
-            PROD = 0,
-            SUM = 1,
-            MAX = 2,
-        };
-
         static Ptr<EltwiseLayer> create(const LayerParams &params);
     };
...
...
@@ -423,7 +423,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          *  @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
          *  @param outBlobNames names for layers which outputs are needed to get
          */
-        CV_WRAP void forward(std::vector<std::vector<Mat> >& outputBlobs,
+        void forward(std::vector<std::vector<Mat> >& outputBlobs,
                              const std::vector<String>& outBlobNames);

         //TODO:
@@ -467,7 +467,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          *  @note If updating blob is not empty then @p blob must have the same shape,
          *  because network reshaping is not implemented yet.
          */
-        CV_WRAP void setInput(const Mat &blob, const String& name = "");
+        CV_WRAP void setInput(InputArray blob, const String& name = "");

         /** @brief Sets the new value for the learned param of the layer.
          *  @param layer name or id of the layer.
@@ -733,7 +733,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
      *  @returns 4-dimensional Mat with NCHW dimensions order.
      */
-    CV_EXPORTS_W Mat blobFromImage(const Mat& image, double scalefactor=1.0, const Size& size = Size(),
+    CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
                                    const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);

     /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
      *  crops @p images from center, subtract @p mean values, scales values by @p scalefactor,
...
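Switching these signatures to `InputArray` (and dropping `CV_WRAP` from the
`std::vector<std::vector<Mat> >` overload, presumably because the generator cannot
express nested vectors in JavaScript) is what makes them bindable. A hedged sketch of
how the wrapped `blobFromImage` is then called from OpenCV.js, with `Size` and `Scalar`
passed as the object and array literals the bindings accept (values taken from the face
detection sample below):

```js
// Build a 4-dimensional NCHW blob from an image: no scaling (factor 1.0),
// resize to 128x96, subtract mean [104, 177, 123, 0], keep BGR, no center crop.
var blob = cv.blobFromImage(img, 1.0, {width: 128, height: 96},
                            [104, 177, 123, 0], false, false);
```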
...
@@ -84,11 +84,11 @@ static String toString(const T &v)
     return ss.str();
 }

-Mat blobFromImage(const Mat& image, double scalefactor, const Size& size,
+Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                   const Scalar& mean, bool swapRB, bool crop)
 {
     CV_TRACE_FUNCTION();
-    std::vector<Mat> images(1, image);
+    std::vector<Mat> images(1, image.getMat());
     return blobFromImages(images, scalefactor, size, mean, swapRB, crop);
 }
@@ -1910,7 +1910,7 @@ void Net::setInputsNames(const std::vector<String> &inputBlobNames)
     impl->netInputLayer->setNames(inputBlobNames);
 }

-void Net::setInput(const Mat &blob_, const String& name)
+void Net::setInput(InputArray blob, const String& name)
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
@@ -1930,6 +1930,7 @@ void Net::setInput(const Mat &blob_, const String& name)
     ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
     ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
+    Mat blob_ = blob.getMat();
     bool oldShape = prevShape == shape(blob_);
     if (oldShape)
     {
...
...
@@ -52,22 +52,27 @@ namespace dnn
 class EltwiseLayerImpl : public EltwiseLayer
 {
 public:
-    EltwiseOp op;
+    enum EltwiseOp
+    {
+        PROD = 0,
+        SUM = 1,
+        MAX = 2,
+    } op;
     std::vector<float> coeffs;

     EltwiseLayerImpl(const LayerParams& params)
     {
         setParamsFrom(params);
-        op = EltwiseLayer::SUM;
+        op = SUM;
         if (params.has("operation"))
         {
             String operation = params.get<String>("operation").toLowerCase();
             if (operation == "prod")
-                op = EltwiseLayer::PROD;
+                op = PROD;
             else if (operation == "sum")
-                op = EltwiseLayer::SUM;
+                op = SUM;
             else if (operation == "max")
-                op = EltwiseLayer::MAX;
+                op = MAX;
             else
                 CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
         }
@@ -122,7 +127,7 @@ public:
     int channels;
     size_t planeSize;

-    EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(EltwiseLayer::PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}
+    EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}

     static void run(const Mat** srcs, int nsrcs, Mat& dst,
                     const std::vector<float>& coeffs, EltwiseOp op,
@@ -150,7 +155,7 @@ public:
         CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);

         bool simpleCoeffs = true;
-        if( op == EltwiseLayer::SUM && !coeffs.empty() )
+        if( op == SUM && !coeffs.empty() )
         {
             CV_Assert( coeffs.size() == (size_t)nsrcs );
@@ -192,7 +197,7 @@ public:
                 const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;
                 float* dstptr = dstptr0 + globalDelta;
-                if( op == EltwiseLayer::PROD )
+                if( op == PROD )
                 {
                     for( k = 1; k < n; k++ )
                     {
@@ -204,7 +209,7 @@ public:
                         srcptr0 = (const float*)dstptr;
                     }
                 }
-                else if( op == EltwiseLayer::MAX )
+                else if( op == MAX )
                 {
                     for( k = 1; k < n; k++ )
                     {
...
...
@@ -67,9 +67,9 @@ public:
         type = -1;
         String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
         if (nrmType == "ACROSS_CHANNELS")
-            type = LRNLayer::CHANNEL_NRM;
+            type = CHANNEL_NRM;
         else if (nrmType == "WITHIN_CHANNEL")
-            type = LRNLayer::SPATIAL_NRM;
+            type = SPATIAL_NRM;
         else
             CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
@@ -397,6 +397,13 @@ public:
         }
         return flops;
     }
+
+private:
+    enum Type
+    {
+        CHANNEL_NRM,
+        SPATIAL_NRM
+    };
 };

 Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
...
...
@@ -63,7 +63,7 @@ class PoolingLayerImpl : public PoolingLayer
 public:
     PoolingLayerImpl(const LayerParams& params)
     {
-        type = PoolingLayer::MAX;
+        type = MAX;
         computeMaxIdx = true;
         globalPooling = false;
@@ -71,11 +71,11 @@ public:
         {
             String pool = params.get<String>("pool").toLowerCase();
             if (pool == "max")
-                type = PoolingLayer::MAX;
+                type = MAX;
             else if (pool == "ave")
-                type = PoolingLayer::AVE;
+                type = AVE;
             else if (pool == "stochastic")
-                type = PoolingLayer::STOCHASTIC;
+                type = STOCHASTIC;
             else
                 CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
             getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
@@ -83,7 +83,7 @@ public:
         }
         else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale"))
         {
-            type = PoolingLayer::ROI;
+            type = ROI;
         }
         setParamsFrom(params);
         ceilMode = params.get<bool>("ceil_mode", true);
@@ -115,8 +115,7 @@ public:
     {
         return backendId == DNN_BACKEND_DEFAULT ||
                backendId == DNN_BACKEND_HALIDE && haveHalide() &&
-               (type == PoolingLayer::MAX ||
-                type == PoolingLayer::AVE && !pad.width && !pad.height);
+               (type == MAX || type == AVE && !pad.width && !pad.height);
     }

 #ifdef HAVE_OPENCL
@@ -200,9 +199,9 @@ public:
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
     {
-        if (type == PoolingLayer::MAX)
+        if (type == MAX)
             return initMaxPoolingHalide(inputs);
-        else if (type == PoolingLayer::AVE)
+        else if (type == AVE)
             return initAvePoolingHalide(inputs);
         else
             return Ptr<BackendNode>();
@@ -221,7 +220,7 @@ public:
         float spatialScale;

         PoolingInvoker() : src(0), rois(0), dst(0), mask(0), nstripes(0),
-                           computeMaxIdx(0), poolingType(PoolingLayer::MAX), spatialScale(0) {}
+                           computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}

         static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
                         Size stride, Size pad, int poolingType, float spatialScale,
@@ -698,7 +697,7 @@ public:
             out.height = 1;
             out.width = 1;
         }
-        else if (type == PoolingLayer::ROI)
+        else if (type == ROI)
         {
             out.height = pooledSize.height;
             out.width = pooledSize.width;
@@ -757,6 +756,14 @@ public:
         }
         return flops;
     }
+
+private:
+    enum Type
+    {
+        MAX,
+        AVE,
+        STOCHASTIC,
+        ROI
+    };
 };

 Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
...
...
@@ -73,11 +73,13 @@
 #include "opencv2/video/tracking.hpp"
 #include "opencv2/video/background_segm.hpp"
 #include "opencv2/objdetect.hpp"
+#include "opencv2/dnn.hpp"

 #include <emscripten/bind.h>

 using namespace emscripten;
 using namespace cv;
+using namespace dnn;

 namespace binding_utils
 {
@@ -339,12 +341,12 @@ EMSCRIPTEN_BINDINGS(binding_utils)
         .constructor<int, int, int, const Scalar&>()
         .constructor(&binding_utils::createMat, allow_raw_pointers())

-        .class_function("eye", select_overload<Mat(int, int, int)>(&binding_utils::matEye))
         .class_function("eye", select_overload<Mat(Size, int)>(&binding_utils::matEye))
-        .class_function("ones", select_overload<Mat(int, int, int)>(&binding_utils::matOnes))
+        .class_function("eye", select_overload<Mat(int, int, int)>(&binding_utils::matEye))
         .class_function("ones", select_overload<Mat(Size, int)>(&binding_utils::matOnes))
-        .class_function("zeros", select_overload<Mat(int, int, int)>(&binding_utils::matZeros))
+        .class_function("ones", select_overload<Mat(int, int, int)>(&binding_utils::matOnes))
         .class_function("zeros", select_overload<Mat(Size, int)>(&binding_utils::matZeros))
+        .class_function("zeros", select_overload<Mat(int, int, int)>(&binding_utils::matZeros))

         .property("rows", &cv::Mat::rows)
         .property("cols", &cv::Mat::cols)
...
...
@@ -125,6 +125,9 @@ video = {'': ['CamShift', 'calcOpticalFlowFarneback', 'calcOpticalFlowPyrLK', 'c
          'BackgroundSubtractorMOG2': ['BackgroundSubtractorMOG2', 'apply'],
          'BackgroundSubtractor': ['apply', 'getBackgroundImage']}

+dnn = {'dnn_Net': ['setInput', 'forward'],
+       '': ['readNetFromCaffe', 'readNetFromTensorflow', 'readNetFromTorch', 'readNetFromDarknet', 'blobFromImage']}
+
 def makeWhiteList(module_list):
     wl = {}
     for m in module_list:
@@ -135,7 +138,7 @@ def makeWhiteList(module_list):
                 wl[k] = m[k]
     return wl

-white_list = makeWhiteList([core, imgproc, objdetect, video])
+white_list = makeWhiteList([core, imgproc, objdetect, video, dnn])

 # Features to be exported
 export_enums = False
...
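The whitelist above is the entire dnn surface exported to JavaScript by this commit. A
hedged end-to-end sketch of that surface in use (the model files are assumed to have
already been written into Emscripten's virtual filesystem, e.g. by the sample's
`utils.createFileFromUrl` helper, and `img` is a placeholder input image):

```js
// Load a Caffe model and run one forward pass over an image blob.
var net = cv.readNetFromCaffe('face_detector.prototxt', 'face_detector.caffemodel');
var blob = cv.blobFromImage(img, 1.0, {width: 128, height: 96},
                            [104, 177, 123, 0], false, false);
net.setInput(blob);
var out = net.forward();
// ... consume out.data32F ...
blob.delete(); out.delete();  // Wasm-side memory must be freed manually.
```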
...
@@ -134,7 +134,7 @@ class Builder:
                "-DBUILD_ZLIB=ON",
                "-DBUILD_opencv_apps=OFF",
                "-DBUILD_opencv_calib3d=OFF",
-               "-DBUILD_opencv_dnn=OFF",
+               "-DBUILD_opencv_dnn=ON",
                "-DBUILD_opencv_features2d=OFF",
                "-DBUILD_opencv_flann=OFF",
                "-DBUILD_opencv_ml=OFF",
...
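With `BUILD_opencv_dnn` flipped to `ON`, a default OpenCV.js build (e.g. `python platforms/js/build_js.py <build_dir>`, as described in the OpenCV.js build tutorial) should now compile the dnn module into opencv.js.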
<!DOCTYPE html>
<html>
<head>
<script async src="../../opencv.js" type="text/javascript"></script>
<script src="../../utils.js" type="text/javascript"></script>
<script type='text/javascript'>
var netDet = undefined, netRecogn = undefined;
var persons = {};
//! [Run face detection model]
function detectFaces(img) {
var blob = cv.blobFromImage(img, 1, {width: 128, height: 96}, [104, 177, 123, 0], false, false);
netDet.setInput(blob);
var out = netDet.forward();
var faces = [];
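// Each detection occupies 7 floats of the output blob:
// [batchId, classId, confidence, left, top, right, bottom], coordinates normalized.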
for (var i = 0, n = out.data32F.length; i < n; i += 7) {
var confidence = out.data32F[i + 2];
var left = out.data32F[i + 3] * img.cols;
var top = out.data32F[i + 4] * img.rows;
var right = out.data32F[i + 5] * img.cols;
var bottom = out.data32F[i + 6] * img.rows;
left = Math.min(Math.max(0, left), img.cols - 1);
right = Math.min(Math.max(0, right), img.cols - 1);
bottom = Math.min(Math.max(0, bottom), img.rows - 1);
top = Math.min(Math.max(0, top), img.rows - 1);
if (confidence > 0.5 && left < right && top < bottom) {
faces.push({x: left, y: top, width: right - left, height: bottom - top});
}
}
blob.delete();
out.delete();
return faces;
};
//! [Run face detection model]
//! [Get 128 floating points feature vector]
function face2vec(face) {
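// Scale pixel values to [0, 1] and swap BGR -> RGB: OpenFace expects RGB input.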
var blob = cv.blobFromImage(face, 1.0 / 255, {width: 96, height: 96}, [0, 0, 0, 0], true, false)
netRecogn.setInput(blob);
var vec = netRecogn.forward();
blob.delete();
return vec;
};
//! [Get 128 floating points feature vector]
//! [Recognize]
function recognize(face) {
var vec = face2vec(face);
var bestMatchName = 'unknown';
var bestMatchScore = 0.5; // Scores range down to -1; 0.5 is the acceptance threshold.
for (var name in persons) {
var personVec = persons[name];
var score = vec.dot(personVec);
if (score > bestMatchScore) {
bestMatchScore = score;
bestMatchName = name;
}
}
vec.delete();
return bestMatchName;
};
//! [Recognize]
function loadModels(callback) {
var utils = new Utils('');
var proto = 'https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt';
var weights = 'https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel';
var recognModel = 'https://raw.githubusercontent.com/pyannote/pyannote-data/master/openface.nn4.small2.v1.t7';
utils.createFileFromUrl('face_detector.prototxt', proto, () => {
document.getElementById('status').innerHTML = 'Downloading face_detector.caffemodel';
utils.createFileFromUrl('face_detector.caffemodel', weights, () => {
document.getElementById('status').innerHTML = 'Downloading OpenFace model';
utils.createFileFromUrl('face_recognition.t7', recognModel, () => {
document.getElementById('status').innerHTML = '';
netDet = cv.readNetFromCaffe('face_detector.prototxt', 'face_detector.caffemodel');
netRecogn = cv.readNetFromTorch('face_recognition.t7');
callback();
});
});
});
};
function main() {
// Create a camera object.
var output = document.getElementById('output');
var camera = document.createElement("video");
camera.setAttribute("width", output.width);
camera.setAttribute("height", output.height);
// Get a permission from user to use a camera.
navigator.mediaDevices.getUserMedia({video: true, audio: false})
.then(function(stream) {
camera.srcObject = stream;
camera.onloadedmetadata = function(e) {
camera.play();
};
});
//! [Open a camera stream]
var cap = new cv.VideoCapture(camera);
var frame = new cv.Mat(camera.height, camera.width, cv.CV_8UC4);
var frameBGR = new cv.Mat(camera.height, camera.width, cv.CV_8UC3);
//! [Open a camera stream]
//! [Add a person]
document.getElementById('addPersonButton').onclick = function() {
var rects = detectFaces(frameBGR);
if (rects.length > 0) {
var face = frameBGR.roi(rects[0]);
var name = prompt('Say your name:');
var cell = document.getElementById("targetNames").insertCell(0);
cell.innerHTML = name;
persons[name] = face2vec(face).clone();
var canvas = document.createElement("canvas");
canvas.setAttribute("width", 96);
canvas.setAttribute("height", 96);
var cell = document.getElementById("targetImgs").insertCell(0);
cell.appendChild(canvas);
var faceResized = new cv.Mat(canvas.height, canvas.width, cv.CV_8UC3);
cv.resize(face, faceResized, {width: canvas.width, height: canvas.height});
cv.cvtColor(faceResized, faceResized, cv.COLOR_BGR2RGB);
cv.imshow(canvas, faceResized);
faceResized.delete();
}
};
//! [Add a person]
//! [Define frames processing]
var isRunning = false;
const FPS = 30; // Target number of frames processed per second.
function captureFrame() {
var begin = Date.now();
cap.read(frame); // Read a frame from camera
cv.cvtColor(frame, frameBGR, cv.COLOR_RGBA2BGR);
var faces = detectFaces(frameBGR);
faces.forEach(function(rect) {
cv.rectangle(frame, {x: rect.x, y: rect.y}, {x: rect.x + rect.width, y: rect.y + rect.height}, [0, 255, 0, 255]);
var face = frameBGR.roi(rect);
var name = recognize(face);
cv.putText(frame, name, {x: rect.x, y: rect.y}, cv.FONT_HERSHEY_SIMPLEX, 1.0, [0, 255, 0, 255]);
});
cv.imshow(output, frame);
// Loop this function.
if (isRunning) {
var delay = 1000 / FPS - (Date.now() - begin);
setTimeout(captureFrame, delay);
}
};
//! [Define frames processing]
document.getElementById('startStopButton').onclick = function toggle() {
if (isRunning) {
isRunning = false;
document.getElementById('startStopButton').innerHTML = 'Start';
document.getElementById('addPersonButton').disabled = true;
} else {
function run() {
isRunning = true;
captureFrame();
document.getElementById('startStopButton').innerHTML = 'Stop';
document.getElementById('startStopButton').disabled = false;
document.getElementById('addPersonButton').disabled = false;
}
if (netDet == undefined || netRecogn == undefined) {
document.getElementById('startStopButton').disabled = true;
loadModels(run); // Load the models, then run the pipeline.
} else {
run();
}
}
};
document.getElementById('startStopButton').disabled = false;
};
</script>
</head>
<body onload="main()">
<button id="startStopButton" type="button" disabled="true">Start</button>
<div id="status"></div>
<canvas id="output" width=640 height=480 style="max-width: 100%"></canvas>
<table>
<tr id="targetImgs"></tr>
<tr id="targetNames"></tr>
</table>
<button id="addPersonButton" type="button" disabled="true">Add a person</button>
</body>
</html>