Commit 3180bbe3 authored by niko

Merge branch 'master' of git://code.opencv.org/opencv

parents 8eeacc8c 27c2aa3a
......@@ -93,7 +93,6 @@ You need the following to be installed:
But for successful compilation of some samples the **target** platform should be set to Android 3.0 (API 11) or higher. It will not prevent them from running on Android 2.2.
.. image:: images/android_sdk_and_avd_manager.png
:height: 500px
:alt: Android SDK Manager
:align: center
......@@ -330,7 +329,6 @@ Well, running samples from Eclipse is very simple:
* Here is ``Tutorial 2 - Use OpenCV Camera`` sample, running on top of stock camera-preview of the emulator.
.. image:: images/emulator_canny.png
:height: 600px
:alt: Tutorial 1 Basic - 1. Add OpenCV - running Canny
:align: center
......
......@@ -4,18 +4,27 @@ project(${the_target})
link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib")
INCLUDE_DIRECTORIES(BEFORE
if (ANDROID_VERSION VERSION_LESS "4.1")
INCLUDE_DIRECTORIES(BEFORE
${ANDROID_SOURCE_TREE}
${ANDROID_SOURCE_TREE}/frameworks/base/include/ui
${ANDROID_SOURCE_TREE}/frameworks/base/include/surfaceflinger
${ANDROID_SOURCE_TREE}/frameworks/base/include/camera
${ANDROID_SOURCE_TREE}/frameworks/base/include/media
${ANDROID_SOURCE_TREE}/frameworks/base/include/camera
${ANDROID_SOURCE_TREE}/frameworks/base/include
${ANDROID_SOURCE_TREE}/system/core/include
${ANDROID_SOURCE_TREE}/hardware/libhardware/include
${ANDROID_SOURCE_TREE}/frameworks/base/native/include
)
else()
INCLUDE_DIRECTORIES(BEFORE
${ANDROID_SOURCE_TREE}
${ANDROID_SOURCE_TREE}/frameworks/native/include
${ANDROID_SOURCE_TREE}/frameworks/av/include
${ANDROID_SOURCE_TREE}/system/core/include
${ANDROID_SOURCE_TREE}/hardware/libhardware/include
)
endif()
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
......
#if !defined(ANDROID_r2_2_0) && !defined(ANDROID_r2_3_3) && !defined(ANDROID_r3_0_1) && !defined(ANDROID_r4_0_0) && !defined(ANDROID_r4_0_3)
#if !defined(ANDROID_r2_2_0) && !defined(ANDROID_r2_3_3) && !defined(ANDROID_r3_0_1) && !defined(ANDROID_r4_0_0) && !defined(ANDROID_r4_0_3) && !defined(ANDROID_r4_1_1)
# error Building camera wrapper for your version of Android is not supported by OpenCV. You need to modify OpenCV sources in order to compile camera wrapper for your version of Android.
#endif
......@@ -12,13 +12,18 @@
#include "camera_wrapper.h"
#include "../include/camera_properties.h"
#if defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
#if defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1)
//Include SurfaceTexture.h file with the SurfaceTexture class
# include <gui/SurfaceTexture.h>
# define MAGIC_OPENCV_TEXTURE_ID (0x10)
#else // defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
//TODO: This is either 2.2 or 2.3. Include the headers for ISurface.h access
#if defined(ANDROID_r4_1_1)
#include <gui/ISurface.h>
#include <gui/BufferQueue.h>
#else
# include <surfaceflinger/ISurface.h>
#endif // defined(ANDROID_r4_1_1)
#endif // defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
#include <string>
......@@ -53,6 +58,21 @@
using namespace android;
void debugShowFPS();
#if defined(ANDROID_r4_1_1)
// Stub listener: OpenCV only needs a connected BufferQueue consumer,
// so both callbacks can safely do nothing.
class ConsumerListenerStub: public BufferQueue::ConsumerListener
{
public:
virtual void onFrameAvailable()
{
}
virtual void onBuffersReleased()
{
}
};
#endif
void debugShowFPS()
{
static int mFrameCount = 0;
......@@ -260,8 +280,8 @@ public:
}
virtual void postData(int32_t msgType, const sp<IMemory>& dataPtr
#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
,camera_frame_metadata_t* metadata
#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1)
,camera_frame_metadata_t*
#endif
)
{
......@@ -506,9 +526,16 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
pdstatus = camera->setPreviewTexture(surfaceTexture);
if (pdstatus != 0)
LOGE("initCameraConnect: failed setPreviewTexture call; camera migth not work correctly");
#elif defined(ANDROID_r4_1_1)
sp<BufferQueue> bufferQueue = new BufferQueue();
sp<BufferQueue::ConsumerListener> queueListener = new ConsumerListenerStub();
bufferQueue->consumerConnect(queueListener);
pdstatus = camera->setPreviewTexture(bufferQueue);
if (pdstatus != 0)
LOGE("initCameraConnect: failed setPreviewTexture call; camera migth not work correctly");
#endif
#if !(defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3))
#if (defined(ANDROID_r2_2_0) || defined(ANDROID_r2_3_3) || defined(ANDROID_r3_0_1))
# if 1
////ATTENTION: switching between two versions: with and without copying memory inside Android OS
//// see the method CameraService::Client::copyFrameAndPostCopiedFrame and where it is used
......@@ -520,6 +547,7 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
camera->setPreviewCallbackFlags( CAMERA_FRAME_CALLBACK_FLAG_ENABLE_MASK | CAMERA_FRAME_CALLBACK_FLAG_COPY_OUT_MASK);//with copy
#endif //!(defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3))
LOGD("Starting preview");
status_t resStart = camera->startPreview();
if (resStart != 0)
......@@ -528,6 +556,10 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
handler->closeCameraConnect();
handler = 0;
}
else
{
LOGD("Preview started successfully");
}
return handler;
}
......
......@@ -19,16 +19,19 @@ a unified access to all face recognition algorithms in OpenCV. ::
// Trains a FaceRecognizer.
virtual void train(InputArrayOfArrays src, InputArray labels) = 0;
// Updates a FaceRecognizer.
virtual void update(InputArrayOfArrays src, InputArray labels);
// Gets a prediction from a FaceRecognizer.
virtual int predict(InputArray src) const = 0;
// Predicts the label and confidence for a given sample.
virtual void predict(InputArray src, int &label, double &confidence) const = 0;
// Serializes this object to a given filename.
virtual void save(const string& filename) const;
// Deserializes this object from a given filename.
virtual void load(const string& filename);
......@@ -39,6 +42,7 @@ a unified access to all face recongition algorithms in OpenCV. ::
virtual void load(const FileStorage& fs) = 0;
};
Description
+++++++++++
......@@ -99,13 +103,6 @@ If you've set the threshold to ``0.0`` as we did above, then:
is going to yield ``-1`` as the predicted label, which indicates that this face is unknown.
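A minimal sketch of that situation (assuming a previously trained ``model`` and a grayscale query image ``img``; both names are illustrative):

.. code-block:: cpp

    // Setting the threshold to 0.0 makes every query "unknown",
    // because no distance can fall below it:
    model->set("threshold", 0.0);
    int predictedLabel = model->predict(img);
    // predictedLabel is -1 here, i.e. the face is unknown.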
Adding new samples to a trained FaceRecognizer
++++++++++++++++++++++++++++++++++++++++++++++
Adding new images to a trained :ocv:class:`FaceRecognizer` is possible, but only if the :ocv:class:`FaceRecognizer` supports it. For the Eigenfaces and Fisherfaces methods, each call to :ocv:func:`FaceRecognizer::train` empties the old model and estimates a new model on the given data; this is an algorithmic necessity for these two algorithms, and there is no way around it. Please see the tutorial Guide To Face Recognition with OpenCV for details. If you call :ocv:func:`FaceRecognizer::train` on a LBPH model, the internal model is extended with the new samples.
Please note: A :ocv:class:`FaceRecognizer` does not store your training images (this would be very memory intensive); the caller is responsible for maintaining the dataset.
Getting the name of a FaceRecognizer
+++++++++++++++++++++++++++++++++++++
......@@ -164,6 +161,50 @@ And finally train it on the given dataset (the face images and labels):
//
model->train(images, labels);
FaceRecognizer::update
----------------------
Updates a FaceRecognizer with given data and associated labels.
.. ocv:function:: void FaceRecognizer::update(InputArrayOfArrays src, InputArray labels)
:param src: The training images, that is, the faces you want to learn. The data has to be given as a ``vector<Mat>``.
:param labels: The labels corresponding to the images have to be given either as a ``vector<int>`` or a ``Mat`` of type ``CV_32SC1``.
This method updates a (possibly trained) :ocv:class:`FaceRecognizer`, but only if the algorithm supports it. The Local Binary Patterns Histograms (LBPH) recognizer (see :ocv:func:`createLBPHFaceRecognizer`) can be updated. For the Eigenfaces and Fisherfaces methods, this is algorithmically not possible and you have to re-estimate the model with :ocv:func:`FaceRecognizer::train`. In any case, a call to train empties the existing model and learns a new model, while update does not delete any model data.
.. code-block:: cpp
// Create a new LBPH model (it can be updated) and use the default parameters,
// this is the most common usage of this specific FaceRecognizer:
//
Ptr<FaceRecognizer> model = createLBPHFaceRecognizer();
// This is the common interface to train all of the available cv::FaceRecognizer
// implementations:
//
model->train(images, labels);
// Some containers to hold new images:
vector<Mat> newImages;
vector<int> newLabels;
// You should add some images to the containers:
//
// ...
//
// Now updating the model is as easy as calling:
model->update(newImages,newLabels);
// This will preserve the old model data and extend the existing model
// with the new features extracted from newImages!
Calling update on an Eigenfaces model (see :ocv:func:`createEigenFaceRecognizer`), which doesn't support updating, will throw an error similar to:
.. code-block:: none
OpenCV Error: The function/feature is not implemented (This FaceRecognizer (FaceRecognizer.Eigenfaces) does not support updating, you have to use FaceRecognizer::train to update it.) in update, file /home/philipp/git/opencv/modules/contrib/src/facerec.cpp, line 305
terminate called after throwing an instance of 'cv::Exception'
Please note: The :ocv:class:`FaceRecognizer` does not store your training images, because this would be very memory intensive and it is not the responsibility of the :ocv:class:`FaceRecognizer` to do so. The caller is responsible for maintaining the dataset they want to work with.
FaceRecognizer::predict
-----------------------
......@@ -176,8 +217,6 @@ FaceRecognizer::predict
:param label: The predicted label for the given image.
:param confidence: Associated confidence (e.g. distance) for the predicted label.
The suffix ``const`` means that prediction does not affect the internal model
state, so the method can be safely called from within different threads.
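A short usage sketch (model creation and the image path are illustrative placeholders):

.. code-block:: cpp

    // Assume a FaceRecognizer that was trained earlier, e.g.:
    // Ptr<FaceRecognizer> model = createEigenFaceRecognizer();
    // model->train(images, labels);
    // Read a query image as grayscale:
    Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE);
    // Get the predicted label and the associated confidence (e.g. distance):
    int predictedLabel = -1;
    double confidence = 0.0;
    model->predict(img, predictedLabel, confidence);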
......@@ -260,7 +299,7 @@ Notes:
* Training and prediction must be done on grayscale images, use :ocv:func:`cvtColor` to convert between the color spaces.
* **THE EIGENFACES METHOD MAKES THE ASSUMPTION THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your input data has the correct shape, otherwise a meaningful exception is thrown. Use :ocv:func:`resize` to resize the images.
* A call to :ocv:func:`FaceRecognizer::train` empties the Eigenfaces model and re-estimates a model on given data.
* This model does not support updating.
Model internal data:
++++++++++++++++++++
......@@ -287,7 +326,7 @@ Notes:
* Training and prediction must be done on grayscale images, use :ocv:func:`cvtColor` to convert between the color spaces.
* **THE FISHERFACES METHOD MAKES THE ASSUMPTION THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your input data has the correct shape, otherwise a meaningful exception is thrown. Use :ocv:func:`resize` to resize the images.
* A call to :ocv:func:`FaceRecognizer::train` empties the Fisherfaces model and re-estimates a model on given data.
* This model does not support updating.
Model internal data:
++++++++++++++++++++
......@@ -316,7 +355,7 @@ Notes:
++++++
* The Circular Local Binary Patterns (used in training and prediction) expect the data to be given as grayscale images; use :ocv:func:`cvtColor` to convert between the color spaces.
* A call to :ocv:func:`FaceRecognizer::train` extends the LBPH model with given data.
* This model supports updating.
Model internal data:
++++++++++++++++++++
......
......@@ -927,6 +927,9 @@ namespace cv
// Trains a FaceRecognizer.
CV_WRAP virtual void train(InputArrayOfArrays src, InputArray labels) = 0;
// Updates a FaceRecognizer.
CV_WRAP virtual void update(InputArrayOfArrays src, InputArray labels);
// Gets a prediction from a FaceRecognizer.
virtual int predict(InputArray src) const = 0;
......
/*
* Copyright (c) 2011. Philipp Wagner <bytefish[at]gmx[dot]de>.
* Copyright (c) 2011,2012. Philipp Wagner <bytefish[at]gmx[dot]de>.
* Released to public domain under terms of the BSD Simplified license.
*
* Redistribution and use in source and binary forms, with or without
......@@ -197,10 +197,10 @@ public:
void predict(InputArray _src, int &label, double &dist) const;
// See FaceRecognizer::load.
virtual void load(const FileStorage& fs);
void load(const FileStorage& fs);
// See FaceRecognizer::save.
virtual void save(FileStorage& fs) const;
void save(FileStorage& fs) const;
AlgorithmInfo* info() const;
};
......@@ -223,6 +223,12 @@ private:
vector<Mat> _histograms;
Mat _labels;
// Computes a LBPH model with images in src and
// corresponding labels in labels, possibly preserving
// old model data.
void train(InputArrayOfArrays src, InputArray labels, bool preserveData);
public:
using FaceRecognizer::save;
using FaceRecognizer::load;
......@@ -265,6 +271,10 @@ public:
// corresponding labels in labels.
void train(InputArrayOfArrays src, InputArray labels);
// Updates this LBPH model with images in src and
// corresponding labels in labels.
void update(InputArrayOfArrays src, InputArray labels);
// Predicts the label of a query image in src.
int predict(InputArray src) const;
......@@ -290,6 +300,11 @@ public:
//------------------------------------------------------------------------------
// FaceRecognizer
//------------------------------------------------------------------------------
void FaceRecognizer::update(InputArrayOfArrays, InputArray) {
string error_msg = format("This FaceRecognizer (%s) does not support updating, you have to use FaceRecognizer::train to update it.", this->name().c_str());
CV_Error(CV_StsNotImplemented, error_msg);
}
void FaceRecognizer::save(const string& filename) const {
FileStorage fs(filename, FileStorage::WRITE);
if (!fs.isOpened())
......@@ -563,7 +578,6 @@ void olbp_(InputArray _src, OutputArray _dst) {
}
}
//------------------------------------------------------------------------------
// cv::elbp
//------------------------------------------------------------------------------
......@@ -607,15 +621,19 @@ inline void elbp_(InputArray _src, OutputArray _dst, int radius, int neighbors)
static void elbp(InputArray src, OutputArray dst, int radius, int neighbors)
{
switch (src.type()) {
case CV_8SC1: elbp_<char>(src,dst, radius, neighbors); break;
case CV_8UC1: elbp_<unsigned char>(src, dst, radius, neighbors); break;
case CV_16SC1: elbp_<short>(src,dst, radius, neighbors); break;
case CV_16UC1: elbp_<unsigned short>(src,dst, radius, neighbors); break;
case CV_32SC1: elbp_<int>(src,dst, radius, neighbors); break;
case CV_32FC1: elbp_<float>(src,dst, radius, neighbors); break;
case CV_64FC1: elbp_<double>(src,dst, radius, neighbors); break;
default: break;
int type = src.type();
switch (type) {
case CV_8SC1: elbp_<char>(src,dst, radius, neighbors); break;
case CV_8UC1: elbp_<unsigned char>(src, dst, radius, neighbors); break;
case CV_16SC1: elbp_<short>(src,dst, radius, neighbors); break;
case CV_16UC1: elbp_<unsigned short>(src,dst, radius, neighbors); break;
case CV_32SC1: elbp_<int>(src,dst, radius, neighbors); break;
case CV_32FC1: elbp_<float>(src,dst, radius, neighbors); break;
case CV_64FC1: elbp_<double>(src,dst, radius, neighbors); break;
default:
string error_msg = format("Using Original Local Binary Patterns for feature extraction only works on single-channel images (given %d). Please pass the image data as a grayscale image!", type);
CV_Error(CV_StsNotImplemented, error_msg);
break;
}
}
......@@ -727,28 +745,45 @@ void LBPH::save(FileStorage& fs) const {
fs << "labels" << _labels;
}
void LBPH::train(InputArrayOfArrays _src, InputArray _lbls) {
if(_src.kind() != _InputArray::STD_VECTOR_MAT && _src.kind() != _InputArray::STD_VECTOR_VECTOR) {
void LBPH::train(InputArrayOfArrays _in_src, InputArray _in_labels) {
this->train(_in_src, _in_labels, false);
}
void LBPH::update(InputArrayOfArrays _in_src, InputArray _in_labels) {
// got no data, just return
if(_in_src.total() == 0)
return;
this->train(_in_src, _in_labels, true);
}
void LBPH::train(InputArrayOfArrays _in_src, InputArray _in_labels, bool preserveData) {
if(_in_src.kind() != _InputArray::STD_VECTOR_MAT && _in_src.kind() != _InputArray::STD_VECTOR_VECTOR) {
string error_message = "The images are expected as InputArray::STD_VECTOR_MAT (a std::vector<Mat>) or _InputArray::STD_VECTOR_VECTOR (a std::vector< vector<...> >).";
CV_Error(CV_StsBadArg, error_message);
}
if(_src.total() == 0) {
if(_in_src.total() == 0) {
string error_message = format("Empty training data was given. You'll need more than one sample to learn a model.");
CV_Error(CV_StsUnsupportedFormat, error_message);
} else if(_lbls.getMat().type() != CV_32SC1) {
string error_message = format("Labels must be given as integer (CV_32SC1). Expected %d, but was %d.", CV_32SC1, _lbls.type());
} else if(_in_labels.getMat().type() != CV_32SC1) {
string error_message = format("Labels must be given as integer (CV_32SC1). Expected %d, but was %d.", CV_32SC1, _in_labels.type());
CV_Error(CV_StsUnsupportedFormat, error_message);
}
// get the vector of matrices
vector<Mat> src;
_src.getMatVector(src);
_in_src.getMatVector(src);
// get the label matrix
Mat labels = _lbls.getMat();
Mat labels = _in_labels.getMat();
// check if the data is well-aligned
if(labels.total() != src.size()) {
string error_message = format("The number of samples (src) must equal the number of labels (labels). Was len(samples)=%d, len(labels)=%d.", (int)src.size(), (int)labels.total());
CV_Error(CV_StsBadArg, error_message);
}
// if this model should be trained without preserving old data, delete old model data
if(!preserveData) {
_labels.release();
_histograms.clear();
}
// append labels to _labels matrix
for(size_t labelIdx = 0; labelIdx < labels.total(); labelIdx++) {
_labels.push_back(labels.at<int>((int)labelIdx));
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PARALLEL_TOOL_HPP__
#define __OPENCV_PARALLEL_TOOL_HPP__
#ifdef HAVE_CVCONFIG_H
# include <cvconfig.h>
#endif // HAVE_CVCONFIG_H
/*
HAVE_TBB - using TBB
HAVE_GCD - using GCD
HAVE_OPENMP - using OpenMP
HAVE_CONCURRENCY - using Visual Studio 2010 Concurrency runtime
*/
#ifdef HAVE_TBB
# include "tbb/tbb_stddef.h"
# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
# include "tbb/tbb.h"
# include "tbb/task.h"
# undef min
# undef max
# else
# undef HAVE_TBB
# endif // end TBB version
#endif // HAVE_TBB
#ifdef __cplusplus
namespace cv
{
// a base body class
class CV_EXPORTS ParallelLoopBody
{
public:
virtual void operator() (const Range& range) const = 0;
virtual ~ParallelLoopBody();
};
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body);
template <typename Iterator, typename Body> inline
CV_EXPORTS void parallel_do_(Iterator first, Iterator last, const Body& body)
{
#ifdef HAVE_TBB
tbb::parallel_do(first, last, body);
#else
for ( ; first != last; ++first)
body(*first);
#endif // HAVE_TBB
}
template <typename Body> inline
CV_EXPORTS void parallel_reduce_(const Range& range, Body& body)
{
#ifdef HAVE_TBB
tbb::parallel_reduce(tbb::blocked_range<int>(range.start, range.end), body);
#else
body(range);
#endif // end HAVE_TBB
}
} // namespace cv
#endif // __cplusplus
#endif // __OPENCV_PARALLEL_TOOL_HPP__
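A minimal usage sketch for the interface above (the body class and the helper function are illustrative, not part of the header):

#include "opencv2/core/parallel_tool.hpp"

class SquareBody : public cv::ParallelLoopBody
{
public:
    explicit SquareBody(float* data) : data_(data) {}
    // Called with sub-ranges of the full range, possibly from several threads.
    virtual void operator()(const cv::Range& range) const
    {
        for (int i = range.start; i < range.end; ++i)
            data_[i] *= data_[i];
    }
private:
    float* data_;
};

void squareAll(float* data, int n)
{
    // Dispatches to TBB/Concurrency/OpenMP/GCD when available,
    // otherwise invokes the body once over the whole range.
    cv::parallel_for_(cv::Range(0, n), SquareBody(data));
}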
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_CONCURRENCY
# include <ppl.h>
#elif defined HAVE_OPENMP
# include <omp.h>
#elif defined HAVE_GCD
# include <dispatch/dispatch.h>
#endif // HAVE_CONCURRENCY
namespace cv
{
ParallelLoopBody::~ParallelLoopBody() { }
#ifdef HAVE_TBB
class TbbProxyLoopBody
{
public:
TbbProxyLoopBody(const ParallelLoopBody& _body) :
body(&_body)
{ }
void operator ()(const tbb::blocked_range<int>& range) const
{
body->operator()(Range(range.begin(), range.end()));
}
private:
const ParallelLoopBody* body;
};
#endif // end HAVE_TBB
#ifdef HAVE_GCD
static
void block_function(void* context, size_t index)
{
ParallelLoopBody* ptr_body = static_cast<ParallelLoopBody*>(context);
ptr_body->operator()(Range(index, index + 1));
}
#endif // HAVE_GCD
void parallel_for_(const Range& range, const ParallelLoopBody& body)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<int>(range.start, range.end), TbbProxyLoopBody(body));
#elif defined HAVE_CONCURRENCY
Concurrency::parallel_for(range.start, range.end, body);
#elif defined HAVE_OPENMP
#pragma omp parallel for schedule(dynamic)
for (int i = range.start; i < range.end; ++i)
body(Range(i, i + 1));
#elif defined (HAVE_GCD)
dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
dispatch_apply_f(range.end - range.start, concurrent_queue, &const_cast<ParallelLoopBody&>(body), block_function);
#else
body(range);
#endif // end HAVE_TBB
}
} // namespace cv
......@@ -50,6 +50,7 @@
#include "opencv2/core/core.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/parallel_tool.hpp"
#include <assert.h>
#include <ctype.h>
......
......@@ -221,39 +221,42 @@ static int countNonZero_(const T* src, int len )
return nz;
}
template <>
int countNonZero_ <uchar> (const uchar* src, int len)
static int countNonZero8u( const uchar* src, int len )
{
int i=0, nz = 0;
#if (defined CV_SSE4_2 && CV_SSE4_2 && (_WIN64 || __amd64__))
if(USE_SSE4_2)//5x-6x
{
__m128i pattern = _mm_setzero_si128 ();
__m128i inv = _mm_set1_epi8((char)1);
__int64 CV_DECL_ALIGNED(16) buf[2];
for (; i<=len-16; i+=16)
{
__m128i r0 = _mm_lddqu_si128((const __m128i*)(src+i));
__m128i res = _mm_cmpeq_epi8(r0, pattern);
res = _mm_add_epi8(res, inv);//11111111+1=00000000, 00000000+1=00000001
_mm_store_si128 ((__m128i*)buf, res);
__int64 countLow = _mm_popcnt_u64(buf[0]);
nz += countLow;
__int64 countHigh = _mm_popcnt_u64(buf[1]);
nz +=countHigh;
}
}
#endif
for( ; i < len; i++ )
nz += src[i] != 0;
int i=0, nz = 0;
#if CV_SSE2
if(USE_SSE2)//5x-6x
{
__m128i pattern = _mm_setzero_si128 ();
static uchar tab[256];
static volatile bool initialized = false;
if( !initialized )
{
// we compute inverse popcount table,
// since we pass (img[x] == 0) mask as index in the table.
for( int j = 0; j < 256; j++ )
{
int val = 0;
for( int mask = 1; mask < 256; mask += mask )
val += (j & mask) == 0;
tab[j] = (uchar)val;
}
initialized = true;
}
for (; i<=len-16; i+=16)
{
__m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));
nz += tab[val & 255] + tab[val >> 8];
}
}
#endif
for( ; i < len; i++ )
nz += src[i] != 0;
return nz;
}
static int countNonZero8u( const uchar* src, int len )
{ return countNonZero_(src, len); }
static int countNonZero16u( const ushort* src, int len )
{ return countNonZero_(src, len); }
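For reference, a scalar sketch of the inverse-popcount-table trick used above (the helper names are illustrative):

// tab[mask] = number of zero bits among the low 8 bits of mask.
static unsigned char inv_popcount8(unsigned mask)
{
    int val = 0;
    for (unsigned bit = 1; bit < 256; bit += bit)
        val += (mask & bit) == 0;
    return (unsigned char)val;
}

// Counts the nonzero bytes in a group of 8 the same way the SSE2 loop does:
// mask has bit k set iff src[k] == 0 (what _mm_movemask_epi8 yields per lane
// after _mm_cmpeq_epi8 against zero), so the zero bits of mask mark the
// nonzero bytes.
static int countNonZeroGroup8(const unsigned char src[8])
{
    unsigned mask = 0;
    for (int k = 0; k < 8; k++)
        mask |= (unsigned)(src[k] == 0) << k;
    return inv_popcount8(mask);
}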
......
......@@ -473,12 +473,18 @@ protected:
//! detects corners using FAST algorithm by E. Rosten
CV_EXPORTS void FAST( InputArray image, CV_OUT vector<KeyPoint>& keypoints,
int threshold, bool nonmaxSupression=true );
int threshold, bool nonmaxSupression=true, int type = 2 );
class CV_EXPORTS_W FastFeatureDetector : public FeatureDetector
{
public:
CV_WRAP FastFeatureDetector( int threshold=10, bool nonmaxSuppression=true );
enum
{
TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2
};
CV_WRAP FastFeatureDetector( int threshold=10, bool nonmaxSuppression=true);
CV_WRAP FastFeatureDetector( int threshold, bool nonmaxSuppression, int type);
AlgorithmInfo* info() const;
protected:
......@@ -486,6 +492,7 @@ protected:
int threshold;
bool nonmaxSuppression;
int type;
};
......
......@@ -22,9 +22,13 @@ PERF_TEST_P(fast, detectForORB, testing::Values(FAST_IMAGES))
declare.in(frame);
FastFeatureDetector fd(20, true);
FastFeatureDetector fd(20, true, FastFeatureDetector::TYPE_5_8);
vector<KeyPoint> points;
TEST_CYCLE() fd.detect(frame, points);
fd = FastFeatureDetector(20, true, FastFeatureDetector::TYPE_7_12);
TEST_CYCLE() fd.detect(frame, points);
fd = FastFeatureDetector(20, true, FastFeatureDetector::TYPE_9_16);
TEST_CYCLE() fd.detect(frame, points);
}
......@@ -58,7 +58,8 @@ CV_INIT_ALGORITHM(BriefDescriptorExtractor, "Feature2D.BRIEF",
CV_INIT_ALGORITHM(FastFeatureDetector, "Feature2D.FAST",
obj.info()->addParam(obj, "threshold", obj.threshold);
obj.info()->addParam(obj, "nonmaxSuppression", obj.nonmaxSuppression));
obj.info()->addParam(obj, "nonmaxSuppression", obj.nonmaxSuppression);
obj.info()->addParam(obj, "type", obj.type, FastFeatureDetector::TYPE_9_16));
///////////////////////////////////////////////////////////////////////////////////////////////////////////
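With the "type" parameter registered above, it becomes visible through the generic Algorithm interface. A hedged sketch (only public 2.4 API calls are used; the variable names are illustrative):

// Create the FAST detector via the algorithm factory and query the new parameter:
cv::Ptr<cv::FeatureDetector> fd = cv::FeatureDetector::create("FAST");
int type = fd->getInt("type"); // FastFeatureDetector::TYPE_9_16 by default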
......
......@@ -58,6 +58,7 @@ CV_FastTest::~CV_FastTest() {}
void CV_FastTest::run( int )
{
for(int type=0; type <= 2; ++type) {
Mat image1 = imread(string(ts->get_data_path()) + "inpaint/orig.jpg");
Mat image2 = imread(string(ts->get_data_path()) + "cameracalibration/chess9.jpg");
string xml = string(ts->get_data_path()) + "fast/result.xml";
......@@ -74,8 +75,8 @@ void CV_FastTest::run( int )
vector<KeyPoint> keypoints1;
vector<KeyPoint> keypoints2;
FAST(gray1, keypoints1, 30);
FAST(gray2, keypoints2, 30);
FAST(gray1, keypoints1, 30, type);
FAST(gray2, keypoints2, 30, type);
for(size_t i = 0; i < keypoints1.size(); ++i)
{
......@@ -109,17 +110,21 @@ void CV_FastTest::run( int )
read( fs["exp_kps2"], exp_kps2, Mat() );
fs.release();
// We only have testing data for 9_16 but it actually works equally well for 7_12
if ((type==1) || (type==2)){
if ( 0 != norm(exp_kps1, kps1, NORM_L2) || 0 != norm(exp_kps2, kps2, NORM_L2))
{
ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
return;
}
}
/* cv::namedWindow("Img1"); cv::imshow("Img1", image1);
/*cv::namedWindow("Img1"); cv::imshow("Img1", image1);
cv::namedWindow("Img2"); cv::imshow("Img2", image2);
cv::waitKey(0);*/
}
ts->set_failed_test_info(cvtest::TS::OK);
ts->set_failed_test_info(cvtest::TS::OK);
}
TEST(Features2d_FAST, regression) { CV_FastTest test; test.safe_run(); }
......
......@@ -622,6 +622,9 @@ CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0,
//! channel order.
CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null());
//! Routines for correcting image color gamma
CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
//! applies fixed threshold to the image
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
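A hedged usage sketch for the two declarations above (image contents and threshold values are illustrative):

cv::gpu::GpuMat src(480, 640, CV_8UC3), corrected, gray, bin;
cv::gpu::gammaCorrection(src, corrected);                       // forward gamma correction
cv::gpu::cvtColor(corrected, gray, CV_BGR2GRAY);
cv::gpu::threshold(gray, bin, 128.0, 255.0, cv::THRESH_BINARY); // fixed threshold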
......@@ -1411,7 +1414,7 @@ public:
};
////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
// The cascade classifier class for object detection: supports old Haar and new LBP xml formats and nvbin for Haar cascades only.
class CV_EXPORTS CascadeClassifier_GPU
{
public:
......@@ -1421,28 +1424,28 @@ public:
bool empty() const;
bool load(const std::string& filename);
void release();
/* returns number of detected objects */
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
bool findLargestObject;
bool visualizeInPlace;
Size getClassifierSize() const;
private:
struct CascadeClassifierImpl;
CascadeClassifierImpl* impl;
struct HaarCascade;
struct LbpCascade;
friend class CascadeClassifier_GPU_LBP;
public:
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
};
////////////////////////////////// SURF //////////////////////////////////////////
void release();
/* returns number of detected objects */
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
bool findLargestObject;
bool visualizeInPlace;
Size getClassifierSize() const;
private:
struct CascadeClassifierImpl;
CascadeClassifierImpl* impl;
struct HaarCascade;
struct LbpCascade;
friend class CascadeClassifier_GPU_LBP;
public:
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
};
////////////////////////////////// SURF //////////////////////////////////////////
class CV_EXPORTS SURF_GPU
{
......
......@@ -71,24 +71,32 @@ namespace
return pState;
}
private:
private:
NppiGraphcutState* pState;
};
}
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
{
#if (CUDA_VERSION < 5000)
CV_Assert(terminals.type() == CV_32S);
#else
CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
#endif
Size src_size = terminals.size();
CV_Assert(terminals.type() == CV_32S);
CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
CV_Assert(leftTransp.type() == CV_32S);
CV_Assert(leftTransp.type() == terminals.type());
CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
CV_Assert(rightTransp.type() == CV_32S);
CV_Assert(rightTransp.type() == terminals.type());
CV_Assert(top.size() == src_size);
CV_Assert(top.type() == CV_32S);
CV_Assert(top.type() == terminals.type());
CV_Assert(bottom.size() == src_size);
CV_Assert(bottom.type() == CV_32S);
CV_Assert(bottom.type() == terminals.type());
labels.create(src_size, CV_8U);
......@@ -106,44 +114,61 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
NppStreamHandler h(stream);
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc);
#if (CUDA_VERSION < 5000)
nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
#else
if (terminals.type() == CV_32S)
{
nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
}
else
{
nppSafeCall( nppiGraphcut_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(), top.ptr<Npp32f>(), bottom.ptr<Npp32f>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
}
#endif
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s)
{
Size src_size = terminals.size();
#if (CUDA_VERSION < 5000)
CV_Assert(terminals.type() == CV_32S);
#else
CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
#endif
Size src_size = terminals.size();
CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
CV_Assert(leftTransp.type() == CV_32S);
CV_Assert(leftTransp.type() == terminals.type());
CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
CV_Assert(rightTransp.type() == CV_32S);
CV_Assert(rightTransp.type() == terminals.type());
CV_Assert(top.size() == src_size);
CV_Assert(top.type() == CV_32S);
CV_Assert(top.type() == terminals.type());
CV_Assert(topLeft.size() == src_size);
CV_Assert(topLeft.type() == CV_32S);
CV_Assert(topLeft.type() == terminals.type());
CV_Assert(topRight.size() == src_size);
CV_Assert(topRight.type() == CV_32S);
CV_Assert(topRight.type() == terminals.type());
CV_Assert(bottom.size() == src_size);
CV_Assert(bottom.type() == CV_32S);
CV_Assert(bottom.type() == terminals.type());
CV_Assert(bottomLeft.size() == src_size);
CV_Assert(bottomLeft.type() == CV_32S);
CV_Assert(bottomLeft.type() == terminals.type());
CV_Assert(bottomRight.size() == src_size);
CV_Assert(bottomRight.type() == CV_32S);
CV_Assert(bottomRight.type() == terminals.type());
labels.create(src_size, CV_8U);
......@@ -161,11 +186,28 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
NppStreamHandler h(stream);
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcut8InitAlloc);
nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
#if (CUDA_VERSION < 5000)
nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
#else
if (terminals.type() == CV_32S)
{
nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
}
else
{
nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(),
top.ptr<Npp32f>(), topLeft.ptr<Npp32f>(), topRight.ptr<Npp32f>(),
bottom.ptr<Npp32f>(), bottomLeft.ptr<Npp32f>(), bottomRight.ptr<Npp32f>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
}
#endif
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
......
......@@ -43,8 +43,6 @@
#ifdef HAVE_CUDA
#include <cuda_runtime_api.h>
using namespace std;
using namespace cv;
using namespace cv::gpu;
......
......@@ -72,4 +72,9 @@
#include "utility.hpp"
#include "interpolation.hpp"
#ifdef HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
#endif
#endif
......@@ -1609,6 +1609,141 @@ TEST_P(CvtColor, RGBA2YUV4)
EXPECT_MAT_NEAR(dst_gold, h_dst, 1e-5);
}
TEST_P(CvtColor, BGR2Lab)
{
if (depth != CV_8U)
return;
try
{
cv::Mat src = readImage("stereobm/aloe-L.png");
cv::gpu::GpuMat dst_lab = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(loadMat(src, useRoi), dst_lab, cv::COLOR_BGR2Lab);
cv::gpu::GpuMat dst_bgr = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(dst_lab, dst_bgr, cv::COLOR_Lab2BGR);
EXPECT_MAT_NEAR(src, dst_bgr, 10);
}
catch (const cv::Exception& e)
{
#if (CUDA_VERSION < 5000)
ASSERT_EQ(CV_StsBadFlag, e.code);
#else
FAIL();
#endif
}
}
TEST_P(CvtColor, RGB2Lab)
{
if (depth != CV_8U)
return;
try
{
cv::Mat src = readImage("stereobm/aloe-L.png");
cv::gpu::GpuMat dst_lab = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(loadMat(src, useRoi), dst_lab, cv::COLOR_RGB2Lab);
cv::gpu::GpuMat dst_bgr = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(dst_lab, dst_bgr, cv::COLOR_Lab2RGB);
EXPECT_MAT_NEAR(src, dst_bgr, 10);
}
catch (const cv::Exception& e)
{
#if (CUDA_VERSION < 5000)
ASSERT_EQ(CV_StsBadFlag, e.code);
#else
FAIL();
#endif
}
}
TEST_P(CvtColor, BGR2Luv)
{
if (depth != CV_8U)
return;
try
{
cv::Mat src = img;
cv::gpu::GpuMat dst_luv = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(loadMat(src, useRoi), dst_luv, cv::COLOR_BGR2Luv);
cv::gpu::GpuMat dst_rgb = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(dst_luv, dst_rgb, cv::COLOR_Luv2BGR);
EXPECT_MAT_NEAR(src, dst_rgb, 10);
}
catch (const cv::Exception& e)
{
#if (CUDA_VERSION < 5000)
ASSERT_EQ(CV_StsBadFlag, e.code);
#else
FAIL();
#endif
}
}
TEST_P(CvtColor, RGB2Luv)
{
if (depth != CV_8U)
return;
try
{
cv::Mat src = img;
cv::gpu::GpuMat dst_luv = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(loadMat(src, useRoi), dst_luv, cv::COLOR_RGB2Luv);
cv::gpu::GpuMat dst_rgb = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(dst_luv, dst_rgb, cv::COLOR_Luv2RGB);
EXPECT_MAT_NEAR(src, dst_rgb, 10);
}
catch (const cv::Exception& e)
{
#if (CUDA_VERSION < 5000)
ASSERT_EQ(CV_StsBadFlag, e.code);
#else
FAIL();
#endif
}
}
TEST_P(CvtColor, RGBA2mRGBA)
{
if (depth != CV_8U)
return;
try
{
cv::Mat src = randomMat(size, CV_MAKE_TYPE(depth, 4));
cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi);
cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2mRGBA);
cv::Mat dst_gold;
cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2mRGBA);
EXPECT_MAT_NEAR(dst_gold, dst, 1);
}
catch (const cv::Exception& e)
{
#if (CUDA_VERSION < 5000)
ASSERT_EQ(CV_StsBadFlag, e.code);
#else
FAIL();
#endif
}
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
......
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace perf;
using namespace testing;
using std::tr1::make_tuple;
using std::tr1::get;
CV_ENUM(Mat_Type, CV_8UC1, CV_8UC3, CV_32FC1, CV_32FC3)
typedef TestBaseWithParam< tr1::tuple<Size, int, Mat_Type> > TestBilateralFilter;
PERF_TEST_P( TestBilateralFilter, BilateralFilter,
Combine(
Values( szVGA, sz1080p ), // image size
Values( 3, 5 ), // d
ValuesIn( Mat_Type::all() ) // image type
)
)
{
Size sz;
int d, type;
const double sigmaColor = 1., sigmaSpace = 1.;
sz = get<0>(GetParam());
d = get<1>(GetParam());
type = get<2>(GetParam());
Mat src(sz, type);
Mat dst(sz, type);
declare.in(src, WARMUP_RNG).out(dst).time(20);
TEST_CYCLE() bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, BORDER_DEFAULT);
SANITY_CHECK(dst);
}
......@@ -50,6 +50,7 @@
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/parallel_tool.hpp"
#include <math.h>
#include <assert.h>
#include <string.h>
......
#!/usr/bin/env python
# Software License Agreement (BSD License)
#
# Copyright (c) 2012, Philipp Wagner
# Copyright (c) 2012, Philipp Wagner <bytefish[at]gmx[dot]de>.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
......
......@@ -19,11 +19,8 @@ import numpy as np
import cv2
import video
import common
from operator import attrgetter
def get_size(a):
h, w = a.shape[:2]
return w, h
from collections import namedtuple
from common import getsize
FLANN_INDEX_KDTREE = 1
......@@ -33,12 +30,29 @@ flann_params= dict(algorithm = FLANN_INDEX_LSH,
key_size = 12, # 20
multi_probe_level = 1) #2
MIN_MATCH_COUNT = 10
ar_verts = np.float32([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0],
[0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1],
[0.5, 0.5, 2]])
ar_edges = [(0, 1), (1, 2), (2, 3), (3, 0),
(4, 5), (5, 6), (6, 7), (7, 4),
(0, 4), (1, 5), (2, 6), (3, 7),
(4, 8), (5, 8), (6, 8), (7, 8)]
def draw_keypoints(vis, keypoints, color = (0, 255, 255)):
for kp in keypoints:
x, y = kp.pt
cv2.circle(vis, (int(x), int(y)), 2, color)
class App:
def __init__(self, src):
self.cap = video.create_capture(src)
self.frame = None
self.paused = False
self.ref_frame = None
self.detector = cv2.ORB( nfeatures = 1000 )
......@@ -47,19 +61,18 @@ class App:
cv2.namedWindow('plane')
self.rect_sel = common.RectSelector('plane', self.on_rect)
self.frame = None
def match_frames(self):
if len(self.ref_descs) < MIN_MATCH_COUNT or len(self.frame_desc) < MIN_MATCH_COUNT:
return
raw_matches = self.matcher.knnMatch(self.ref_descs, trainDescriptors = self.frame_desc, k = 2)
raw_matches = self.matcher.knnMatch(self.frame_desc, k = 2)
p0, p1 = [], []
for m in raw_matches:
if len(m) == 2 and m[0].distance < m[1].distance * 0.75:
m = m[0]
p0.append( self.ref_points[m.queryIdx].pt )
p1.append( self.frame_points[m.trainIdx].pt )
p0.append( self.ref_points[m.trainIdx].pt ) # queryIdx
p1.append( self.frame_points[m.queryIdx].pt )
p0, p1 = np.float32((p0, p1))
if len(p0) < MIN_MATCH_COUNT:
return
......@@ -72,44 +85,31 @@ class App:
return p0, p1, H
def on_frame(self, frame):
if self.frame is None or not self.rect_sel.dragging:
self.frame = frame = np.fliplr(frame).copy()
self.frame_points, self.frame_desc = self.detector.detectAndCompute(self.frame, None)
if self.frame_desc is None: # detectAndCompute returns descs=None if no keypoints are found
self.frame_desc = []
else:
self.ref_frame = None
w, h = get_size(self.frame)
vis = np.zeros((h, w*2, 3), np.uint8)
vis[:h,:w] = self.frame
self.rect_sel.draw(vis)
for kp in self.frame_points:
x, y = kp.pt
cv2.circle(vis, (int(x), int(y)), 2, (0, 255, 255))
if self.ref_frame is not None:
vis[:h,w:] = self.ref_frame
x0, y0, x1, y1 = self.ref_rect
cv2.rectangle(vis, (x0+w, y0), (x1+w, y1), (0, 255, 0), 2)
for kp in self.ref_points:
x, y = kp.pt
cv2.circle(vis, (int(x+w), int(y)), 2, (0, 255, 255))
match = self.match_frames()
if match is not None:
p0, p1, H = match
for (x0, y0), (x1, y1) in zip(np.int32(p0), np.int32(p1)):
cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
x0, y0, x1, y1 = self.ref_rect
corners = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
corners = np.int32( cv2.perspectiveTransform(corners.reshape(1, -1, 2), H) )
cv2.polylines(vis, [corners], True, (255, 255, 255), 2)
cv2.imshow('plane', vis)
def on_frame(self, vis):
match = self.match_frames()
if match is None:
return
w, h = getsize(self.frame)
p0, p1, H = match
for (x0, y0), (x1, y1) in zip(np.int32(p0), np.int32(p1)):
cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
x0, y0, x1, y1 = self.ref_rect
corners0 = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
img_corners = cv2.perspectiveTransform(corners0.reshape(1, -1, 2), H)
cv2.polylines(vis, [np.int32(img_corners)], True, (255, 255, 255), 2)
corners3d = np.hstack([corners0, np.zeros((4, 1), np.float32)])
fx = 0.9
# A rough pinhole camera matrix: focal length fx*w, principal point at the image center.
K = np.float64([[fx*w, 0, 0.5*(w-1)],
[0, fx*w, 0.5*(h-1)],
[0.0,0.0, 1.0]])
dist_coef = np.zeros(4)
# Recover the pose of the reference plane from its four projected corners.
ret, rvec, tvec = cv2.solvePnP(corners3d, img_corners, K, dist_coef)
# Scale the 3D model in ar_verts to the selected rectangle and project it into the frame.
verts = ar_verts * [(x1-x0), (y1-y0), -(x1-x0)*0.3] + (x0, y0, 0)
verts = cv2.projectPoints(verts, rvec, tvec, K, dist_coef)[0].reshape(-1, 2)
def on_rect(self, rect):
x0, y0, x1, y1 = rect
......@@ -123,11 +123,39 @@ class App:
descs.append(desc)
self.ref_points, self.ref_descs = points, np.uint8(descs)
self.matcher.clear()
self.matcher.add([self.ref_descs])
def run(self):
while True:
ret, frame = self.cap.read()
self.on_frame(frame)
playing = not self.paused and not self.rect_sel.dragging
if playing or self.frame is None:
ret, frame = self.cap.read()
if not ret:
break
self.frame = np.fliplr(frame).copy()
self.frame_points, self.frame_desc = self.detector.detectAndCompute(self.frame, None)
if self.frame_desc is None: # detectAndCompute returns descs=None if no keypoints are found
self.frame_desc = []
w, h = getsize(self.frame)
vis = np.zeros((h, w*2, 3), np.uint8)
vis[:h,:w] = self.frame
if self.ref_frame is not None:
vis[:h,w:] = self.ref_frame
x0, y0, x1, y1 = self.ref_rect
cv2.rectangle(vis, (x0+w, y0), (x1+w, y1), (0, 255, 0), 2)
draw_keypoints(vis[:,w:], self.ref_points)
draw_keypoints(vis, self.frame_points)
if playing and self.ref_frame is not None:
self.on_frame(vis)
self.rect_sel.draw(vis)
cv2.imshow('plane', vis)
ch = cv2.waitKey(1)
if ch == ord(' '):
self.paused = not self.paused
if ch == 27:
break
......@@ -136,5 +164,5 @@ if __name__ == '__main__':
import sys
try: video_src = sys.argv[1]
except: video_src = '0'
except: video_src = 0
App(video_src).run()