Commit 5e0783e1, authored by Vishal Chiluka and committed by Vishal Bhaskar Chiluka

NVIDIA Optical Flow Integration in OpenCV

parent f0d30f2c
...@@ -7,3 +7,22 @@ set(the_description "CUDA-accelerated Optical Flow") ...@@ -7,3 +7,22 @@ set(the_description "CUDA-accelerated Optical Flow")
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
ocv_define_module(cudaoptflow opencv_video opencv_optflow opencv_cudaarithm opencv_cudawarping opencv_cudaimgproc OPTIONAL opencv_cudalegacy WRAP python) ocv_define_module(cudaoptflow opencv_video opencv_optflow opencv_cudaarithm opencv_cudawarping opencv_cudaimgproc OPTIONAL opencv_cudalegacy WRAP python)
# Fetch the NVIDIA Optical Flow SDK 1.0 headers at configure time.
# The archive is identified by a fixed commit hash and checked against the MD5 below,
# so both values have to be updated together.
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT "79c6cee80a2df9a196f20afd6b598a9810964c32")
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5 "ca5acedee6cb45d0ec610a6732de5c15")
# The headers are unpacked into the build tree, not the source tree.
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_1_0_Headers")
# NOTE(review): with RELATIVE_URL the FILENAME is presumably appended to URL to form the
# download address - confirm against the ocv_download() implementation.
ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}.zip"
HASH ${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5}
URL
"https://github.com/NVIDIA/NVIDIAOpticalFlowSDK/archive/"
DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}"
STATUS NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS
ID "NVIDIA_OPTICAL_FLOW"
UNPACK RELATIVE_URL)
# A failed download is reported but does not stop the configuration; in that case
# HAVE_NVIDIA_OPTFLOW stays undefined and the include directory is not added.
if(NOT NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS)
message(STATUS "Failed to download NVIDIA_Optical_Flow_1_0 Headers")
else()
# Sources test HAVE_NVIDIA_OPTFLOW to decide whether the SDK headers may be included.
add_definitions(-DHAVE_NVIDIA_OPTFLOW=1)
ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}")
endif()
\ No newline at end of file
...@@ -102,6 +102,47 @@ public: ...@@ -102,6 +102,47 @@ public:
OutputArray err = cv::noArray(), OutputArray err = cv::noArray(),
Stream& stream = Stream::Null()) = 0; Stream& stream = Stream::Null()) = 0;
}; };
/** @brief Base interface for optical flow algorithms that use the NVIDIA Optical Flow SDK.
*/
class CV_EXPORTS_W NvidiaHWOpticalFlow : public Algorithm
{
public:
/** @brief Calculates optical flow using the NVIDIA Optical Flow SDK.
* NVIDIA GPUs starting with Turing contain a dedicated hardware accelerator for computing optical flow vectors between pairs of images.
* The optical flow hardware accelerator generates block-based optical flow vectors.
* The size of the block depends on the hardware in use and can be queried with getGridSize().
* If needed, the block-based flow vectors generated by the hardware can be converted to a dense representation
* (i.e. per-pixel flow vectors) with the upSampler() helper function.
* The flow vectors are stored in CV_16SC2 format, with the x and y components of each flow vector in 16-bit signed fixed-point representation S10.5.
@param inputImage Input image.
@param referenceImage Reference image of the same size and the same type as the input image.
@param flow A buffer consisting of inputImage.size() / getGridSize() flow vectors in CV_16SC2 format.
@param stream Stream for the asynchronous version.
@param hint Hint buffer, used when the client provides external hints. Must have the same size as the flow buffer.
The caller can provide flow vectors as hints for the optical flow calculation.
@param cost Cost buffer containing numbers that indicate the confidence associated with each generated flow vector.
The higher the cost, the lower the confidence. The cost buffer is of type CV_32SC1.
@note
- The client must use critical sections around each calc() call when calling it from multiple threads.
*/
CV_WRAP virtual void calc(
InputArray inputImage,
InputArray referenceImage,
InputOutputArray flow,
Stream& stream = Stream::Null(),
InputArray hint = cv::noArray(),
OutputArray cost = cv::noArray()) = 0;
/** @brief Releases all buffers, contexts and device pointers.
*/
CV_WRAP virtual void collectGarbage() = 0;
/** @brief Returns the grid size of the output buffer as per the hardware's capability.
*/
CV_WRAP virtual int getGridSize() const = 0;
};
// //
// BroxOpticalFlow // BroxOpticalFlow
...@@ -342,6 +383,70 @@ public: ...@@ -342,6 +383,70 @@ public:
bool useInitialFlow = false); bool useInitialFlow = false);
}; };
//
// NvidiaOpticalFlow
//
/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 1.0.
@note
- A sample application demonstrating the use of NVIDIA Optical Flow can be found at
opencv_source_code/samples/gpu/nvidia_optical_flow.cpp
- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at
opencv_source_code/samples/gpu/optical_flow.cpp
*/
class CV_EXPORTS_W NvidiaOpticalFlow_1_0 : public NvidiaHWOpticalFlow
{
public:
/**
* Supported optical flow performance levels.
*/
enum NVIDIA_OF_PERF_LEVEL
{
NV_OF_PERF_LEVEL_UNDEFINED,
NV_OF_PERF_LEVEL_SLOW = 5, /**< Slow perf level results in lowest performance and best quality */
NV_OF_PERF_LEVEL_MEDIUM = 10, /**< Medium perf level results in low performance and medium quality */
NV_OF_PERF_LEVEL_FAST = 20, /**< Fast perf level results in high performance and low quality */
NV_OF_PERF_LEVEL_MAX
};
/** @brief The NVIDIA optical flow hardware generates flow vectors at granularity gridSize, which can be queried via getGridSize().
* The upSampler() helper function converts the hardware-generated flow vectors to a dense representation (one flow vector per pixel)
* using the nearest-neighbour upsampling method.
@param flow Buffer of type CV_16SC2 containing the flow vectors generated by calc().
@param width Width, in pixels, of the input image for which these flow vectors were generated.
@param height Height, in pixels, of the input image for which these flow vectors were generated.
@param gridSize Granularity of the optical flow vectors returned by calc(). Can be queried using getGridSize().
@param upsampledFlow Buffer of type CV_32FC2 containing the upsampled flow vectors, one flow vector per pixel, in pitch-linear layout.
*/
CV_WRAP virtual void upSampler(InputArray flow, int width, int height,
int gridSize, InputOutputArray upsampledFlow) = 0;
/** @brief Instantiates NVIDIA Optical Flow.
@param width Width of the input image in pixels.
@param height Height of the input image in pixels.
@param perfPreset Optional parameter. Refer to the [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
Defaults to NV_OF_PERF_LEVEL_SLOW.
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
generated in the previous call to calc() as internal hints for the current call to calc().
Useful when computing flow vectors between successive video frames. Defaults to false.
@param enableExternalHints Optional parameter. Flag to enable passing an external hints buffer to calc(). Defaults to false.
@param enableCostBuffer Optional parameter. Flag to enable cost buffer output from calc(). Defaults to false.
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_1_0> create(
int width,
int height,
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset
= cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,
int gpuId = 0);
};
//! @} //! @}
}} // namespace cv { namespace cuda { }} // namespace cv { namespace cuda {
......
...@@ -326,4 +326,57 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, ...@@ -326,4 +326,57 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
} }
} }
//////////////////////////////////////////////////////
// NvidiaOpticalFlow_1_0
// Performance test for the hardware optical flow: measures calc() on one grayscale image
// pair with the FAST preset and all optional features switched off.
PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    declare.time(10);

    const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    // Only the CUDA path is exercised; there is no CPU counterpart in this test.
    if (!PERF_RUN_CUDA())
        return;

    const cv::cuda::GpuMat d_frame0(frame0);
    const cv::cuda::GpuMat d_frame1(frame1);
    cv::cuda::GpuMat d_flow;

    cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
    try
    {
        d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(
            frame0.cols, frame0.rows,
            cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST,
            /*enableTemporalHints=*/false,
            /*enableExternalHints=*/false,
            /*enableCostBuffer=*/false,
            /*gpuId=*/0);
    }
    catch (const cv::Exception& e)
    {
        // These three codes are treated as "not supported on this configuration": skip, don't fail.
        switch (e.code)
        {
        case Error::StsBadFunc:
        case Error::StsBadArg:
        case Error::StsNullPtr:
            throw SkipTestException("Current configuration is not supported");
        default:
            throw;
        }
    }

    TEST_CYCLE() d_nvof->calc(d_frame0, d_frame1, d_flow);

    // The names u and v are handed to the sanity-check macro, so they are kept as they were.
    cv::cuda::GpuMat flow[2];
    cv::cuda::split(d_flow, flow);
    cv::cuda::GpuMat u = flow[0];
    cv::cuda::GpuMat v = flow[1];

    CUDA_SANITY_CHECK(u, 1e-10);
    CUDA_SANITY_CHECK(v, 1e-10);
}
}} // namespace }} // namespace
#include <unordered_map>
#include <iostream>
#include <fstream>
#include <iomanip>
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/video/tracking.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
//this function is taken from opencv/samples/gpu/optical_flow.cpp
// A flow vector is accepted when neither component is NaN and both magnitudes are below 1e9.
inline bool isFlowCorrect(Point2f u)
{
    if (cvIsNaN(u.x) || cvIsNaN(u.y))
        return false;
    return fabs(u.x) < 1e9 && fabs(u.y) < 1e9;
}
//this function is taken from opencv/samples/gpu/optical_flow.cpp
// Maps a (normalised) flow vector to a BGR colour: the direction selects a position on a
// colour wheel, the magnitude controls how far the colour is pulled away from white.
static Vec3b computeColor(float fx, float fy)
{
    // relative lengths of color transitions:
    // these are chosen based on perceptual similarity
    // (e.g. one can distinguish more shades between red and yellow
    // than between yellow and green)
    const int RY = 15;
    const int YG = 6;
    const int GC = 4;
    const int CB = 11;
    const int BM = 13;
    const int MR = 6;
    const int NCOLS = RY + YG + GC + CB + BM + MR;

    // The wheel is filled once, on the first call, and reused afterwards.
    static Vec3i colorWheel[NCOLS];
    static bool wheelBuilt = false;
    if (!wheelBuilt)
    {
        Vec3i* entry = colorWheel;
        for (int step = 0; step < RY; ++step)
            *entry++ = Vec3i(255, 255 * step / RY, 0);
        for (int step = 0; step < YG; ++step)
            *entry++ = Vec3i(255 - 255 * step / YG, 255, 0);
        for (int step = 0; step < GC; ++step)
            *entry++ = Vec3i(0, 255, 255 * step / GC);
        for (int step = 0; step < CB; ++step)
            *entry++ = Vec3i(0, 255 - 255 * step / CB, 255);
        for (int step = 0; step < BM; ++step)
            *entry++ = Vec3i(255 * step / BM, 0, 255);
        for (int step = 0; step < MR; ++step)
            *entry++ = Vec3i(255, 0, 255 - 255 * step / MR);
        wheelBuilt = true;
    }

    const float magnitude = sqrt(fx * fx + fy * fy);
    const float angle = atan2(-fy, -fx) / (float)CV_PI;

    // Fractional wheel position and the two neighbouring entries to blend between.
    const float wheelPos = (angle + 1.0f) / 2.0f * (NCOLS - 1);
    const int lower = static_cast<int>(wheelPos);
    const int upper = (lower + 1) % NCOLS;
    const float weight = wheelPos - lower;

    Vec3b pix;
    for (int channel = 0; channel < 3; channel++)
    {
        const float low = colorWheel[lower][channel] / 255.0f;
        const float high = colorWheel[upper][channel] / 255.0f;
        float col = (1 - weight) * low + weight * high;
        if (magnitude <= 1)
            col = 1 - magnitude * (1 - col); // increase saturation with radius
        else
            col *= .75; // out of range
        // The wheel is stored as RGB, the pixel is written as BGR.
        pix[2 - channel] = static_cast<uchar>(255.0 * col);
    }
    return pix;
}
//this function is taken from opencv/samples/gpu/optical_flow.cpp
static void drawOpticalFlow(const Mat_<float>& flowx, const Mat_<float>& flowy
, Mat& dst, float maxmotion = -1)
{
dst.create(flowx.size(), CV_8UC3);
dst.setTo(Scalar::all(0));
// determine motion range:
float maxrad = maxmotion;
if (maxmotion <= 0)
{
maxrad = 1;
for (int y = 0; y < flowx.rows; ++y)
{
for (int x = 0; x < flowx.cols; ++x)
{
Point2f u(flowx(y, x), flowy(y, x));
if (!isFlowCorrect(u))
continue;
maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y));
}
}
}
for (int y = 0; y < flowx.rows; ++y)
{
for (int x = 0; x < flowx.cols; ++x)
{
Point2f u(flowx(y, x), flowy(y, x));
if (isFlowCorrect(u))
dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad);
}
}
}
int main(int argc, char **argv)
{
std::unordered_map<std::string, NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL> presetMap = {
{ "slow", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW },
{ "medium", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_MEDIUM },
{ "fast", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST } };
try
{
CommandLineParser cmd(argc, argv,
"{ l left | ../data/basketball1.png | specify left image }"
"{ r right | ../data/basketball2.png | specify right image }"
"{ g gpuid | 0 | cuda device index}"
"{ p preset | slow | perf preset for OF algo [ options : slow, medium, fast ]}"
"{ o output | OpenCVNvOF.flo | output flow vector file in middlebury format}"
"{ th enableTemporalHints | false | Enable temporal hints}"
"{ eh enableExternalHints | false | Enable external hints}"
"{ cb enableCostBuffer | false | Enable output cost buffer}"
"{ h help | | print help message }");
cmd.about("Nvidia's optical flow sample.");
if (cmd.has("help") || !cmd.check())
{
cmd.printMessage();
cmd.printErrors();
return 0;
}
string pathL = cmd.get<string>("left");
string pathR = cmd.get<string>("right");
string preset = cmd.get<string>("preset");
string output = cmd.get<string>("output");
bool enableExternalHints = cmd.get<bool>("enableExternalHints");
bool enableTemporalHints = cmd.get<bool>("enableTemporalHints");
bool enableCostBuffer = cmd.get<bool>("enableCostBuffer");
int gpuId = cmd.get<int>("gpuid");
if (pathL.empty()) cout << "Specify left image path\n";
if (pathR.empty()) cout << "Specify right image path\n";
if (preset.empty()) cout << "Specify perf preset for OpticalFlow algo\n";
if (pathL.empty() || pathR.empty()) return 0;
auto search = presetMap.find(preset);
if (search == presetMap.end())
{
std::cout << "Invalid preset level : " << preset << std::endl;
return 0;
}
NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset = search->second;
Mat frameL = imread(pathL, IMREAD_GRAYSCALE);
Mat frameR = imread(pathR, IMREAD_GRAYSCALE);
if (frameL.empty()) cout << "Can't open '" << pathL << "'\n";
if (frameR.empty()) cout << "Can't open '" << pathR << "'\n";
if (frameL.empty() || frameR.empty()) return -1;
Ptr<NvidiaOpticalFlow_1_0> nvof = NvidiaOpticalFlow_1_0::create(
frameL.size().width, frameL.size().height, perfPreset,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuId);
Mat flowx, flowy, flowxy, upsampledFlowXY, image;
nvof->calc(frameL, frameR, flowxy);
nvof->upSampler(flowxy, frameL.size().width, frameL.size().height,
nvof->getGridSize(), upsampledFlowXY);
if (output.size() != 0)
{
if (!writeOpticalFlow(output, upsampledFlowXY))
cout << "Failed to save Flow Vector" << endl;
else
cout << "Flow vector saved as '" << output << "'\n";
}
Mat planes[] = { flowx, flowy };
split(upsampledFlowXY, planes);
flowx = planes[0]; flowy = planes[1];
drawOpticalFlow(flowx, flowy, image, 10);
imshow("Colorize image",image);
waitKey(0);
nvof->collectGarbage();
}
catch (const std::exception &ex)
{
std::cout << ex.what() << std::endl;
return 1;
}
return 0;
}
\ No newline at end of file
#include <iostream>
#include <fstream>
#include "opencv2/core.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/highgui.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
// A flow vector is accepted when neither component is NaN and both magnitudes are below 1e9.
inline bool isFlowCorrect(Point2f u)
{
    if (cvIsNaN(u.x) || cvIsNaN(u.y))
        return false;
    return fabs(u.x) < 1e9 && fabs(u.y) < 1e9;
}
// Maps a (normalised) flow vector to a BGR colour: the direction selects a position on a
// colour wheel, the magnitude controls how far the colour is pulled away from white.
static Vec3b computeColor(float fx, float fy)
{
    // relative lengths of color transitions:
    // these are chosen based on perceptual similarity
    // (e.g. one can distinguish more shades between red and yellow
    // than between yellow and green)
    const int RY = 15;
    const int YG = 6;
    const int GC = 4;
    const int CB = 11;
    const int BM = 13;
    const int MR = 6;
    const int NCOLS = RY + YG + GC + CB + BM + MR;

    // The wheel is filled once, on the first call, and reused afterwards.
    static Vec3i colorWheel[NCOLS];
    static bool wheelBuilt = false;
    if (!wheelBuilt)
    {
        Vec3i* entry = colorWheel;
        for (int step = 0; step < RY; ++step)
            *entry++ = Vec3i(255, 255 * step / RY, 0);
        for (int step = 0; step < YG; ++step)
            *entry++ = Vec3i(255 - 255 * step / YG, 255, 0);
        for (int step = 0; step < GC; ++step)
            *entry++ = Vec3i(0, 255, 255 * step / GC);
        for (int step = 0; step < CB; ++step)
            *entry++ = Vec3i(0, 255 - 255 * step / CB, 255);
        for (int step = 0; step < BM; ++step)
            *entry++ = Vec3i(255 * step / BM, 0, 255);
        for (int step = 0; step < MR; ++step)
            *entry++ = Vec3i(255, 0, 255 - 255 * step / MR);
        wheelBuilt = true;
    }

    const float magnitude = sqrt(fx * fx + fy * fy);
    const float angle = atan2(-fy, -fx) / (float)CV_PI;

    // Fractional wheel position and the two neighbouring entries to blend between.
    const float wheelPos = (angle + 1.0f) / 2.0f * (NCOLS - 1);
    const int lower = static_cast<int>(wheelPos);
    const int upper = (lower + 1) % NCOLS;
    const float weight = wheelPos - lower;

    Vec3b pix;
    for (int channel = 0; channel < 3; channel++)
    {
        const float low = colorWheel[lower][channel] / 255.0f;
        const float high = colorWheel[upper][channel] / 255.0f;
        float col = (1 - weight) * low + weight * high;
        if (magnitude <= 1)
            col = 1 - magnitude * (1 - col); // increase saturation with radius
        else
            col *= .75; // out of range
        // The wheel is stored as RGB, the pixel is written as BGR.
        pix[2 - channel] = static_cast<uchar>(255.0 * col);
    }
    return pix;
}
static void drawOpticalFlow(const Mat_<float>& flowx, const Mat_<float>& flowy, Mat& dst, float maxmotion = -1)
{
dst.create(flowx.size(), CV_8UC3);
dst.setTo(Scalar::all(0));
// determine motion range:
float maxrad = maxmotion;
if (maxmotion <= 0)
{
maxrad = 1;
for (int y = 0; y < flowx.rows; ++y)
{
for (int x = 0; x < flowx.cols; ++x)
{
Point2f u(flowx(y, x), flowy(y, x));
if (!isFlowCorrect(u))
continue;
maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y));
}
}
}
for (int y = 0; y < flowx.rows; ++y)
{
for (int x = 0; x < flowx.cols; ++x)
{
Point2f u(flowx(y, x), flowy(y, x));
if (isFlowCorrect(u))
dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad);
}
}
}
static void showFlow(const char* name, const GpuMat& d_flow)
{
GpuMat planes[2];
cuda::split(d_flow, planes);
Mat flowx(planes[0]);
Mat flowy(planes[1]);
Mat out;
drawOpticalFlow(flowx, flowy, out, 10);
imshow(name, out);
}
int main(int argc, const char* argv[])
{
string filename1, filename2;
if (argc < 3)
{
cerr << "Usage : " << argv[0] << " <frame0> <frame1>" << endl;
filename1 = "../data/basketball1.png";
filename2 = "../data/basketball2.png";
}
else
{
filename1 = argv[1];
filename2 = argv[2];
}
Mat frame0 = imread(filename1, IMREAD_GRAYSCALE);
Mat frame1 = imread(filename2, IMREAD_GRAYSCALE);
if (frame0.empty())
{
cerr << "Can't open image [" << filename1 << "]" << endl;
return -1;
}
if (frame1.empty())
{
cerr << "Can't open image [" << filename2 << "]" << endl;
return -1;
}
if (frame1.size() != frame0.size())
{
cerr << "Images should be of equal sizes" << endl;
return -1;
}
GpuMat d_frame0(frame0);
GpuMat d_frame1(frame1);
GpuMat d_flow(frame0.size(), CV_32FC2), d_flowxy;
Ptr<cuda::BroxOpticalFlow> brox = cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10);
Ptr<cuda::DensePyrLKOpticalFlow> lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7));
Ptr<cuda::FarnebackOpticalFlow> farn = cuda::FarnebackOpticalFlow::create();
Ptr<cuda::OpticalFlowDual_TVL1> tvl1 = cuda::OpticalFlowDual_TVL1::create();
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof = cuda::NvidiaOpticalFlow_1_0::create(
frame0.size().width, frame0.size().height, NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST);
{
GpuMat d_frame0f;
GpuMat d_frame1f;
d_frame0.convertTo(d_frame0f, CV_32F, 1.0 / 255.0);
d_frame1.convertTo(d_frame1f, CV_32F, 1.0 / 255.0);
const int64 start = getTickCount();
brox->calc(d_frame0f, d_frame1f, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "Brox : " << timeSec << " sec" << endl;
showFlow("Brox", d_flow);
}
{
const int64 start = getTickCount();
lk->calc(d_frame0, d_frame1, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "LK : " << timeSec << " sec" << endl;
showFlow("LK", d_flow);
}
{
const int64 start = getTickCount();
farn->calc(d_frame0, d_frame1, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "Farn : " << timeSec << " sec" << endl;
showFlow("Farn", d_flow);
}
{
const int64 start = getTickCount();
tvl1->calc(d_frame0, d_frame1, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "TVL1 : " << timeSec << " sec" << endl;
showFlow("TVL1", d_flow);
}
{
//The timing displayed below includes the time taken to copy the input buffers to the OF CUDA input buffers
//and to copy the output buffers from the OF CUDA output buffer to the output buffer.
//Hence it is expected to be more than what is displayed in the NVIDIA Optical Flow SDK documentation.
const int64 start = getTickCount();
nvof->calc(d_frame0, d_frame1, d_flowxy);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "NVIDIAOpticalFlow : " << timeSec << " sec" << endl;
nvof->upSampler(d_flowxy, frame0.size().width, frame0.size().height,
nvof->getGridSize(), d_flow);
showFlow("NVIDIAOpticalFlow", d_flow);
}
imshow("Frame 0", frame0);
imshow("Frame 1", frame1);
waitKey();
return 0;
}
//
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
//
#include "precomp.hpp"
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
Ptr<NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, int, NVIDIA_OF_PERF_LEVEL, bool, bool) { throw_no_cuda(); return Ptr<NvidiaOpticalFlow_1_0>(); }
#elif !defined HAVE_NVIDIA_OPTFLOW
CV_Error(cv::Error::HeaderIsNull, "Nvidia Optical Flow headers not found. Make sure cmake downloads it properly");
#else
#include "nvOpticalFlowCommon.h"
#include "nvOpticalFlowCuda.h"
// Platform glue for loading the optical flow library at run time.
#if defined(_WIN32) || defined(_WIN64)
#include <Windows.h>
#else
// Outside Windows the module handle is a plain pointer (the code below stores the result
// of dlopen() in it) and the case-insensitive compare is mapped to strcasecmp.
#define HMODULE void *
#define _stricmp strcasecmp
#include <dlfcn.h>
#endif
// Name of the optical flow library to load: one DLL name for 64-bit Windows, one for
// other Windows builds, and a shared-object name everywhere else.
#if defined(_WIN64)
#define MODULENAME TEXT("nvofapi64.dll")
#elif defined(_WIN32)
#define MODULENAME TEXT("nvofapi.dll")
#else
#define MODULENAME "libnvidia-opticalflow.so.1"
#endif
/* Evaluates an NVIDIA Optical Flow API call once and raises a cv::Exception through
 * CV_Error when the returned status is not NV_OF_SUCCESS.
 *
 * The message has the form "<call text> returned error <code>:<status name>". Each known
 * status value (1..11) is mapped to the OpenCV error code listed in the switch below.
 * A non-success status outside that range is not ignored: it is reported as
 * NV_OF_ERR_UNKNOWN with cv::Error::StsInternal.
 *
 * The definition ends on the "while (0)" line without a line continuation, so the
 * statement that follows the macro definition is not swallowed into it.
 * Local names carry a trailing underscore to avoid clashing with names used in the
 * argument expression.
 */
#define NVOF_API_CALL(nvOFAPI)                                                                  \
    do                                                                                          \
    {                                                                                           \
        const NV_OF_STATUS nvofStatus_ = (nvOFAPI);                                             \
        if (nvofStatus_ != NV_OF_SUCCESS)                                                       \
        {                                                                                       \
            const char* nvofStatusName_ = "NV_OF_ERR_UNKNOWN";                                  \
            int nvofCvError_ = cv::Error::StsInternal;                                          \
            switch (static_cast<int>(nvofStatus_))                                              \
            {                                                                                   \
            case 1:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_OF_NOT_AVAILABLE";                                 \
                nvofCvError_ = cv::Error::StsBadFunc;                                           \
                break;                                                                          \
            case 2:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_UNSUPPORTED_DEVICE";                               \
                nvofCvError_ = cv::Error::StsBadArg;                                            \
                break;                                                                          \
            case 3:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_DEVICE_DOES_NOT_EXIST";                            \
                nvofCvError_ = cv::Error::StsBadArg;                                            \
                break;                                                                          \
            case 4:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_INVALID_PTR";                                      \
                nvofCvError_ = cv::Error::StsNullPtr;                                           \
                break;                                                                          \
            case 5:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_INVALID_PARAM";                                    \
                nvofCvError_ = cv::Error::StsBadArg;                                            \
                break;                                                                          \
            case 6:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_INVALID_CALL";                                     \
                nvofCvError_ = cv::Error::BadCallBack;                                          \
                break;                                                                          \
            case 7:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_INVALID_VERSION";                                  \
                nvofCvError_ = cv::Error::StsError;                                             \
                break;                                                                          \
            case 8:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_OUT_OF_MEMORY";                                    \
                nvofCvError_ = cv::Error::StsNoMem;                                             \
                break;                                                                          \
            case 9:                                                                             \
                nvofStatusName_ = "NV_OF_ERR_NOT_INITIALIZED";                                  \
                nvofCvError_ = cv::Error::StsBadArg;                                            \
                break;                                                                          \
            case 10:                                                                            \
                nvofStatusName_ = "NV_OF_ERR_UNSUPPORTED_FEATURE";                              \
                nvofCvError_ = cv::Error::StsBadArg;                                            \
                break;                                                                          \
            case 11:                                                                            \
                nvofStatusName_ = "NV_OF_ERR_GENERIC";                                          \
                nvofCvError_ = cv::Error::StsInternal;                                          \
                break;                                                                          \
            default:                                                                            \
                break;                                                                          \
            }                                                                                   \
            std::ostringstream nvofErrorLog_;                                                   \
            nvofErrorLog_ << #nvOFAPI << " returned error "                                     \
                          << static_cast<unsigned int>(nvofStatus_) << ":" << nvofStatusName_;  \
            CV_Error(nvofCvError_, nvofErrorLog_.str());                                        \
        }                                                                                       \
    } while (0)
using namespace std;
using namespace cv;
using namespace cv::cuda;
namespace
{
class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0
{
private:
int m_width;
int m_height;
NV_OF_PERF_LEVEL m_preset;
bool m_enableTemporalHints;
bool m_enableExternalHints;
bool m_enableCostBuffer;
int m_gpuId;
CUcontext m_cuContext;
NV_OF_BUFFER_FORMAT m_format;
NV_OF_OUTPUT_VECTOR_GRID_SIZE m_gridSize;
NV_OF_BUFFER_DESCRIPTOR m_inputBufferDesc;
NV_OF_BUFFER_DESCRIPTOR m_outputBufferDesc;
NV_OF_BUFFER_DESCRIPTOR m_hintBufferDesc;
NV_OF_BUFFER_DESCRIPTOR m_costBufferDesc;
uint32_t m_outputElementSize;
uint32_t m_costBufElementSize;
uint32_t m_hintBufElementSize;
NV_OF_INIT_PARAMS m_initParams;
std::unique_ptr<NV_OF_CUDA_API_FUNCTION_LIST> m_ofAPI;
NvOFHandle m_hOF; //nvof handle
NvOFGPUBufferHandle m_hInputBuffer;
NvOFGPUBufferHandle m_hReferenceBuffer;
NvOFGPUBufferHandle m_hOutputBuffer;
NvOFGPUBufferHandle m_hHintBuffer;
NvOFGPUBufferHandle m_hCostBuffer;
CUdeviceptr m_frame0cuDevPtr;
CUdeviceptr m_frame1cuDevPtr;
CUdeviceptr m_flowXYcuDevPtr;
CUdeviceptr m_hintcuDevPtr;
CUdeviceptr m_costcuDevPtr;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_inputBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_referenceBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_hintBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_costBufferStrideInfo;
NV_OF_CUDA_API_FUNCTION_LIST* GetAPI()
{
std::lock_guard<std::mutex> lock(m_lock);
return m_ofAPI.get();
}
NvOFHandle GetHandle() { return m_hOF; }
protected:
HMODULE m_hModule; //module handle to load nvof dll
std::mutex m_lock;
public:
NvidiaOpticalFlowImpl(int width, int height, NV_OF_PERF_LEVEL perfPreset,
bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId);
virtual void calc(InputArray inputImage, InputArray referenceImage,
InputOutputArray flow, Stream& stream = Stream::Null(),
InputArray hint = cv::noArray(), OutputArray cost = cv::noArray());
virtual void collectGarbage();
virtual void upSampler(InputArray flow, int width, int height,
int gridSize, InputOutputArray upsampledFlow);
virtual int getGridSize() const { return m_gridSize; }
};
// Sets up the optical flow instance for images of width x height pixels:
//   1. validates the GPU ordinal and obtains the current CUDA context,
//   2. fills in the buffer descriptors (hint / cost only when enabled),
//   3. loads the optical flow library and resolves its entry point,
//   4. creates and initialises the optical flow handle,
//   5. creates the GPU buffers and records their device pointers and strides.
// Raises a cv::Exception (through CV_Error / NVOF_API_CALL) when any step fails.
NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
    int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
    bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId) :
    m_width(width), m_height(height), m_preset(perfPreset),
    m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints),
    m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints),
    m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId),
    m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
    m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
{
    int nGpu = 0;

    // NOTE(review): these are CUDA runtime calls checked with cuSafeCall - confirm that this
    // is the intended checker for runtime-API status values.
    cuSafeCall(cudaGetDeviceCount(&nGpu));
    if (m_gpuId < 0 || m_gpuId >= nGpu)
    {
        CV_Error(Error::StsBadArg, "Invalid GPU Ordinal");
    }

    cuSafeCall(cudaSetDevice(m_gpuId));
    // m_cuContext is still null here, so this frees nothing; presumably it is only there to
    // make the runtime set up its context before it is queried below - TODO confirm.
    cuSafeCall(cudaFree(m_cuContext));

    cuSafeCall(cuCtxGetCurrent(&m_cuContext));

    if (m_gridSize != NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
    {
        CV_Error(Error::StsBadArg, "Unsupported grid size");
    }

    // One output element per grid cell, rounding up for partially covered cells.
    auto nOutWidth = (m_width + m_gridSize - 1) / m_gridSize;
    auto nOutHeight = (m_height + m_gridSize - 1) / m_gridSize;

    auto outBufFmt = NV_OF_BUFFER_FORMAT_SHORT2;

    memset(&m_inputBufferDesc, 0, sizeof(m_inputBufferDesc));
    m_inputBufferDesc.width = m_width;
    m_inputBufferDesc.height = m_height;
    m_inputBufferDesc.bufferFormat = m_format;
    m_inputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT;

    memset(&m_outputBufferDesc, 0, sizeof(m_outputBufferDesc));
    m_outputBufferDesc.width = nOutWidth;
    m_outputBufferDesc.height = nOutHeight;
    m_outputBufferDesc.bufferFormat = outBufFmt;
    m_outputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT;
    m_outputElementSize = sizeof(NV_OF_FLOW_VECTOR);

    if (m_enableExternalHints)
    {
        // Hints use the same geometry and element format as the output flow.
        memset(&m_hintBufferDesc, 0, sizeof(m_hintBufferDesc));
        m_hintBufferDesc.width = nOutWidth;
        m_hintBufferDesc.height = nOutHeight;
        m_hintBufferDesc.bufferFormat = outBufFmt;
        m_hintBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_HINT;
        m_hintBufElementSize = m_outputElementSize;
    }

    if (m_enableCostBuffer)
    {
        memset(&m_costBufferDesc, 0, sizeof(m_costBufferDesc));
        m_costBufferDesc.width = nOutWidth;
        m_costBufferDesc.height = nOutHeight;
        m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT;
        m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST;
        m_costBufElementSize = sizeof(uint32_t);
    }

    // Load the optical flow library.
#if defined(_WIN32) || defined(_WIN64)
    HMODULE hModule = LoadLibrary(MODULENAME);
#else
    void *hModule = dlopen(MODULENAME, RTLD_LAZY);
#endif

    if (hModule == NULL)
    {
        CV_Error(Error::StsBadFunc,
            "Cannot find NvOF library.");
    }
    m_hModule = hModule;

    typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda)
        (uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
#if defined(_WIN32)
    PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
        = (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hModule, "NvOFAPICreateInstanceCuda");
#else
    PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
        = (PFNNvOFAPICreateInstanceCuda)dlsym(m_hModule, "NvOFAPICreateInstanceCuda");
#endif
    if (!NvOFAPICreateInstanceCuda)
    {
        // Nothing from the library has been used yet, so unload it before reporting the
        // error; otherwise the handle would be lost together with this object.
#if defined(_WIN32) || defined(_WIN64)
        FreeLibrary(m_hModule);
#else
        dlclose(m_hModule);
#endif
        m_hModule = NULL;
        CV_Error(Error::StsBadFunc,
            "Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library");
    }

    // Obtain the API function table and create the optical flow handle.
    m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST());
    NVOF_API_CALL(NvOFAPICreateInstanceCuda(NV_OF_API_VERSION, m_ofAPI.get()));
    NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF));

    memset(&m_initParams, 0, sizeof(m_initParams));
    m_initParams.width = m_inputBufferDesc.width;
    m_initParams.height = m_inputBufferDesc.height;
    m_initParams.enableExternalHints = (NV_OF_BOOL)m_enableExternalHints;
    m_initParams.enableOutputCost = (NV_OF_BOOL)m_enableCostBuffer;
    m_initParams.hintGridSize = (NV_OF_BOOL)m_enableExternalHints == NV_OF_TRUE ?
        NV_OF_HINT_VECTOR_GRID_SIZE_4 : NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED;
    m_initParams.outGridSize = m_gridSize;
    m_initParams.mode = NV_OF_MODE_OPTICALFLOW;
    m_initParams.perfLevel = m_preset;
    NVOF_API_CALL(GetAPI()->nvOFInit(GetHandle(), &m_initParams));

    //Input Buffer 1
    NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
        &m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hInputBuffer));
    m_frame0cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hInputBuffer);
    NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
        m_hInputBuffer, &m_inputBufferStrideInfo));

    //Input Buffer 2
    NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
        &m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hReferenceBuffer));
    m_frame1cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hReferenceBuffer);
    NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
        m_hReferenceBuffer, &m_referenceBufferStrideInfo));

    //Output Buffer
    NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
        &m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputBuffer));
    m_flowXYcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputBuffer);
    NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
        m_hOutputBuffer, &m_outputBufferStrideInfo));

    //Hint Buffer
    if (m_enableExternalHints)
    {
        NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
            &m_hintBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hHintBuffer));
        m_hintcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hHintBuffer);
        NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
            m_hHintBuffer, &m_hintBufferStrideInfo));
    }

    //Cost Buffer
    if (m_enableCostBuffer)
    {
        NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
            &m_costBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hCostBuffer));
        m_costcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hCostBuffer);
        NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
            m_hCostBuffer, &m_costBufferStrideInfo));
    }
}
/**
 * Computes optical flow between two frames with the NVIDIA Optical Flow SDK.
 *
 * Both frames are staged into the SDK input/reference buffers, nvOFExecute is
 * invoked, and the result is returned as a CV_16SC2 matrix holding one vector
 * per m_gridSize x m_gridSize block, i.e.
 * ceil(m_width / m_gridSize) x ceil(m_height / m_gridSize) elements.
 *
 * @param _frame0 Input image; Mat or GpuMat of size m_width x m_height.
 * @param _frame1 Reference image; Mat or GpuMat of size m_width x m_height.
 * @param _flow   Receives the block-level flow field; Mat or GpuMat.
 * @param stream  When non-null it is used as the SDK input stream.
 * @param hint    External hints (Mat or GpuMat); only read when
 *                m_enableExternalHints is set.
 * @param cost    Receives the cost buffer (Mat or GpuMat); only written when
 *                m_enableCostBuffer is set.
 *
 * Raises Error::StsBadArg for an unsupported array kind and Error::StsBadSize
 * when a frame does not match the configured dimensions.
 */
void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow,
    Stream& stream, InputArray hint, OutputArray cost)
{
    // Validate the frames before any stream or device work is started.
    if (!_frame0.isMat() && !_frame0.isGpuMat())
    {
        CV_Error(Error::StsBadArg,
            "Incorrect input. Pass input image (frame0) as Mat or GpuMat");
    }
    if (!_frame1.isMat() && !_frame1.isGpuMat())
    {
        CV_Error(Error::StsBadArg,
            "Incorrect input. Pass reference image (frame1) as Mat or GpuMat");
    }

    // The frames are copied through GpuMat headers that take their geometry from
    // the caller's arrays but point at SDK-owned memory, so a mismatching frame
    // would be written with the wrong layout and possibly past the buffer.
    // NOTE(review): assumes the SDK buffers were created for m_width x m_height;
    // the buffer descriptors are set up outside this function - confirm.
    const Size configuredSize(static_cast<int>(m_width), static_cast<int>(m_height));
    if (_frame0.size() != configuredSize || _frame1.size() != configuredSize)
    {
        CV_Error(Error::StsBadSize,
            "Incorrect input. frame0 and frame1 must match the width and height "
            "the optical flow object was created with");
    }

    Stream inputStream = {};
    Stream outputStream = {};
    if (stream)
        inputStream = stream;

    NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
        StreamAccessor::getStream(inputStream), StreamAccessor::getStream(outputStream)));

    // Headers over the SDK buffers; no device memory is allocated here.
    GpuMat frame0GpuMat(_frame0.size(), _frame0.type(), (void*)m_frame0cuDevPtr,
        m_inputBufferStrideInfo.strideInfo[0].strideXInBytes);
    GpuMat frame1GpuMat(_frame1.size(), _frame1.type(), (void*)m_frame1cuDevPtr,
        m_referenceBufferStrideInfo.strideInfo[0].strideXInBytes);
    GpuMat flowXYGpuMat(Size((m_width + m_gridSize - 1) / m_gridSize,
        (m_height + m_gridSize - 1) / m_gridSize), CV_16SC2,
        (void*)m_flowXYcuDevPtr, m_outputBufferStrideInfo.strideInfo[0].strideXInBytes);

    // Stage frame0: host data is uploaded, device data is copied.
    if (_frame0.isMat())
    {
        frame0GpuMat.upload(_frame0);
    }
    else
    {
        _frame0.copyTo(frame0GpuMat);
    }

    // Stage frame1 the same way.
    if (_frame1.isMat())
    {
        frame1GpuMat.upload(_frame1);
    }
    else
    {
        _frame1.copyTo(frame1GpuMat);
    }

    if (m_enableExternalHints)
    {
        GpuMat hintGpuMat(hint.size(), hint.type(), (void*)m_hintcuDevPtr,
            m_hintBufferStrideInfo.strideInfo[0].strideXInBytes);
        if (hint.isMat())
        {
            hintGpuMat.upload(hint);
        }
        else if (hint.isGpuMat())
        {
            hint.copyTo(hintGpuMat);
        }
        else
        {
            CV_Error(Error::StsBadArg,"Incorrect hint buffer passed. Pass Mat or GpuMat");
        }
    }

    // The popped context goes into a local so that m_cuContext is never
    // overwritten by whatever happens to be on top of the context stack.
    CUcontext poppedContext = nullptr;

    cuSafeCall(cuCtxPushCurrent(m_cuContext));
    inputStream.waitForCompletion();
    cuSafeCall(cuCtxPopCurrent(&poppedContext));

    //Execute Call
    NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
    NV_OF_EXECUTE_OUTPUT_PARAMS exeOutParams;

    memset(&exeInParams, 0, sizeof(exeInParams));
    exeInParams.inputFrame = m_hInputBuffer;
    exeInParams.referenceFrame = m_hReferenceBuffer;
    exeInParams.disableTemporalHints = ((NV_OF_BOOL)m_enableTemporalHints == NV_OF_TRUE) ?
        NV_OF_FALSE : NV_OF_TRUE;
    exeInParams.externalHints = m_initParams.enableExternalHints == NV_OF_TRUE ?
        m_hHintBuffer : nullptr;

    memset(&exeOutParams, 0, sizeof(exeOutParams));
    exeOutParams.outputBuffer = m_hOutputBuffer;
    exeOutParams.outputCostBuffer = m_initParams.enableOutputCost == NV_OF_TRUE ?
        m_hCostBuffer : nullptr;

    NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));

    cuSafeCall(cuCtxPushCurrent(m_cuContext));
    outputStream.waitForCompletion();
    cuSafeCall(cuCtxPopCurrent(&poppedContext));

    // Hand the flow field back in whichever container the caller supplied.
    if (_flow.isMat())
        flowXYGpuMat.download(_flow);
    else if (_flow.isGpuMat())
        flowXYGpuMat.copyTo(_flow);
    else
        CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat");

    if (m_enableCostBuffer)
    {
        // The cost buffer is exposed with the same block geometry as the flow.
        GpuMat costGpuMat(Size((m_width + m_gridSize - 1) / m_gridSize,
            (m_height + m_gridSize - 1) / m_gridSize), CV_32SC1, (void*)m_costcuDevPtr,
            m_costBufferStrideInfo.strideInfo[0].strideXInBytes);
        if (cost.isMat())
            costGpuMat.download(cost);
        else if (cost.isGpuMat())
            costGpuMat.copyTo(cost);
        else
            CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
    }

    cuSafeCall(cuCtxSynchronize());
}
/**
 * Releases the SDK buffers, the optical flow handle and the CUDA context
 * reference held by this object.
 *
 * Every handle is cleared as soon as it has been destroyed, so calling this
 * method more than once never passes an already-destroyed handle back to the
 * SDK.
 */
void NvidiaOpticalFlowImpl::collectGarbage()
{
    if (m_hInputBuffer)
    {
        NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hInputBuffer));
        m_hInputBuffer = nullptr;
    }
    if (m_hReferenceBuffer)
    {
        NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hReferenceBuffer));
        m_hReferenceBuffer = nullptr;
    }
    if (m_hOutputBuffer)
    {
        NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputBuffer));
        m_hOutputBuffer = nullptr;
    }
    // The hint and cost handles are only inspected when their feature is enabled.
    if (m_enableExternalHints && m_hHintBuffer)
    {
        NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hHintBuffer));
        m_hHintBuffer = nullptr;
    }
    if (m_enableCostBuffer && m_hCostBuffer)
    {
        NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer));
        m_hCostBuffer = nullptr;
    }
    if (m_hOF)
    {
        NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF));
        m_hOF = nullptr;
    }
    if (m_cuContext)
    {
        // NOTE(review): cudaDeviceReset() is a runtime-API call that tears down
        // the current device's state for the whole process, which may invalidate
        // CUDA resources owned by other code; it is also wrapped in the
        // driver-API cuSafeCall. Left unchanged because the way m_cuContext is
        // obtained is not visible here - confirm and replace if appropriate.
        cuSafeCall(cudaDeviceReset());
        m_cuContext = nullptr;
    }
}
/**
 * Expands a block-level flow field into a dense, per-pixel CV_32FC2 field.
 *
 * Every output pixel (x, y) takes the vector of the block
 * (x / gridSize, y / gridSize); each component is divided by 2^5 to convert the
 * stored integer value to float.
 *
 * @param _flow         Block-level flow, Mat or GpuMat, holding exactly
 *                      ceil(width / gridSize) x ceil(height / gridSize)
 *                      elements of sizeof(NV_OF_FLOW_VECTOR) bytes each.
 * @param width         Width of the dense output in pixels (> 0).
 * @param height        Height of the dense output in pixels (> 0).
 * @param gridSize      Edge length of one flow block in pixels (> 0).
 * @param upsampledFlow Receives the width x height CV_32FC2 result; Mat or GpuMat.
 *
 * Raises Error::StsBadArg for an unsupported array kind and an assertion
 * failure when the dimensions or the flow geometry are inconsistent.
 */
void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
    int gridSize, InputOutputArray upsampledFlow)
{
    // gridSize is used as a divisor below; width/height size the output.
    CV_Assert(width > 0 && height > 0 && gridSize > 0);

    Mat flow;
    if (_flow.isMat())
    {
        _flow.copyTo(flow);
    }
    else if (_flow.isGpuMat())
    {
        _flow.getGpuMat().download(flow);
    }
    else
    {
        CV_Error(Error::StsBadArg,
            "Incorrect flow buffer passed. Pass either Mat or GpuMat");
    }

    const int widthInBlocks = (width + gridSize - 1) / gridSize;
    const int heightInBlocks = (height + gridSize - 1) / gridSize;

    // The rows are reinterpreted as NV_OF_FLOW_VECTOR below, so both the element
    // size and the block geometry must match or the reads leave the matrix.
    CV_Assert(flow.elemSize() == sizeof(NV_OF_FLOW_VECTOR));
    CV_Assert(flow.cols == widthInBlocks && flow.rows == heightInBlocks);

    const float fractionalScale = (float)(1 << 5);

    Mat output(height, width, CV_32FC2);
    for (int y = 0; y < height; ++y)
    {
        // All pixels of this row share the same row of blocks.
        const NV_OF_FLOW_VECTOR* blockRow = flow.ptr<NV_OF_FLOW_VECTOR>(y / gridSize);
        float* dstRow = output.ptr<float>(y);
        for (int x = 0; x < width; ++x)
        {
            const NV_OF_FLOW_VECTOR& blockVector = blockRow[x / gridSize];
            dstRow[2 * x] = (float)(blockVector.flowx / fractionalScale);
            dstRow[2 * x + 1] = (float)(blockVector.flowy / fractionalScale);
        }
    }

    if (upsampledFlow.isMat())
    {
        output.copyTo(upsampledFlow);
    }
    else if (upsampledFlow.isGpuMat())
    {
        GpuMat deviceOutput(output);
        deviceOutput.copyTo(upsampledFlow);
    }
    else
    {
        CV_Error(Error::StsBadArg,
            "Incorrect flow buffer passed for upsampled flow. Pass either Mat or GpuMat");
    }
}}
/**
 * Factory for the NVIDIA Optical Flow 1.0 implementation.
 *
 * The arguments are forwarded in order to the NvidiaOpticalFlowImpl
 * constructor; the performance preset is first converted to NV_OF_PERF_LEVEL.
 */
Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
    int width, int height, NVIDIA_OF_PERF_LEVEL perfPreset,
    bool bEnableTemporalHints, bool bEnableExternalHints,
    bool bEnableCostBuffer, int gpuId)
{
    const NV_OF_PERF_LEVEL sdkPreset = static_cast<NV_OF_PERF_LEVEL>(perfPreset);
    return makePtr<NvidiaOpticalFlowImpl>(width, height, sdkPreset,
        bEnableTemporalHints, bEnableExternalHints, bEnableCostBuffer, gpuId);
}
#endif
\ No newline at end of file
...@@ -409,6 +409,106 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine( ...@@ -409,6 +409,106 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
testing::Values(Gamma(0.0), Gamma(1.0)))); testing::Values(Gamma(0.0), Gamma(1.0))));
//////////////////////////////////////////////////////
// NvidiaOpticalFlow_1_0
// Fixture for the NvidiaOpticalFlow_1_0 tests, parameterised by CUDA device.
struct NvidiaOpticalFlow_1_0 : testing::TestWithParam<cv::cuda::DeviceInfo>
{
    // Device received as the test parameter for the current run.
    cv::cuda::DeviceInfo devInfo;

    // Stores the parameter and makes that device the active CUDA device.
    virtual void SetUp()
    {
        devInfo = GetParam();
        const int activeDeviceId = devInfo.deviceID();
        cv::cuda::setDevice(activeDeviceId);
    }
};
// Computes flow for a fixed image pair, upsamples it to full resolution and
// compares the result with the stored golden flow file.
CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
{
    cv::Mat frame0 = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    const cv::Size frameSize = frame0.size();
    const int width = frameSize.width;
    const int height = frameSize.height;

    // All optional features are off; the object is created on GPU 0.
    const bool useTemporalHints = false;
    const bool useExternalHints = false;
    const bool useCostBuffer = false;
    const int gpuIndex = 0;

    cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
    try
    {
        d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
            cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
            useTemporalHints, useExternalHints, useCostBuffer, gpuIndex);
    }
    catch (const cv::Exception& e)
    {
        // These error codes are treated as "not available here", not as failures.
        const bool unsupported = e.code == Error::StsBadFunc
            || e.code == Error::StsBadArg
            || e.code == Error::StsNullPtr;
        if (unsupported)
            throw SkipTestException("Current configuration is not supported");
        throw;
    }

    const int gridSize = d_nvof->getGridSize();

    Mat flow;
    Mat upsampledFlow;
    d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
    d_nvof->upSampler(flow, width, height, gridSize, upsampledFlow);

    const std::string goldenPath =
        std::string(cvtest::TS::ptr()->get_data_path()) + "opticalflow/nvofGolden.flo";
    cv::Mat golden = cv::readOpticalFlow(goldenPath.c_str());
    ASSERT_FALSE(golden.empty());

    EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
}
// Computes flow for the rubberwhale image pair and range-checks both flow
// components.
// NOTE(review): calc() returns integer (CV_16SC2) data, so a NaN/Inf check on
// it may be vacuous - confirm whether the upsampled float flow was intended.
CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
{
    cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    const int width = frame0.size().width;
    const int height = frame0.size().height;
    const bool enableTemporalHints = false;
    const bool enableExternalHints = false;
    const bool enableCostBuffer = false;
    const int gpuid = 0;

    cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
    try
    {
        d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
            cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
            enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
    }
    catch (const cv::Exception& e)
    {
        // These error codes are treated as "not available here", not as failures.
        if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
            throw SkipTestException("Current configuration is not supported");
        throw;
    }

    Mat flow;
    d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);

    // planes[0] receives the x component, planes[1] the y component.
    Mat planes[2];
    split(flow, planes);

    EXPECT_TRUE(cv::checkRange(planes[0]));
    EXPECT_TRUE(cv::checkRange(planes[1]));
}
// Instantiates the NvidiaOpticalFlow_1_0 tests under the CUDA_OptFlow prefix,
// parameterised with ALL_DEVICES (defined outside this chunk).
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_1_0, ALL_DEVICES);
}} // namespace }} // namespace
#endif // HAVE_CUDA #endif // HAVE_CUDA
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment