Commit 9034a2d6 authored by Vladislav Vinogradov

fixed gpu arithm functions (mismatch with cpu version)

parent 15902284
...@@ -488,11 +488,29 @@ namespace cv { namespace gpu { namespace device ...@@ -488,11 +488,29 @@ namespace cv { namespace gpu { namespace device
template <typename T, typename D> struct Multiply : binary_function<T, T, D> template <typename T, typename D> struct Multiply : binary_function<T, T, D>
{ {
Multiply(double scale_) : scale(scale_) {} Multiply(float scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const __device__ __forceinline__ D operator ()(T a, T b) const
{ {
return saturate_cast<D>(scale * a * b); return saturate_cast<D>(scale * a * b);
} }
const float scale;
};
// Partial specialization of the Multiply functor for a double destination type.
// Computes scale * a * b for a pair of source elements of type T and returns
// the product as a double; no saturate_cast is applied in this form.
template <typename T> struct Multiply<T, double> : binary_function<T, T, double>
{
// Stores the scale factor in double precision.
Multiply(double scale_) : scale(scale_) {}
// Device-side call operator: returns the scaled product of a and b.
__device__ __forceinline__ double operator ()(T a, T b) const
{
// `scale` is double, so the multiplication is carried out in double precision.
return scale * a * b;
}
// Factor applied to every element product; fixed at construction.
const double scale;
};
template <> struct Multiply<int, int> : binary_function<int, int, int>
{
Multiply(double scale_) : scale(scale_) {}
__device__ __forceinline__ int operator ()(int a, int b) const
{
return saturate_cast<int>(scale * a * b);
}
const double scale; const double scale;
}; };
...@@ -517,11 +535,36 @@ namespace cv { namespace gpu { namespace device ...@@ -517,11 +535,36 @@ namespace cv { namespace gpu { namespace device
enum { smart_shift = 4 }; enum { smart_shift = 4 };
}; };
// Generic launcher for element-wise multiplication: source elements of type T,
// destination elements of type D.
template <typename T, typename D> struct MultiplyCaller
{
    // Reinterprets the untyped buffers as typed views and applies Multiply<T, D>
    // to every element pair through transform(), with no mask, on `stream`.
    static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
    {
        typedef DevMem2D_<T> SrcView;
        typedef DevMem2D_<D> DstView;

        // This generic path hands the functor a single-precision copy of the scale.
        const float scaleAsFloat = static_cast<float>(scale);
        Multiply<T, D> functor(scaleAsFloat);

        cv::gpu::device::transform((SrcView)src1, (SrcView)src2, (DstView)dst, functor, WithOutMask(), stream);
    }
};
// Launcher specialization for a double destination: the scale stays in double
// precision instead of being narrowed to float.
template <typename T> struct MultiplyCaller<T, double>
{
    // Reinterprets the untyped buffers as typed views and applies
    // Multiply<T, double> to every element pair through transform(), with no
    // mask, on `stream`.
    static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
    {
        typedef DevMem2D_<T> SrcView;
        typedef DevMem2D_<double> DstView;

        // Must run before the functor is built: the call receives a pointer to
        // `scale` and may rewrite it in place.
        // NOTE(review): per the CUDA runtime docs this adapts the value for devices
        // without native double support - confirm it is still needed.
        cudaSafeCall( cudaSetDoubleForDevice(&scale) );

        Multiply<T, double> functor(scale);

        cv::gpu::device::transform((SrcView)src1, (SrcView)src2, (DstView)dst, functor, WithOutMask(), stream);
    }
};
// Launcher specialization for int sources and an int destination: the scale is
// passed to the functor as a double rather than narrowed to float.
template <> struct MultiplyCaller<int, int>
{
    // Reinterprets the untyped buffers as int views and applies
    // Multiply<int, int> to every element pair through transform(), with no
    // mask, on `stream`.
    static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
    {
        typedef DevMem2D_<int> IntView;

        // Must run before the functor is built: the call receives a pointer to
        // `scale` and may rewrite it in place.
        // NOTE(review): per the CUDA runtime docs this adapts the value for devices
        // without native double support - confirm it is still needed.
        cudaSafeCall( cudaSetDoubleForDevice(&scale) );

        Multiply<int, int> functor(scale);

        cv::gpu::device::transform((IntView)src1, (IntView)src2, (IntView)dst, functor, WithOutMask(), stream);
    }
};
template <typename T, typename D> void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream) template <typename T, typename D> void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
{ {
cudaSafeCall( cudaSetDoubleForDevice(&scale) ); MultiplyCaller<T, D>::call(src1, src2, dst, scale, stream);
Multiply<T, D> op(scale);
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
} }
template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream); template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
...@@ -729,7 +772,7 @@ namespace cv { namespace gpu { namespace device ...@@ -729,7 +772,7 @@ namespace cv { namespace gpu { namespace device
Divide(double scale_) : scale(scale_) {} Divide(double scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const __device__ __forceinline__ D operator ()(T a, T b) const
{ {
return b != 0 ? saturate_cast<D>(scale * a / b) : 0; return b != 0 ? saturate_cast<D>(a * scale / b) : 0;
} }
const double scale; const double scale;
}; };
......
This diff is collapsed.
...@@ -55,7 +55,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, ...@@ -55,7 +55,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&,
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
namespace stereobm namespace stereobm
{ {
...@@ -65,10 +65,13 @@ namespace cv { namespace gpu { namespace device ...@@ -65,10 +65,13 @@ namespace cv { namespace gpu { namespace device
} }
}}} }}}
const float defaultAvgTexThreshold = 3; namespace
{
const float defaultAvgTexThreshold = 3;
}
cv::gpu::StereoBM_GPU::StereoBM_GPU() cv::gpu::StereoBM_GPU::StereoBM_GPU()
: preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold) : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold)
{ {
} }
...@@ -100,9 +103,9 @@ namespace ...@@ -100,9 +103,9 @@ namespace
{ {
using namespace ::cv::gpu::device::stereobm; using namespace ::cv::gpu::device::stereobm;
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols); CV_Assert(left.rows == right.rows && left.cols == right.cols);
CV_DbgAssert(left.type() == CV_8UC1); CV_Assert(left.type() == CV_8UC1);
CV_DbgAssert(right.type() == CV_8UC1); CV_Assert(right.type() == CV_8UC1);
disparity.create(left.size(), CV_8U); disparity.create(left.size(), CV_8U);
minSSD.create(left.size(), CV_32S); minSSD.create(left.size(), CV_32S);
...@@ -115,7 +118,7 @@ namespace ...@@ -115,7 +118,7 @@ namespace
leBuf.create( left.size(), left.type()); leBuf.create( left.size(), left.type());
riBuf.create(right.size(), right.type()); riBuf.create(right.size(), right.type());
prefilter_xsobel( left, leBuf, 31, stream); prefilter_xsobel( left, leBuf, 31, stream);
prefilter_xsobel(right, riBuf, 31, stream); prefilter_xsobel(right, riBuf, 31, stream);
le_for_bm = leBuf; le_for_bm = leBuf;
......
This diff is collapsed.
...@@ -254,7 +254,7 @@ static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_he ...@@ -254,7 +254,7 @@ static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_he
} }
static void static void
cvImageWidget_size_allocate (GtkWidget *widget, cvImageWidget_size_allocate (GtkWidget *widget,
GtkAllocation *allocation) GtkAllocation *allocation)
{ {
CvImageWidget *image_widget; CvImageWidget *image_widget;
...@@ -719,7 +719,7 @@ namespace ...@@ -719,7 +719,7 @@ namespace
void generateBitmapFont(const std::string& family, int height, int weight, bool italic, bool underline, int start, int count, int base) const; void generateBitmapFont(const std::string& family, int height, int weight, bool italic, bool underline, int start, int count, int base) const;
bool isGlContextInitialized() const; bool isGlContextInitialized() const;
PFNGLGENBUFFERSPROC glGenBuffersExt; PFNGLGENBUFFERSPROC glGenBuffersExt;
PFNGLDELETEBUFFERSPROC glDeleteBuffersExt; PFNGLDELETEBUFFERSPROC glDeleteBuffersExt;
...@@ -866,22 +866,22 @@ namespace ...@@ -866,22 +866,22 @@ namespace
CV_FUNCNAME( "GlFuncTab_GTK::generateBitmapFont" ); CV_FUNCNAME( "GlFuncTab_GTK::generateBitmapFont" );
__BEGIN__; __BEGIN__;
fontDecr = pango_font_description_new(); fontDecr = pango_font_description_new();
pango_font_description_set_size(fontDecr, height); pango_font_description_set_size(fontDecr, height);
pango_font_description_set_family_static(fontDecr, family.c_str()); pango_font_description_set_family_static(fontDecr, family.c_str());
pango_font_description_set_weight(fontDecr, static_cast<PangoWeight>(weight)); pango_font_description_set_weight(fontDecr, static_cast<PangoWeight>(weight));
pango_font_description_set_style(fontDecr, italic ? PANGO_STYLE_ITALIC : PANGO_STYLE_NORMAL); pango_font_description_set_style(fontDecr, italic ? PANGO_STYLE_ITALIC : PANGO_STYLE_NORMAL);
pangoFont = gdk_gl_font_use_pango_font(fontDecr, start, count, base); pangoFont = gdk_gl_font_use_pango_font(fontDecr, start, count, base);
pango_font_description_free(fontDecr); pango_font_description_free(fontDecr);
if (!pangoFont) if (!pangoFont)
CV_ERROR(CV_OpenGlApiCallError, "Can't create font"); CV_ERROR(CV_OpenGlApiCallError, "Can't create font");
...@@ -960,13 +960,13 @@ namespace ...@@ -960,13 +960,13 @@ namespace
void releaseGlContext(CvWindow* window) void releaseGlContext(CvWindow* window)
{ {
CV_FUNCNAME( "releaseGlContext" ); //CV_FUNCNAME( "releaseGlContext" );
__BEGIN__; //__BEGIN__;
window->useGl = false; window->useGl = false;
__END__; //__END__;
} }
void drawGl(CvWindow* window) void drawGl(CvWindow* window)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment