Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fa322bf4
Commit
fa322bf4
authored
Dec 08, 2010
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added gpu::columnSum, fixed compile error (if there is no cuda), refactored
parent
b1c5b929
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
73 additions
and
21 deletions
+73
-21
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+3
-0
imgproc.cu
modules/gpu/src/cuda/imgproc.cu
+31
-1
match_template.cu
modules/gpu/src/cuda/match_template.cu
+7
-6
imgproc_gpu.cpp
modules/gpu/src/imgproc_gpu.cpp
+17
-0
match_template.cpp
modules/gpu/src/match_template.cpp
+6
-5
match_template.cpp
tests/gpu/src/match_template.cpp
+9
-9
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
fa322bf4
...
...
@@ -638,6 +638,9 @@ namespace cv
//! supports only CV_8UC1 source type
CV_EXPORTS
void
integral
(
GpuMat
&
src
,
GpuMat
&
sum
,
GpuMat
&
sqsum
);
//! computes vertical sum, supports only CV_32FC1 images
CV_EXPORTS
void
columnSum
(
const
GpuMat
&
src
,
GpuMat
&
sum
);
//! computes the standard deviation of integral images
//! supports only CV_32SC1 source type and CV_32FC1 sqr type
//! output will have CV_32FC1 type
...
...
modules/gpu/src/cuda/imgproc.cu
View file @
fa322bf4
...
...
@@ -42,7 +42,6 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
...
...
@@ -717,5 +716,36 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaUnbindTexture(minEigenValDxTex));
cudaSafeCall(cudaUnbindTexture(minEigenValDyTex));
}
////////////////////////////// Column Sum //////////////////////////////////////
__global__ void columnSumKernel_32F(int cols, int rows, const PtrStep src, const PtrStep dst)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
const float* src_data = (const float*)src.data + x;
float* dst_data = (float*)dst.data + x;
if (x < cols)
{
float sum = 0.f;
for (int y = 0; y < rows; ++y)
{
sum += src_data[y];
dst_data[y] = sum;
}
}
}
void columnSum_32F(const DevMem2D src, const DevMem2D dst)
{
dim3 threads(256);
dim3 grid(divUp(src.cols, threads.x));
columnSumKernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
cudaSafeCall(cudaThreadSynchronize());
}
}}}
modules/gpu/src/cuda/match_template.cu
View file @
fa322bf4
...
...
@@ -55,7 +55,7 @@ texture<unsigned char, 2> imageTex_8U;
texture<unsigned char, 2> templTex_8U;
__global__ void matchTemplateKernel_8U_SQDIFF(int w, int h, DevMem2Df result)
__global__ void matchTemplate
Naive
Kernel_8U_SQDIFF(int w, int h, DevMem2Df result)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
...
...
@@ -80,7 +80,7 @@ __global__ void matchTemplateKernel_8U_SQDIFF(int w, int h, DevMem2Df result)
}
void matchTemplate_8U_SQDIFF(const DevMem2D image, const DevMem2D templ, DevMem2Df result)
void matchTemplate
Naive
_8U_SQDIFF(const DevMem2D image, const DevMem2D templ, DevMem2Df result)
{
dim3 threads(32, 8);
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
...
...
@@ -92,7 +92,7 @@ void matchTemplate_8U_SQDIFF(const DevMem2D image, const DevMem2D templ, DevMem2
imageTex_8U.filterMode = cudaFilterModePoint;
templTex_8U.filterMode = cudaFilterModePoint;
matchTemplateKernel_8U_SQDIFF<<<grid, threads>>>(templ.cols, templ.rows, result);
matchTemplate
Naive
Kernel_8U_SQDIFF<<<grid, threads>>>(templ.cols, templ.rows, result);
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaUnbindTexture(imageTex_8U));
cudaSafeCall(cudaUnbindTexture(templTex_8U));
...
...
@@ -103,7 +103,7 @@ texture<float, 2> imageTex_32F;
texture<float, 2> templTex_32F;
__global__ void matchTemplateKernel_32F_SQDIFF(int w, int h, DevMem2Df result)
__global__ void matchTemplate
Naive
Kernel_32F_SQDIFF(int w, int h, DevMem2Df result)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
...
...
@@ -128,7 +128,7 @@ __global__ void matchTemplateKernel_32F_SQDIFF(int w, int h, DevMem2Df result)
}
void matchTemplate_32F_SQDIFF(const DevMem2D image, const DevMem2D templ, DevMem2Df result)
void matchTemplate
Naive
_32F_SQDIFF(const DevMem2D image, const DevMem2D templ, DevMem2Df result)
{
dim3 threads(32, 8);
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
...
...
@@ -140,7 +140,7 @@ void matchTemplate_32F_SQDIFF(const DevMem2D image, const DevMem2D templ, DevMem
imageTex_8U.filterMode = cudaFilterModePoint;
templTex_8U.filterMode = cudaFilterModePoint;
matchTemplateKernel_32F_SQDIFF<<<grid, threads>>>(templ.cols, templ.rows, result);
matchTemplate
Naive
Kernel_32F_SQDIFF<<<grid, threads>>>(templ.cols, templ.rows, result);
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaUnbindTexture(imageTex_32F));
cudaSafeCall(cudaUnbindTexture(templTex_32F));
...
...
@@ -165,6 +165,7 @@ void multiplyAndNormalizeSpects(int n, float scale, const cufftComplex* a, const
dim3 threads(256);
dim3 grid(divUp(n, threads.x));
multiplyAndNormalizeSpectsKernel<<<grid, threads>>>(n, scale, a, b, c);
cudaSafeCall(cudaThreadSynchronize());
}
...
...
modules/gpu/src/imgproc_gpu.cpp
View file @
fa322bf4
...
...
@@ -61,6 +61,7 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
void
cv
::
gpu
::
warpPerspective
(
const
GpuMat
&
,
GpuMat
&
,
const
Mat
&
,
Size
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
rotate
(
const
GpuMat
&
,
GpuMat
&
,
Size
,
double
,
double
,
double
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
integral
(
GpuMat
&
,
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
columnSum
(
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
rectStdDev
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
const
Rect
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Canny
(
const
GpuMat
&
,
GpuMat
&
,
double
,
double
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
evenLevels
(
GpuMat
&
,
int
,
int
,
int
)
{
throw_nogpu
();
}
...
...
@@ -555,6 +556,22 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
sum
.
step
,
sqsum
.
ptr
<
Npp32f
>
(),
sqsum
.
step
,
sz
,
0
,
0.0
f
,
h
)
);
}
//////////////////////////////////////////////////////////////////////////////
// columnSum
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
void
columnSum_32F
(
const
DevMem2D
src
,
const
DevMem2D
dst
);
}}}
void
cv
::
gpu
::
columnSum
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
{
CV_Assert
(
src
.
type
()
==
CV_32F
);
dst
.
create
(
src
.
size
(),
CV_32F
);
imgproc
::
columnSum_32F
(
src
,
dst
);
}
void
cv
::
gpu
::
rectStdDev
(
const
GpuMat
&
src
,
const
GpuMat
&
sqr
,
GpuMat
&
dst
,
const
Rect
&
rect
)
{
CV_Assert
(
src
.
type
()
==
CV_32SC1
&&
sqr
.
type
()
==
CV_32FC1
);
...
...
modules/gpu/src/match_template.cpp
View file @
fa322bf4
...
...
@@ -41,7 +41,6 @@
//M*/
#include "precomp.hpp"
#include <cufft.h>
#include <iostream>
#include <utility>
...
...
@@ -56,12 +55,14 @@ void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_
#else
#include <cufft.h>
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
void
multiplyAndNormalizeSpects
(
int
n
,
float
scale
,
const
cufftComplex
*
a
,
const
cufftComplex
*
b
,
cufftComplex
*
c
);
void
matchTemplate_8U_SQDIFF
(
const
DevMem2D
image
,
const
DevMem2D
templ
,
DevMem2Df
result
);
void
matchTemplate_32F_SQDIFF
(
const
DevMem2D
image
,
const
DevMem2D
templ
,
DevMem2Df
result
);
void
matchTemplate
Naive
_8U_SQDIFF
(
const
DevMem2D
image
,
const
DevMem2D
templ
,
DevMem2Df
result
);
void
matchTemplate
Naive
_32F_SQDIFF
(
const
DevMem2D
image
,
const
DevMem2D
templ
,
DevMem2Df
result
);
}}}
...
...
@@ -90,7 +91,7 @@ namespace
void
matchTemplate
<
CV_8U
,
CV_TM_SQDIFF
>
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
)
{
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
imgproc
::
matchTemplate_8U_SQDIFF
(
image
,
templ
,
result
);
imgproc
::
matchTemplate
Naive
_8U_SQDIFF
(
image
,
templ
,
result
);
}
...
...
@@ -98,7 +99,7 @@ namespace
void
matchTemplate
<
CV_32F
,
CV_TM_SQDIFF
>
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
)
{
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
imgproc
::
matchTemplate_32F_SQDIFF
(
image
,
templ
,
result
);
imgproc
::
matchTemplate
Naive
_32F_SQDIFF
(
image
,
templ
,
result
);
}
...
...
tests/gpu/src/match_template.cpp
View file @
fa322bf4
...
...
@@ -97,15 +97,15 @@ struct CV_GpuMatchTemplateTest: CvTest
F
(
cout
<<
"gpu_block: "
<<
clock
()
-
t
<<
endl
;)
if
(
!
check
(
dst_gold
,
Mat
(
dst
),
0.25
f
*
h
*
w
*
1e-5
f
))
return
;
gen
(
image
,
n
,
m
,
CV_32F
);
gen
(
templ
,
h
,
w
,
CV_32F
);
F
(
t
=
clock
();)
matchTemplate
(
image
,
templ
,
dst_gold
,
CV_TM_CCORR
);
F
(
cout
<<
"cpu:"
<<
clock
()
-
t
<<
endl
;)
F
(
t
=
clock
();)
gpu
::
matchTemplate
(
gpu
::
GpuMat
(
image
),
gpu
::
GpuMat
(
templ
),
dst
,
CV_TM_CCORR
);
F
(
cout
<<
"gpu_block: "
<<
clock
()
-
t
<<
endl
;)
if
(
!
check
(
dst_gold
,
Mat
(
dst
),
0.25
f
*
h
*
w
*
1e-5
f
))
return
;
//
gen(image, n, m, CV_32F);
//
gen(templ, h, w, CV_32F);
//
F(t = clock();)
//
matchTemplate(image, templ, dst_gold, CV_TM_CCORR);
//
F(cout << "cpu:" << clock() - t << endl;)
//
F(t = clock();)
//
gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_CCORR);
//
F(cout << "gpu_block: " << clock() - t << endl;)
//
if (!check(dst_gold, Mat(dst), 0.25f * h * w * 1e-5f)) return;
}
}
catch
(
const
Exception
&
e
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment