Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e7cf541f
Commit
e7cf541f
authored
Dec 14, 2010
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed bug in matchTemplate when template size is (1,1), refactored
parent
1887b7d2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
58 additions
and
59 deletions
+58
-59
match_template.cu
modules/gpu/src/cuda/match_template.cu
+58
-58
gputest_main.cpp
tests/gpu/src/gputest_main.cpp
+0
-1
No files found.
modules/gpu/src/cuda/match_template.cu
View file @
e7cf541f
...
@@ -47,12 +47,12 @@ using namespace cv::gpu;
...
@@ -47,12 +47,12 @@ using namespace cv::gpu;
namespace cv { namespace gpu { namespace imgproc {
namespace cv { namespace gpu { namespace imgproc {
texture<
unsigned char, 2> imageTex_8U
_CCORR;
texture<
float, 2> imageTex_32F
_CCORR;
texture<
unsigned char, 2> templTex_8U
_CCORR;
texture<
float, 2> templTex_32F
_CCORR;
__global__ void matchTemplateNaiveKernel_
8U
_CCORR(int w, int h,
__global__ void matchTemplateNaiveKernel_
32F
_CCORR(int w, int h,
DevMem2Df result)
DevMem2Df result)
{
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
int y = blockDim.y * blockIdx.y + threadIdx.y;
...
@@ -63,40 +63,40 @@ __global__ void matchTemplateNaiveKernel_8U_CCORR(int w, int h,
...
@@ -63,40 +63,40 @@ __global__ void matchTemplateNaiveKernel_8U_CCORR(int w, int h,
for (int i = 0; i < h; ++i)
for (int i = 0; i < h; ++i)
for (int j = 0; j < w; ++j)
for (int j = 0; j < w; ++j)
sum +=
(float)tex2D(imageTex_8U
_CCORR, x + j, y + i) *
sum +=
tex2D(imageTex_32F
_CCORR, x + j, y + i) *
(float)tex2D(templTex_8U
_CCORR, j, i);
tex2D(templTex_32F
_CCORR, j, i);
result.ptr(y)[x] = sum;
result.ptr(y)[x] = sum;
}
}
}
}
void matchTemplateNaive_
8U
_CCORR(const DevMem2D image, const DevMem2D templ,
void matchTemplateNaive_
32F
_CCORR(const DevMem2D image, const DevMem2D templ,
DevMem2Df result)
DevMem2Df result)
{
{
dim3 threads(32, 8);
dim3 threads(32, 8);
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
divUp(image.rows - templ.rows + 1, threads.y));
divUp(image.rows - templ.rows + 1, threads.y));
cudaChannelFormatDesc desc = cudaCreateChannelDesc<
unsigned char
>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<
float
>();
cudaBindTexture2D(0, imageTex_
8U
_CCORR, image.data, desc, image.cols, image.rows, image.step);
cudaBindTexture2D(0, imageTex_
32F
_CCORR, image.data, desc, image.cols, image.rows, image.step);
cudaBindTexture2D(0, templTex_
8U
_CCORR, templ.data, desc, templ.cols, templ.rows, templ.step);
cudaBindTexture2D(0, templTex_
32F
_CCORR, templ.data, desc, templ.cols, templ.rows, templ.step);
imageTex_
8U
_CCORR.filterMode = cudaFilterModePoint;
imageTex_
32F
_CCORR.filterMode = cudaFilterModePoint;
templTex_
8U
_CCORR.filterMode = cudaFilterModePoint;
templTex_
32F
_CCORR.filterMode = cudaFilterModePoint;
matchTemplateNaiveKernel_
8U
_CCORR<<<grid, threads>>>(templ.cols, templ.rows, result);
matchTemplateNaiveKernel_
32F
_CCORR<<<grid, threads>>>(templ.cols, templ.rows, result);
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaUnbindTexture(imageTex_
8U
_CCORR));
cudaSafeCall(cudaUnbindTexture(imageTex_
32F
_CCORR));
cudaSafeCall(cudaUnbindTexture(templTex_
8U
_CCORR));
cudaSafeCall(cudaUnbindTexture(templTex_
32F
_CCORR));
}
}
texture<float, 2> imageTex_32F_
CCORR
;
texture<float, 2> imageTex_32F_
SQDIFF
;
texture<float, 2> templTex_32F_
CCORR
;
texture<float, 2> templTex_32F_
SQDIFF
;
__global__ void matchTemplateNaiveKernel_32F_
CCORR
(int w, int h,
__global__ void matchTemplateNaiveKernel_32F_
SQDIFF
(int w, int h,
DevMem2Df result)
DevMem2Df result)
{
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
int y = blockDim.y * blockIdx.y + threadIdx.y;
...
@@ -104,34 +104,40 @@ __global__ void matchTemplateNaiveKernel_32F_CCORR(int w, int h,
...
@@ -104,34 +104,40 @@ __global__ void matchTemplateNaiveKernel_32F_CCORR(int w, int h,
if (x < result.cols && y < result.rows)
if (x < result.cols && y < result.rows)
{
{
float sum = 0.f;
float sum = 0.f;
float delta;
for (int i = 0; i < h; ++i)
for (int i = 0; i < h; ++i)
{
for (int j = 0; j < w; ++j)
for (int j = 0; j < w; ++j)
sum += tex2D(imageTex_32F_CCORR, x + j, y + i) *
{
tex2D(templTex_32F_CCORR, j, i);
delta = tex2D(imageTex_32F_SQDIFF, x + j, y + i) -
tex2D(templTex_32F_SQDIFF, j, i);
sum += delta * delta;
}
}
result.ptr(y)[x] = sum;
result.ptr(y)[x] = sum;
}
}
}
}
void matchTemplateNaive_32F_
CCORR
(const DevMem2D image, const DevMem2D templ,
void matchTemplateNaive_32F_
SQDIFF
(const DevMem2D image, const DevMem2D templ,
DevMem2Df result)
DevMem2Df result)
{
{
dim3 threads(32, 8);
dim3 threads(32, 8);
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
divUp(image.rows - templ.rows + 1, threads.y));
divUp(image.rows - templ.rows + 1, threads.y));
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
cudaBindTexture2D(0, imageTex_32F_
CCORR
, image.data, desc, image.cols, image.rows, image.step);
cudaBindTexture2D(0, imageTex_32F_
SQDIFF
, image.data, desc, image.cols, image.rows, image.step);
cudaBindTexture2D(0, templTex_32F_
CCORR
, templ.data, desc, templ.cols, templ.rows, templ.step);
cudaBindTexture2D(0, templTex_32F_
SQDIFF
, templ.data, desc, templ.cols, templ.rows, templ.step);
imageTex_32F_
CCORR
.filterMode = cudaFilterModePoint;
imageTex_32F_
SQDIFF
.filterMode = cudaFilterModePoint;
templTex_32F_
CCORR
.filterMode = cudaFilterModePoint;
templTex_32F_
SQDIFF
.filterMode = cudaFilterModePoint;
matchTemplateNaiveKernel_32F_
CCORR
<<<grid, threads>>>(templ.cols, templ.rows, result);
matchTemplateNaiveKernel_32F_
SQDIFF
<<<grid, threads>>>(templ.cols, templ.rows, result);
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaUnbindTexture(imageTex_32F_
CCORR
));
cudaSafeCall(cudaUnbindTexture(imageTex_32F_
SQDIFF
));
cudaSafeCall(cudaUnbindTexture(templTex_32F_
CCORR
));
cudaSafeCall(cudaUnbindTexture(templTex_32F_
SQDIFF
));
}
}
...
@@ -185,12 +191,12 @@ void matchTemplateNaive_8U_SQDIFF(const DevMem2D image, const DevMem2D templ,
...
@@ -185,12 +191,12 @@ void matchTemplateNaive_8U_SQDIFF(const DevMem2D image, const DevMem2D templ,
}
}
texture<
float, 2> imageTex_32F_SQDIFF
;
texture<
unsigned char, 2> imageTex_8U_CCORR
;
texture<
float, 2> templTex_32F_SQDIFF
;
texture<
unsigned char, 2> templTex_8U_CCORR
;
__global__ void matchTemplateNaiveKernel_
32F_SQDIFF
(int w, int h,
__global__ void matchTemplateNaiveKernel_
8U_CCORR
(int w, int h,
DevMem2Df result)
DevMem2Df result)
{
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
int y = blockDim.y * blockIdx.y + threadIdx.y;
...
@@ -198,40 +204,34 @@ __global__ void matchTemplateNaiveKernel_32F_SQDIFF(int w, int h,
...
@@ -198,40 +204,34 @@ __global__ void matchTemplateNaiveKernel_32F_SQDIFF(int w, int h,
if (x < result.cols && y < result.rows)
if (x < result.cols && y < result.rows)
{
{
float sum = 0.f;
float sum = 0.f;
float delta;
for (int i = 0; i < h; ++i)
for (int i = 0; i < h; ++i)
{
for (int j = 0; j < w; ++j)
for (int j = 0; j < w; ++j)
{
sum += (float)tex2D(imageTex_8U_CCORR, x + j, y + i) *
delta = tex2D(imageTex_32F_SQDIFF, x + j, y + i) -
(float)tex2D(templTex_8U_CCORR, j, i);
tex2D(templTex_32F_SQDIFF, j, i);
sum += delta * delta;
}
}
result.ptr(y)[x] = sum;
result.ptr(y)[x] = sum;
}
}
}
}
void matchTemplateNaive_
32F_SQDIFF
(const DevMem2D image, const DevMem2D templ,
void matchTemplateNaive_
8U_CCORR
(const DevMem2D image, const DevMem2D templ,
DevMem2Df result)
DevMem2Df result)
{
{
dim3 threads(32, 8);
dim3 threads(32, 8);
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
dim3 grid(divUp(image.cols - templ.cols + 1, threads.x),
divUp(image.rows - templ.rows + 1, threads.y));
divUp(image.rows - templ.rows + 1, threads.y));
cudaChannelFormatDesc desc = cudaCreateChannelDesc<
float
>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<
unsigned char
>();
cudaBindTexture2D(0, imageTex_
32F_SQDIFF
, image.data, desc, image.cols, image.rows, image.step);
cudaBindTexture2D(0, imageTex_
8U_CCORR
, image.data, desc, image.cols, image.rows, image.step);
cudaBindTexture2D(0, templTex_
32F_SQDIFF
, templ.data, desc, templ.cols, templ.rows, templ.step);
cudaBindTexture2D(0, templTex_
8U_CCORR
, templ.data, desc, templ.cols, templ.rows, templ.step);
imageTex_8U_
SQDIFF
.filterMode = cudaFilterModePoint;
imageTex_8U_
CCORR
.filterMode = cudaFilterModePoint;
templTex_8U_
SQDIFF
.filterMode = cudaFilterModePoint;
templTex_8U_
CCORR
.filterMode = cudaFilterModePoint;
matchTemplateNaiveKernel_
32F_SQDIFF
<<<grid, threads>>>(templ.cols, templ.rows, result);
matchTemplateNaiveKernel_
8U_CCORR
<<<grid, threads>>>(templ.cols, templ.rows, result);
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaUnbindTexture(imageTex_
32F_SQDIFF
));
cudaSafeCall(cudaUnbindTexture(imageTex_
8U_CCORR
));
cudaSafeCall(cudaUnbindTexture(templTex_
32F_SQDIFF
));
cudaSafeCall(cudaUnbindTexture(templTex_
8U_CCORR
));
}
}
...
@@ -301,8 +301,8 @@ __global__ void matchTemplatePreparedKernel_8U_SQDIFF_NORMED(
...
@@ -301,8 +301,8 @@ __global__ void matchTemplatePreparedKernel_8U_SQDIFF_NORMED(
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = (image_sqsum_ - 2.f * ccorr + templ_sqsum) *
result.ptr(y)[x] =
min(1.f,
(image_sqsum_ - 2.f * ccorr + templ_sqsum) *
rsqrtf(image_sqsum_ * templ_sqsum);
rsqrtf(image_sqsum_ * templ_sqsum)
)
;
}
}
}
}
...
@@ -368,8 +368,8 @@ __global__ void matchTemplatePreparedKernel_8U_CCOEFF_NORMED(
...
@@ -368,8 +368,8 @@ __global__ void matchTemplatePreparedKernel_8U_CCOEFF_NORMED(
float image_sqsum_ = (float)(
float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
result.ptr(y)[x] = (ccorr - image_sum_ * templ_sum_scale) *
result.ptr(y)[x] =
min(1.f,
(ccorr - image_sum_ * templ_sum_scale) *
rsqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_));
rsqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_))
)
;
}
}
}
}
...
@@ -405,7 +405,7 @@ __global__ void normalizeKernel_8U(
...
@@ -405,7 +405,7 @@ __global__ void normalizeKernel_8U(
float image_sqsum_ = (float)(
float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
result.ptr(y)[x]
*= rsqrtf(image_sqsum_ * templ_sqsum
);
result.ptr(y)[x]
= min(1.f, result.ptr(y)[x] * rsqrtf(image_sqsum_ * templ_sqsum)
);
}
}
}
}
...
...
tests/gpu/src/gputest_main.cpp
View file @
e7cf541f
...
@@ -51,7 +51,6 @@ const char* blacklist[] =
...
@@ -51,7 +51,6 @@ const char* blacklist[] =
};
};
int
main
(
int
argc
,
char
**
argv
)
int
main
(
int
argc
,
char
**
argv
)
{
{
return
test_system
.
run
(
argc
,
argv
,
blacklist
);
return
test_system
.
run
(
argc
,
argv
,
blacklist
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment