Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
8c6dc17a
Commit
8c6dc17a
authored
Jun 13, 2012
by
Marina Kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
scan based area interpolation for naive cases
parent
7cccc93b
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
243 additions
and
19 deletions
+243
-19
resize.cu
modules/gpu/src/cuda/resize.cu
+217
-12
resize.cpp
modules/gpu/src/resize.cpp
+3
-3
test_resize.cpp
modules/gpu/test/test_resize.cpp
+23
-4
No files found.
modules/gpu/src/cuda/resize.cu
View file @
8c6dc17a
...
...
@@ -282,27 +282,232 @@ namespace cv { namespace gpu { namespace device
template<> struct scan_traits<uchar>
{
typedef
in
t scan_line_type;
typedef
floa
t scan_line_type;
};
template <typename Ptr2D, typename T>
__global__ void resize_area_scan(const Ptr2D src, int fx, int fy, DevMem2D_<T> dst, DevMem2D_<T> buffer)
// template <typename T>
// __global__ void resize_area_scan(const DevMem2D_<T> src, DevMem2D_<T> dst, int fx, int fy, DevMem2D_<T> buffer)
// {
// typedef typename scan_traits<T>::scan_line_type W;
// extern __shared__ W line[];
// const int x = threadIdx.x;
// const int y = blockIdx.x;
// if (y >= src.rows) return;
// int offset = 1;
// line[2 * x + 0] = src(y, 2 * x + 0);
// line[2 * x + 1] = src(y, 2 * x + 1);
// __syncthreads();//???
// // reduction
// for (int d = blockDim.x; d > 0; d >>= 1)
// {
// __syncthreads();
// if (x < d)
// {
// int ai = 2 * x * offset -1 + 1 * offset;
// int bi = 2 * x * offset -1 + 2 * offset;
// line[bi] += line[ai];
// }
// offset *= 2;
// }
// __syncthreads();
// // convolution
// if (x == 0) { line[(blockDim.x << 1) - 1] = 0; printf("offset: %d!!!!!!!!!!!!!\n", fx);}
// for (int d = 1; d < (blockDim.x << 1); d *= 2)
// {
// offset >>= 1;
// __syncthreads();
// if (x < d)
// {
// int ai = offset * 2 * x + 1 * offset - 1;
// int bi = offset * 2 * x + 2 * offset - 1;
// W t = line[ai];
// line[ai] = line[bi];
// line[bi] += t;
// }
// }
// __syncthreads();
// // calculate sum
// int start = 0;
// int out_idx = 0;
// int end = start + fx;
// while (start < (blockDim.x << 1) && end < (blockDim.x << 1))
// {
// buffer(y, out_idx) = saturate_cast<T>((line[end] - line[start]) / fx);
// start = end;
// end = start + fx;
// out_idx++;
// }
// }
// Vertical stage of the scan-based area resize.
//
// One block processes one column of `buffer` (x = blockIdx.x). Every thread loads two
// consecutive rows of that column into `line`, so `line` must provide room for
// 2 * blockDim.x elements. A tree-based up-sweep / down-sweep prefix sum is then built
// in `line`, and thread y writes the sum of its fy-row window, multiplied by
// 1 / (fx * fy), to dst(g_base + y, x).
//
//   buffer  intermediate image read by this stage (filled by scan_x in the visible caller)
//   fx, fy  integer reduction factors; both enter the final scale, fy is the window height
//   dst     destination image; rows g_base + y with y < dst.rows are written
//   line    scratch array of at least 2 * blockDim.x elements shared by the block
//   g_base  index of the first output row of the strip handled by this call
//
// NOTE(review): the tree indexing assumes blockDim.x is a power of two, and the final
// read line[y * fy + fy - 1] assumes dst.rows * fy <= 2 * blockDim.x — confirm at the
// launch site.
template <typename T>
__device__ void scan_y(DevMem2D_<typename scan_traits<T>::scan_line_type> buffer,int fx, int fy, DevMem2D_<T> dst,
                       typename scan_traits<T>::scan_line_type* line, int g_base)
{
    typedef typename scan_traits<T>::scan_line_type W;

    const int y = threadIdx.x;
    const int x = blockIdx.x;

    float scale = 1.f / (fx * fy);

    // x depends only on blockIdx.x, so either every thread of the block returns here or
    // none does; the barriers below are therefore never reached by a partial block.
    if (x >= buffer.cols) return;

    // First buffer row of the strip processed by this call.
    const int base = g_base * fy;

    int offset = 1;

    // Load rows base + 1 .. base + 2 * blockDim.x - 1, i.e. shifted by one element:
    // row `base` itself is injected later as the seed of the down-sweep.
    line[2 * y + 0] = buffer(base + 2 * y + 1, x);

    if (y != (blockDim.x - 1))
        line[2 * y + 1] = buffer(base + 2 * y + 2, x);
    else
        line[2 * y + 1] = 0; // the last slot has no source row; it is overwritten by the seed

    __syncthreads();

    // up-sweep (reduction)
    for (int d = blockDim.x; d > 0; d >>= 1)
    {
        __syncthreads();
        if (y < d)
        {
            int ai = 2 * y * offset - 1 + 1 * offset;
            int bi = 2 * y * offset - 1 + 2 * offset;
            line[bi] += line[ai];
        }

        offset *= 2;
    }

    __syncthreads();

    // down-sweep
    // Seeding the root with the first row of the strip (instead of 0) turns the exclusive
    // scan of the shifted data into an inclusive prefix sum of rows base .. base + i.
    // The seed must come from row `base`, not row 0, so that strips with g_base != 0
    // produce a correct first output row.
    if (y == 0) line[(blockDim.x << 1) - 1] = (W)buffer(base, x);

    for (int d = 1; d < (blockDim.x << 1); d *= 2)
    {
        offset >>= 1;

        __syncthreads();
        if (y < d)
        {
            int ai = offset * 2 * y + 1 * offset - 1;
            int bi = offset * 2 * y + 2 * offset - 1;

            W t = line[ai];
            line[ai] = line[bi];
            line[bi] += t;
        }
    }

    __syncthreads();

    if (y < dst.rows)
    {
        // Window sum = prefix(end of window) - prefix(end of previous window).
        W start = (y == 0)? (W)0:line[y * fy -1];
        W end = line[y * fy + fy - 1];
        dst(g_base + y ,x) = saturate_cast<T>((end - start) * scale);
    }
}
// Horizontal stage of the scan-based area resize.
//
// One block processes one row of `src` (y = blockIdx.x). Every thread loads two
// consecutive pixels of that row into `line`, so `line` must provide room for
// 2 * blockDim.x elements. A tree-based up-sweep / down-sweep prefix sum is then built
// in `line`, and thread x stores the (unscaled) sum of its fx-pixel window in
// buffer(y, g_base + x).
//
//   src     source image
//   fx      integer horizontal reduction factor (window width)
//   fy      not used by this stage; kept so both stages share a parameter layout
//   buffer  intermediate image; columns g_base + x with x < buffer.cols are written
//   line    scratch array of at least 2 * blockDim.x elements shared by the block
//   g_base  index of the first output column of the strip handled by this call
//
// NOTE(review): the tree indexing assumes blockDim.x is a power of two, and the final
// read line[x * fx + fx - 1] assumes buffer.cols * fx <= 2 * blockDim.x — confirm at the
// launch site.
template <typename T>
__device__ void scan_x(const DevMem2D_<T> src, int fx, int fy, DevMem2D_<typename scan_traits<T>::scan_line_type> buffer,
                       typename scan_traits<T>::scan_line_type* line, int g_base)
{
    typedef typename scan_traits<T>::scan_line_type W;

    // Normalisation by 1 / (fx * fy) is applied once, in the vertical stage.
    (void)fy;

    const int x = threadIdx.x;
    const int y = blockIdx.x;

    // y depends only on blockIdx.x, so either every thread of the block returns here or
    // none does; the barriers below are therefore never reached by a partial block.
    if (y >= src.rows) return;

    // First source column of the strip processed by this call.
    const int base = g_base * fx;

    int offset = 1;

    // Load columns base + 1 .. base + 2 * blockDim.x - 1, i.e. shifted by one element:
    // column `base` itself is injected later as the seed of the down-sweep.
    line[2 * x + 0] = (W)src(y, base + 2 * x + 1);

    if (x != (blockDim.x - 1))
        line[2 * x + 1] = (W)src(y, base + 2 * x + 2);
    else
        line[2 * x + 1] = 0; // the last slot has no source pixel; it is overwritten by the seed

    __syncthreads();

    // up-sweep (reduction)
    for (int d = blockDim.x; d > 0; d >>= 1)
    {
        __syncthreads();
        if (x < d)
        {
            int ai = 2 * x * offset - 1 + 1 * offset;
            int bi = 2 * x * offset - 1 + 2 * offset;
            line[bi] += line[ai];
        }

        offset *= 2;
    }

    __syncthreads();

    // down-sweep
    // Seeding the root with the first pixel of the strip (instead of 0) turns the exclusive
    // scan of the shifted data into an inclusive prefix sum of columns base .. base + i.
    // The seed must come from column `base`, not column 0, so that strips with g_base != 0
    // produce a correct first output column.
    if (x == 0) line[(blockDim.x << 1) - 1] = (W)src(y, base);

    for (int d = 1; d < (blockDim.x << 1); d *= 2)
    {
        offset >>= 1;

        __syncthreads();
        if (x < d)
        {
            int ai = offset * 2 * x + 1 * offset - 1;
            int bi = offset * 2 * x + 2 * offset - 1;

            W t = line[ai];
            line[ai] = line[bi];
            line[bi] += t;
        }
    }

    __syncthreads();

    if (x < buffer.cols)
    {
        // Window sum = prefix(end of window) - prefix(end of previous window).
        W start = (x == 0)? (W)0:line[x * fx -1];
        W end = line[x * fx + fx - 1];
        buffer(y, g_base + x) =(end - start);
    }
}
// Kernel entry point for the horizontal stage of the scan-based area resize.
// Forwards to scan_x for the strip starting at output column 0, using the block's
// dynamically sized shared array as scratch space. `dst` is accepted but not touched
// by this stage.
template <typename T>
__global__ void resize_area_scan_x(const DevMem2D_<T> src, DevMem2D_<T> dst, int fx, int fy, DevMem2D_<typename scan_traits<T>::scan_line_type> buffer)
{
    typedef typename scan_traits<T>::scan_line_type work_type;

    // Size is supplied by the third launch-configuration argument.
    extern __shared__ work_type line[];

    const int first_strip = 0;
    scan_x<T>(src, fx, fy, buffer, line, first_strip);
}
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
// Kernel entry point for the vertical stage of the scan-based area resize.
// Forwards to scan_y for the strip starting at output row 0, using the block's
// dynamically sized shared array as scratch space. `src` is accepted but not touched
// by this stage; the input is read from `buffer`.
template <typename T>
__global__ void resize_area_scan_y(const DevMem2D_<T> src, DevMem2D_<T> dst, int fx, int fy, DevMem2D_<typename scan_traits<T>::scan_line_type> buffer)
{
    typedef typename scan_traits<T>::scan_line_type work_type;

    // Size is supplied by the third launch-configuration argument.
    extern __shared__ work_type line[];

    const int first_strip = 0;
    scan_y<T>(buffer, fx, fy, dst, line, first_strip);
}
template <typename T> struct InterAreaDispatcherStream
{
static void call(
DevMem2D_<T> src, int fx, int fy, DevMem2D_<T> dst, DevMem2D_<T
> buffer, cudaStream_t stream)
static void call(
const DevMem2D_<T> src, int fx, int fy, DevMem2D_<T> dst, DevMem2D_<typename scan_traits<T>::scan_line_type
> buffer, cudaStream_t stream)
{
dim3 block(256, 1);
dim3 grid(divUp(dst.cols, block.x), 1);
resize_area_scan_x<T><<<src.rows, (src.cols >> 1), src.cols * sizeof(typename scan_traits<T>::scan_line_type) >>>(src, dst, fx, fy, buffer);
resize_area_scan
<<<grid, block, 256 * 2 * sizeof(typename scan_traits<T>::scan_line_type) >>>(src, fx, fy, dst
, buffer);
resize_area_scan
_y<T><<<dst.cols, (src.rows >> 1), src.rows * sizeof(typename scan_traits<T>::scan_line_type) >>>(src, dst, fx, fy
, buffer);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
...
...
@@ -311,8 +516,8 @@ namespace cv { namespace gpu { namespace device
};
template <typename T>
void resize_area_gpu(DevMem2Db src, DevMem2Db dst,float fx, float fy,
int interpolation, DevMem2D
b
buffer, cudaStream_t stream)
void resize_area_gpu(
const
DevMem2Db src, DevMem2Db dst,float fx, float fy,
int interpolation, DevMem2D
f
buffer, cudaStream_t stream)
{
(void)interpolation;
...
...
@@ -322,7 +527,7 @@ namespace cv { namespace gpu { namespace device
InterAreaDispatcherStream<T>::call(src, iscale_x, iscale_y, dst, buffer, stream);
}
template void resize_area_gpu<uchar>(DevMem2Db src, DevMem2Db dst, float fx, float fy, int interpolation, DevMem2D
b
buffer, cudaStream_t stream);
template void resize_area_gpu<uchar>(DevMem2Db src, DevMem2Db dst, float fx, float fy, int interpolation, DevMem2D
f
buffer, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
modules/gpu/src/resize.cpp
View file @
8c6dc17a
...
...
@@ -82,8 +82,8 @@ namespace cv { namespace gpu { namespace device
DevMem2Db
dst
,
int
interpolation
,
cudaStream_t
stream
);
template
<
typename
T
>
void
resize_area_gpu
(
DevMem2Db
src
,
DevMem2Db
dst
,
float
fx
,
float
fy
,
int
interpolation
,
DevMem2D
b
buffer
,
cudaStream_t
stream
);
void
resize_area_gpu
(
const
DevMem2Db
src
,
DevMem2Db
dst
,
float
fx
,
float
fy
,
int
interpolation
,
DevMem2D
f
buffer
,
cudaStream_t
stream
);
}
}}}
...
...
@@ -107,7 +107,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, GpuMat& buffer,
fy
=
static_cast
<
float
>
(
1.0
/
fy
);
dst
.
create
(
dsize
,
src
.
type
());
buffer
.
create
(
cv
::
Size
(
dsize
.
width
,
src
.
rows
),
src
.
type
()
);
buffer
.
create
(
cv
::
Size
(
dsize
.
width
,
src
.
rows
),
CV_32FC1
);
if
(
dsize
==
src
.
size
())
{
...
...
modules/gpu/test/test_resize.cpp
View file @
8c6dc17a
...
...
@@ -40,6 +40,7 @@
//M*/
#include "precomp.hpp"
#include <iostream>
#ifdef HAVE_CUDA
...
...
@@ -186,19 +187,37 @@ TEST_P(ResizeArea, Accuracy)
cv
::
Mat
src
=
randomMat
(
size
,
type
);
cv
::
gpu
::
GpuMat
dst
=
createMat
(
cv
::
Size
(
cv
::
saturate_cast
<
int
>
(
src
.
cols
*
coeff
),
cv
::
saturate_cast
<
int
>
(
src
.
rows
*
coeff
)),
type
,
useRoi
);
cv
::
gpu
::
resize
(
loadMat
(
src
,
useRoi
),
dst
,
cv
::
Size
(),
coeff
,
coeff
,
interpolation
);
cv
::
gpu
::
GpuMat
buffer
=
createMat
(
cv
::
Size
(
dst
.
cols
,
src
.
rows
),
CV_32FC1
);
cv
::
gpu
::
resize
(
loadMat
(
src
,
useRoi
),
dst
,
cv
::
Size
(),
buffer
,
coeff
,
coeff
,
interpolation
);
cv
::
Mat
dst_cpu
;
cv
::
resize
(
src
,
dst_cpu
,
cv
::
Size
(),
coeff
,
coeff
,
interpolation
);
// cv::Mat gpu_buff;
// buffer.download(gpu_buff);
// cv::Mat gpu;
// dst.download(gpu);
// std::cout << src
// << std::endl << std::endl
// << gpu_buff
// << std::endl << std::endl
// << gpu
// << std::endl << std::endl
// << dst_cpu<< std::endl;
EXPECT_MAT_NEAR
(
dst_cpu
,
dst
,
src
.
depth
()
==
CV_32F
?
1e-2
:
1.0
);
}
INSTANTIATE_TEST_CASE_P
(
GPU_ImgProc
,
ResizeArea
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
,
testing
::
Values
(
MatType
(
CV_8UC
3
),
MatType
(
CV_16UC1
),
MatType
(
CV_16UC3
),
MatType
(
CV_16UC4
),
MatType
(
CV_32FC1
),
MatType
(
CV_32FC3
),
MatType
(
CV_32FC4
)
),
testing
::
Values
(
0.
3
,
0.
5
),
testing
::
Values
(
cv
::
Size
(
512
,
256
)),
//
DIFFERENT_SIZES,
testing
::
Values
(
MatType
(
CV_8UC
1
)
/*MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)*/
),
testing
::
Values
(
0.5
),
testing
::
Values
(
Interpolation
(
cv
::
INTER_AREA
)),
WHOLE_SUBMAT
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment