Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
282e01cb
Commit
282e01cb
authored
Nov 24, 2010
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added support of all data types into gpu::minMax
parent
3c0cc087
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
229 additions
and
73 deletions
+229
-73
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+0
-2
arithm.cpp
modules/gpu/src/arithm.cpp
+34
-44
mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+128
-0
arithm.cpp
tests/gpu/src/arithm.cpp
+67
-26
gputest_main.cpp
tests/gpu/src/gputest_main.cpp
+0
-1
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
282e01cb
...
...
@@ -422,8 +422,6 @@ namespace cv
CV_EXPORTS
Scalar
sum
(
const
GpuMat
&
m
);
//! finds global minimum and maximum array elements and returns their values
//! supports CV_8UC1 and CV_8UC4 type
//! disabled until fix npp bug
CV_EXPORTS
void
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
=
0
);
//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
...
...
modules/gpu/src/arithm.cpp
View file @
282e01cb
...
...
@@ -489,55 +489,45 @@ Scalar cv::gpu::sum(const GpuMat& src)
////////////////////////////////////////////////////////////////////////
// minMax
namespace
{
void
minMax_c1
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
)
{
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
Npp8u
min_res
,
max_res
;
nppSafeCall
(
nppiMinMax_8u_C1R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
sz
,
&
min_res
,
&
max_res
)
);
if
(
minVal
)
*
minVal
=
min_res
;
if
(
maxVal
)
*
maxVal
=
max_res
;
}
void
minMax_c4
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
)
{
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
Npp8u
*
cuMem
;
cuMem
=
nppsMalloc_8u
(
8
);
nppSafeCall
(
nppiMinMax_8u_C4R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
sz
,
cuMem
,
cuMem
+
4
)
);
if
(
minVal
)
cudaMemcpy
(
minVal
,
cuMem
,
4
*
sizeof
(
Npp8u
),
cudaMemcpyDeviceToHost
);
if
(
maxVal
)
cudaMemcpy
(
maxVal
,
cuMem
+
4
,
4
*
sizeof
(
Npp8u
),
cudaMemcpyDeviceToHost
);
nppsFree
(
cuMem
);
}
}
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
{
template
<
typename
T
>
void
min_max_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
);
}}}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
)
{
typedef
void
(
*
minMax_t
)(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
);
static
const
minMax_t
minMax_callers
[]
=
{
0
,
minMax_c1
,
0
,
0
,
minMax_c4
};
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
!
"disabled until fix npp bug"
);
CV_Assert
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC4
);
double
maxVal_
;
if
(
!
maxVal
)
maxVal
=
&
maxVal_
;
minMax_callers
[
src
.
channels
()](
src
,
minVal
,
maxVal
);
switch
(
src
.
type
())
{
case
CV_8U
:
mathfunc
::
min_max_caller
<
unsigned
char
>
(
src
,
minVal
,
maxVal
);
break
;
case
CV_8S
:
mathfunc
::
min_max_caller
<
signed
char
>
(
src
,
minVal
,
maxVal
);
break
;
case
CV_16U
:
mathfunc
::
min_max_caller
<
unsigned
short
>
(
src
,
minVal
,
maxVal
);
break
;
case
CV_16S
:
mathfunc
::
min_max_caller
<
signed
short
>
(
src
,
minVal
,
maxVal
);
break
;
case
CV_32S
:
mathfunc
::
min_max_caller
<
int
>
(
src
,
minVal
,
maxVal
);
break
;
case
CV_32F
:
mathfunc
::
min_max_caller
<
float
>
(
src
,
minVal
,
maxVal
);
break
;
case
CV_64F
:
mathfunc
::
min_max_caller
<
double
>
(
src
,
minVal
,
maxVal
);
break
;
default
:
CV_Error
(
CV_StsBadArg
,
"Unsupported type"
);
}
}
////////////////////////////////////////////////////////////////////////
...
...
modules/gpu/src/cuda/mathfunc.cu
View file @
282e01cb
...
...
@@ -393,4 +393,132 @@ namespace cv { namespace gpu { namespace mathfunc
{
bitwise_bin_op<BIN_OP_XOR>(rows, cols, src1, src2, dst, elem_size, Mask8U(mask), stream);
}
//////////////////////////////////////////////////////////////////////////////
// Min max
enum { MIN, MAX };
template <typename T, int op> struct Cmp {};
template <typename T>
struct Cmp<T, MIN>
{
static __device__ void call(unsigned int tid, unsigned int offset, volatile T* optval)
{
T val = optval[tid + offset];
if (val < optval[tid]) optval[tid] = val;
//optval[tid] = min(optval[tid], optval[tid + offset]);
}
};
template <typename T>
struct Cmp<T, MAX>
{
static __device__ void call(unsigned int tid, unsigned int offset, volatile T* optval)
{
T val = optval[tid + offset];
if (val > optval[tid]) optval[tid] = val;
//optval[tid] = max(optval[tid], optval[tid + offset]);
}
};
template <int nthreads, typename Cmp, typename T>
__global__ void opt_kernel(int cols, int rows, const PtrStep src, PtrStep optval)
{
__shared__ T soptval[nthreads];
unsigned int x0 = blockIdx.x * blockDim.x;
unsigned int y0 = blockIdx.y * blockDim.y;
unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;
if (x0 + threadIdx.x < cols && y0 + threadIdx.y < rows)
soptval[tid] = ((const T*)src.ptr(y0 + threadIdx.y))[x0 + threadIdx.x];
else
soptval[tid] = ((const T*)src.ptr(y0))[x0];
__syncthreads();
if (nthreads >= 512) if (tid < 256) { Cmp::call(tid, 256, soptval); __syncthreads(); }
if (nthreads >= 256) if (tid < 128) { Cmp::call(tid, 128, soptval); __syncthreads(); }
if (nthreads >= 128) if (tid < 64) { Cmp::call(tid, 64, soptval); __syncthreads(); }
if (tid < 32)
{
if (nthreads >= 64) Cmp::call(tid, 32, soptval);
if (nthreads >= 32) Cmp::call(tid, 16, soptval);
if (nthreads >= 16) Cmp::call(tid, 8, soptval);
if (nthreads >= 8) Cmp::call(tid, 4, soptval);
if (nthreads >= 4) Cmp::call(tid, 2, soptval);
if (nthreads >= 2) Cmp::call(tid, 1, soptval);
}
if (tid == 0) ((T*)optval.ptr(blockIdx.y))[blockIdx.x] = soptval[0];
}
template <typename T>
void min_max_caller(const DevMem2D src, double* minval, double* maxval)
{
dim3 threads(32, 8);
// Allocate memory for aux. buffers
DevMem2D minval_buf[2]; DevMem2D maxval_buf[2];
minval_buf[0].cols = divUp(src.cols, threads.x);
minval_buf[0].rows = divUp(src.rows, threads.y);
minval_buf[1].cols = divUp(minval_buf[0].cols, threads.x);
minval_buf[1].rows = divUp(minval_buf[0].rows, threads.y);
maxval_buf[0].cols = divUp(src.cols, threads.x);
maxval_buf[0].rows = divUp(src.rows, threads.y);
maxval_buf[1].cols = divUp(maxval_buf[0].cols, threads.x);
maxval_buf[1].rows = divUp(maxval_buf[0].rows, threads.y);
cudaSafeCall(cudaMallocPitch(&minval_buf[0].data, &minval_buf[0].step, minval_buf[0].cols * sizeof(T), minval_buf[0].rows));
cudaSafeCall(cudaMallocPitch(&minval_buf[1].data, &minval_buf[1].step, minval_buf[1].cols * sizeof(T), minval_buf[1].rows));
cudaSafeCall(cudaMallocPitch(&maxval_buf[0].data, &maxval_buf[0].step, maxval_buf[0].cols * sizeof(T), maxval_buf[0].rows));
cudaSafeCall(cudaMallocPitch(&maxval_buf[1].data, &maxval_buf[1].step, maxval_buf[1].cols * sizeof(T), maxval_buf[1].rows));
int curbuf = 0;
dim3 cursize(src.cols, src.rows);
dim3 grid(divUp(cursize.x, threads.x), divUp(cursize.y, threads.y));
opt_kernel<256, Cmp<T, MIN>, T><<<grid, threads>>>(cursize.x, cursize.y, src, minval_buf[curbuf]);
opt_kernel<256, Cmp<T, MAX>, T><<<grid, threads>>>(cursize.x, cursize.y, src, maxval_buf[curbuf]);
cursize = grid;
while (cursize.x > 1 || cursize.y > 1)
{
grid.x = divUp(cursize.x, threads.x);
grid.y = divUp(cursize.y, threads.y);
opt_kernel<256, Cmp<T, MIN>, T><<<grid, threads>>>(cursize.x, cursize.y, minval_buf[curbuf], minval_buf[1 - curbuf]);
opt_kernel<256, Cmp<T, MAX>, T><<<grid, threads>>>(cursize.x, cursize.y, maxval_buf[curbuf], maxval_buf[1 - curbuf]);
curbuf = 1 - curbuf;
cursize = grid;
}
cudaSafeCall(cudaThreadSynchronize());
// Copy results from device to host
T minval_, maxval_;
cudaSafeCall(cudaMemcpy(&minval_, minval_buf[curbuf].ptr(0), sizeof(T), cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf[curbuf].ptr(0), sizeof(T), cudaMemcpyDeviceToHost));
*minval = minval_;
*maxval = maxval_;
// Release aux. buffers
cudaSafeCall(cudaFree(minval_buf[0].data));
cudaSafeCall(cudaFree(minval_buf[1].data));
cudaSafeCall(cudaFree(maxval_buf[0].data));
cudaSafeCall(cudaFree(maxval_buf[1].data));
}
template void min_max_caller<unsigned char>(const DevMem2D, double*, double*);
template void min_max_caller<signed char>(const DevMem2D, double*, double*);
template void min_max_caller<unsigned short>(const DevMem2D, double*, double*);
template void min_max_caller<signed short>(const DevMem2D, double*, double*);
template void min_max_caller<int>(const DevMem2D, double*, double*);
template void min_max_caller<float>(const DevMem2D, double*, double*);
template void min_max_caller<double>(const DevMem2D, double*, double*);
}}}
tests/gpu/src/arithm.cpp
View file @
282e01cb
...
...
@@ -48,6 +48,11 @@ using namespace cv;
using
namespace
std
;
using
namespace
gpu
;
#define CHECK(pred, err) if (!(pred)) { \
ts
->
printf
(
CvTS
::
LOG
,
"Fail:
\"
%s
\"
at line: %d
\n
"
,
#
pred
,
__LINE__
);
\
ts
->
set_failed_test_info
(
err
);
\
return
;
}
class
CV_GpuArithmTest
:
public
CvTest
{
public
:
...
...
@@ -476,31 +481,6 @@ struct CV_GpuNppImageSumTest : public CV_GpuArithmTest
}
};
////////////////////////////////////////////////////////////////////////////////
// minNax
struct
CV_GpuNppImageMinNaxTest
:
public
CV_GpuArithmTest
{
CV_GpuNppImageMinNaxTest
()
:
CV_GpuArithmTest
(
"GPU-NppImageMinNax"
,
"minNax"
)
{}
int
test
(
const
Mat
&
mat1
,
const
Mat
&
)
{
if
(
mat1
.
type
()
!=
CV_8UC1
)
{
ts
->
printf
(
CvTS
::
LOG
,
"
\n
Unsupported type
\n
"
);
return
CvTS
::
OK
;
}
double
cpumin
,
cpumax
;
cv
::
minMaxLoc
(
mat1
,
&
cpumin
,
&
cpumax
);
GpuMat
gpu1
(
mat1
);
double
gpumin
,
gpumax
;
cv
::
gpu
::
minMax
(
gpu1
,
&
gpumin
,
&
gpumax
);
return
(
CheckNorm
(
cpumin
,
gpumin
)
==
CvTS
::
OK
&&
CheckNorm
(
cpumax
,
gpumax
)
==
CvTS
::
OK
)
?
CvTS
::
OK
:
CvTS
::
FAIL_GENERIC
;
}
};
////////////////////////////////////////////////////////////////////////////////
// LUT
struct
CV_GpuNppImageLUTTest
:
public
CV_GpuArithmTest
...
...
@@ -689,6 +669,67 @@ struct CV_GpuNppImagePolarToCartTest : public CV_GpuArithmTest
}
};
////////////////////////////////////////////////////////////////////////////////
// Min max
struct
CV_GpuMinMaxTest
:
public
CvTest
{
CV_GpuMinMaxTest
()
:
CvTest
(
"GPU-MinMaxTest"
,
"minMax"
)
{}
void
run
(
int
)
{
for
(
int
type
=
CV_8U
;
type
<=
CV_64F
;
++
type
)
{
int
rows
=
1
,
cols
=
3
;
test
(
rows
,
cols
,
type
);
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
int
rows
=
1
+
rand
()
%
1000
;
int
cols
=
1
+
rand
()
%
1000
;
test
(
rows
,
cols
,
type
);
}
}
}
void
test
(
int
rows
,
int
cols
,
int
type
)
{
cv
::
Mat
src
(
rows
,
cols
,
type
);
cv
::
RNG
rng
;
for
(
int
i
=
0
;
i
<
src
.
rows
;
++
i
)
{
Mat
row
(
1
,
src
.
cols
*
src
.
elemSize
(),
CV_8U
,
src
.
ptr
(
i
));
rng
.
fill
(
row
,
RNG
::
UNIFORM
,
Scalar
(
0
),
Scalar
(
255
));
}
double
minVal
,
maxVal
;
if
(
type
!=
CV_8S
)
{
cv
::
Point
minLoc
,
maxLoc
;
cv
::
minMaxLoc
(
src
,
&
minVal
,
&
maxVal
,
&
minLoc
,
&
maxLoc
);
}
else
{
// OpenCV's minMaxLoc doesn't support CV_8S type
minVal
=
std
::
numeric_limits
<
double
>::
max
();
maxVal
=
std
::
numeric_limits
<
double
>::
min
();
for
(
int
i
=
0
;
i
<
src
.
rows
;
++
i
)
for
(
int
j
=
0
;
j
<
src
.
cols
;
++
j
)
{
char
val
=
src
.
at
<
char
>
(
i
,
j
);
if
(
val
<
minVal
)
minVal
=
val
;
if
(
val
>
maxVal
)
maxVal
=
val
;
}
}
double
minVal_
,
maxVal_
;
cv
::
Point
minLoc_
,
maxLoc_
;
cv
::
gpu
::
minMax
(
cv
::
gpu
::
GpuMat
(
src
),
&
minVal_
,
&
maxVal_
);
CHECK
(
minVal
==
minVal_
,
CvTS
::
FAIL_INVALID_OUTPUT
);
CHECK
(
maxVal
==
maxVal_
,
CvTS
::
FAIL_INVALID_OUTPUT
);
}
};
/////////////////////////////////////////////////////////////////////////////
/////////////////// tests registration /////////////////////////////////////
...
...
@@ -709,7 +750,6 @@ CV_GpuNppImageMeanStdDevTest CV_GpuNppImageMeanStdDev_test;
CV_GpuNppImageNormTest
CV_GpuNppImageNorm_test
;
CV_GpuNppImageFlipTest
CV_GpuNppImageFlip_test
;
CV_GpuNppImageSumTest
CV_GpuNppImageSum_test
;
CV_GpuNppImageMinNaxTest
CV_GpuNppImageMinNax_test
;
CV_GpuNppImageLUTTest
CV_GpuNppImageLUT_test
;
CV_GpuNppImageExpTest
CV_GpuNppImageExp_test
;
CV_GpuNppImageLogTest
CV_GpuNppImageLog_test
;
...
...
@@ -717,3 +757,4 @@ CV_GpuNppImageMagnitudeTest CV_GpuNppImageMagnitude_test;
CV_GpuNppImagePhaseTest
CV_GpuNppImagePhase_test
;
CV_GpuNppImageCartToPolarTest
CV_GpuNppImageCartToPolar_test
;
CV_GpuNppImagePolarToCartTest
CV_GpuNppImagePolarToCart_test
;
CV_GpuMinMaxTest
CV_GpuMinMaxTest_test
;
tests/gpu/src/gputest_main.cpp
View file @
282e01cb
...
...
@@ -48,7 +48,6 @@ const char* blacklist[] =
"GPU-MatOperatorAsyncCall"
,
// crash
"GPU-NppImageSum"
,
// crash, probably npp bug
"GPU-NppImageMinNax"
,
// npp bug - don't find min/max near right border
"GPU-NppImageCanny"
,
// NPP_TEXTURE_BIND_ERROR
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment