Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
8a799aa8
Commit
8a799aa8
authored
Oct 03, 2011
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Updated optimal block size estimation for the convolve() function
parent
bee68e51
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
37 additions
and
16 deletions
+37
-16
perf_imgproc.cpp
modules/gpu/perf/perf_imgproc.cpp
+7
-5
perf_utility.hpp
modules/gpu/perf/perf_utility.hpp
+1
-0
imgproc.cpp
modules/gpu/src/imgproc.cpp
+8
-3
performance.cpp
samples/gpu/performance/performance.cpp
+14
-7
performance.h
samples/gpu/performance/performance.h
+7
-1
No files found.
modules/gpu/perf/perf_imgproc.cpp
View file @
8a799aa8
...
@@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()),
...
@@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()),
SANITY_CHECK
(
dst_host
);
SANITY_CHECK
(
dst_host
);
}
}
PERF_TEST_P
(
DevInfo_Size
,
convolve
,
testing
::
Combine
(
testing
::
ValuesIn
(
devices
()),
PERF_TEST_P
(
DevInfo_Int_Int
,
convolve
,
testing
::
Combine
(
testing
::
ValuesIn
(
devices
()),
testing
::
Values
(
GPU_TYPICAL_MAT_SIZES
)))
testing
::
Values
(
512
,
1024
,
1536
,
2048
,
2560
,
3072
,
3584
),
testing
::
Values
(
27
,
32
,
64
)))
{
{
DeviceInfo
devInfo
=
std
::
tr1
::
get
<
0
>
(
GetParam
());
DeviceInfo
devInfo
=
std
::
tr1
::
get
<
0
>
(
GetParam
());
Size
size
=
std
::
tr1
::
get
<
1
>
(
GetParam
());
int
image_size
=
std
::
tr1
::
get
<
1
>
(
GetParam
());
int
templ_size
=
std
::
tr1
::
get
<
2
>
(
GetParam
());
setDevice
(
devInfo
.
deviceID
());
setDevice
(
devInfo
.
deviceID
());
Mat
image_host
(
size
,
CV_32FC1
);
Mat
image_host
(
image_size
,
image_
size
,
CV_32FC1
);
Mat
templ_host
(
size
,
CV_32FC1
);
Mat
templ_host
(
templ_size
,
templ_
size
,
CV_32FC1
);
declare
.
in
(
image_host
,
templ_host
,
WARMUP_RNG
);
declare
.
in
(
image_host
,
templ_host
,
WARMUP_RNG
);
...
...
modules/gpu/perf/perf_utility.hpp
View file @
8a799aa8
...
@@ -32,6 +32,7 @@ struct CvtColorInfo
...
@@ -32,6 +32,7 @@ struct CvtColorInfo
typedef
TestBaseWithParam
<
DeviceInfo
>
DevInfo
;
typedef
TestBaseWithParam
<
DeviceInfo
>
DevInfo
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
Size
>
>
DevInfo_Size
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
Size
>
>
DevInfo_Size
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
int
,
int
>
>
DevInfo_Int_Int
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
MatType
>
>
DevInfo_MatType
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
MatType
>
>
DevInfo_MatType
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
Size
,
MatType
>
>
DevInfo_Size_MatType
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
Size
,
MatType
>
>
DevInfo_Size_MatType
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
Size
,
MatType
,
MatType
>
>
DevInfo_Size_MatType_MatType
;
typedef
TestBaseWithParam
<
std
::
tr1
::
tuple
<
DeviceInfo
,
Size
,
MatType
,
MatType
>
>
DevInfo_Size_MatType_MatType
;
...
...
modules/gpu/src/imgproc.cpp
View file @
8a799aa8
...
@@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
...
@@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
Size
cv
::
gpu
::
ConvolveBuf
::
estimateBlockSize
(
Size
result_size
,
Size
templ_size
)
Size
cv
::
gpu
::
ConvolveBuf
::
estimateBlockSize
(
Size
result_size
,
Size
templ_size
)
{
{
int
scale
=
40
;
int
scale
=
40
;
Size
bsize_min
(
1024
,
1024
);
Size
bsize_min
(
512
,
512
);
// Check whether we use Fermi generation or newer GPU
// Check whether we use Fermi generation or newer GPU
if
(
DeviceInfo
().
majorVersion
()
>=
2
)
if
(
DeviceInfo
().
majorVersion
()
>=
2
)
{
{
bsize_min
.
width
=
2048
;
bsize_min
.
width
=
1024
;
bsize_min
.
height
=
2048
;
bsize_min
.
height
=
1024
;
}
}
Size
bsize
(
std
::
max
(
templ_size
.
width
*
scale
,
bsize_min
.
width
),
Size
bsize
(
std
::
max
(
templ_size
.
width
*
scale
,
bsize_min
.
width
),
std
::
max
(
templ_size
.
height
*
scale
,
bsize_min
.
height
));
std
::
max
(
templ_size
.
height
*
scale
,
bsize_min
.
height
));
int
blocks_per_row
=
(
result_size
.
width
+
bsize
.
width
-
1
)
/
bsize
.
width
;
int
blocks_per_col
=
(
result_size
.
height
+
bsize
.
height
-
1
)
/
bsize
.
height
;
bsize
.
width
=
(
result_size
.
width
+
blocks_per_row
-
1
)
/
blocks_per_row
;
bsize
.
height
=
(
result_size
.
height
+
blocks_per_col
-
1
)
/
blocks_per_col
;
bsize
.
width
=
std
::
min
(
bsize
.
width
,
result_size
.
width
);
bsize
.
width
=
std
::
min
(
bsize
.
width
,
result_size
.
width
);
bsize
.
height
=
std
::
min
(
bsize
.
height
,
result_size
.
height
);
bsize
.
height
=
std
::
min
(
bsize
.
height
,
result_size
.
height
);
return
bsize
;
return
bsize
;
...
...
samples/gpu/performance/performance.cpp
View file @
8a799aa8
...
@@ -8,9 +8,15 @@ using namespace cv;
...
@@ -8,9 +8,15 @@ using namespace cv;
void
TestSystem
::
run
()
void
TestSystem
::
run
()
{
{
// Run test initializers
if
(
is_list_mode_
)
vector
<
Runnable
*>::
iterator
it
=
inits_
.
begin
();
{
for
(;
it
!=
inits_
.
end
();
++
it
)
for
(
vector
<
Runnable
*>::
iterator
it
=
tests_
.
begin
();
it
!=
tests_
.
end
();
++
it
)
cout
<<
(
*
it
)
->
name
()
<<
endl
;
return
;
}
// Run test initializers
for
(
vector
<
Runnable
*>::
iterator
it
=
inits_
.
begin
();
it
!=
inits_
.
end
();
++
it
)
{
{
if
((
*
it
)
->
name
().
find
(
test_filter_
,
0
)
!=
string
::
npos
)
if
((
*
it
)
->
name
().
find
(
test_filter_
,
0
)
!=
string
::
npos
)
(
*
it
)
->
run
();
(
*
it
)
->
run
();
...
@@ -19,8 +25,7 @@ void TestSystem::run()
...
@@ -19,8 +25,7 @@ void TestSystem::run()
printHeading
();
printHeading
();
// Run tests
// Run tests
it
=
tests_
.
begin
();
for
(
vector
<
Runnable
*>::
iterator
it
=
tests_
.
begin
();
it
!=
tests_
.
end
();
++
it
)
for
(;
it
!=
tests_
.
end
();
++
it
)
{
{
try
try
{
{
...
@@ -145,13 +150,15 @@ int main(int argc, char** argv)
...
@@ -145,13 +150,15 @@ int main(int argc, char** argv)
string
key
=
argv
[
i
];
string
key
=
argv
[
i
];
if
(
key
==
"--help"
)
if
(
key
==
"--help"
)
{
{
cout
<<
"Usage: performance_gpu [--
filter <test_filter>] [--working-
dir <working_dir_with_slash>]
\n
"
;
cout
<<
"Usage: performance_gpu [--
ls] [--filter <test_filter>] [--work
dir <working_dir_with_slash>]
\n
"
;
return
0
;
return
0
;
}
}
if
(
key
==
"--filter"
&&
i
+
1
<
argc
)
if
(
key
==
"--filter"
&&
i
+
1
<
argc
)
TestSystem
::
instance
().
setTestFilter
(
argv
[
++
i
]);
TestSystem
::
instance
().
setTestFilter
(
argv
[
++
i
]);
else
if
(
key
==
"--work
ing-
dir"
&&
i
+
1
<
argc
)
else
if
(
key
==
"--workdir"
&&
i
+
1
<
argc
)
TestSystem
::
instance
().
setWorkingDir
(
argv
[
++
i
]);
TestSystem
::
instance
().
setWorkingDir
(
argv
[
++
i
]);
else
if
(
key
==
"--ls"
)
TestSystem
::
instance
().
setListMode
(
true
);
else
else
{
{
cout
<<
"Unknown parameter: '"
<<
key
<<
"'"
<<
endl
;
cout
<<
"Unknown parameter: '"
<<
key
<<
"'"
<<
endl
;
...
...
samples/gpu/performance/performance.h
View file @
8a799aa8
...
@@ -68,10 +68,14 @@ public:
...
@@ -68,10 +68,14 @@ public:
cur_subtest_is_empty_
=
false
;
cur_subtest_is_empty_
=
false
;
}
}
bool
isListMode
()
const
{
return
is_list_mode_
;
}
void
setListMode
(
bool
value
)
{
is_list_mode_
=
value
;
}
private
:
private
:
TestSystem
()
:
cur_subtest_is_empty_
(
true
),
cpu_elapsed_
(
0
),
TestSystem
()
:
cur_subtest_is_empty_
(
true
),
cpu_elapsed_
(
0
),
gpu_elapsed_
(
0
),
speedup_total_
(
0
.
0
),
gpu_elapsed_
(
0
),
speedup_total_
(
0
.
0
),
num_subtests_called_
(
0
)
{}
num_subtests_called_
(
0
),
is_list_mode_
(
false
)
{}
void
finishCurrentSubtest
();
void
finishCurrentSubtest
();
void
resetCurrentSubtest
()
void
resetCurrentSubtest
()
...
@@ -100,6 +104,8 @@ private:
...
@@ -100,6 +104,8 @@ private:
double
speedup_total_
;
double
speedup_total_
;
int
num_subtests_called_
;
int
num_subtests_called_
;
bool
is_list_mode_
;
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment