Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
dfaa8af6
Commit
dfaa8af6
authored
Aug 08, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed #1279
parent
ed801d3e
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
18 deletions
+17
-18
surf.cu
modules/gpu/src/cuda/surf.cu
+0
-3
utility.hpp
modules/gpu/src/opencv2/gpu/device/utility.hpp
+13
-13
test_main.cpp
modules/gpu/test/test_main.cpp
+4
-2
No files found.
modules/gpu/src/cuda/surf.cu
View file @
dfaa8af6
...
...
@@ -566,9 +566,6 @@ namespace cv { namespace gpu { namespace surf
float* s_sum_row = s_sum + threadIdx.y * 32;
//reduceSum32(s_sum_row, sumx);
//reduceSum32(s_sum_row, sumy);
warpReduce32(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
warpReduce32(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
...
...
modules/gpu/src/opencv2/gpu/device/utility.hpp
View file @
dfaa8af6
...
...
@@ -46,13 +46,13 @@
#include "internal_shared.hpp"
#include "saturate_cast.hpp"
#ifndef __CUDA_ARCH__
#define __CUDA_ARCH__ 0
#ifndef __CUDA_ARCH__
#define __CUDA_ARCH__ 0
#endif
#define OPENCV_GPU_LOG_WARP_SIZE (5)
#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_GPU_LOG_WARP_SIZE (5)
#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
#if defined(_WIN64) || defined(__LP64__)
...
...
@@ -65,15 +65,15 @@
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
typename
T
>
void
__host__
__device__
__forceinline__
swap
(
T
&
a
,
T
&
b
)
{
T
temp
=
a
;
a
=
b
;
b
=
temp
;
template
<
typename
T
>
void
__host__
__device__
__forceinline__
swap
(
T
&
a
,
T
&
b
)
{
T
temp
=
a
;
a
=
b
;
b
=
temp
;
}
// warp-synchronous 32 elements reduction
template
<
typename
T
,
typename
Op
>
__device__
__forceinline__
void
warpReduce32
(
volatile
T
*
data
,
volatile
T
&
partial_reduction
,
int
tid
,
Op
op
)
template
<
typename
T
,
typename
Op
>
__device__
__forceinline__
void
warpReduce32
(
volatile
T
*
data
,
T
&
partial_reduction
,
int
tid
,
Op
op
)
{
data
[
tid
]
=
partial_reduction
;
...
...
@@ -88,7 +88,7 @@ namespace cv { namespace gpu { namespace device
}
// warp-synchronous 16 elements reduction
template
<
typename
T
,
typename
Op
>
__device__
__forceinline__
void
warpReduce16
(
volatile
T
*
data
,
volatile
T
&
partial_reduction
,
int
tid
,
Op
op
)
template
<
typename
T
,
typename
Op
>
__device__
__forceinline__
void
warpReduce16
(
volatile
T
*
data
,
T
&
partial_reduction
,
int
tid
,
Op
op
)
{
data
[
tid
]
=
partial_reduction
;
...
...
@@ -102,7 +102,7 @@ namespace cv { namespace gpu { namespace device
}
// warp-synchronous reduction
template
<
int
n
,
typename
T
,
typename
Op
>
__device__
__forceinline__
void
warpReduce
(
volatile
T
*
data
,
volatile
T
&
partial_reduction
,
int
tid
,
Op
op
)
template
<
int
n
,
typename
T
,
typename
Op
>
__device__
__forceinline__
void
warpReduce
(
volatile
T
*
data
,
T
&
partial_reduction
,
int
tid
,
Op
op
)
{
if
(
tid
<
n
)
data
[
tid
]
=
partial_reduction
;
...
...
modules/gpu/test/test_main.cpp
View file @
dfaa8af6
...
...
@@ -109,9 +109,11 @@ int main(int argc, char** argv)
cvtest
::
TS
::
ptr
()
->
init
(
"gpu"
);
testing
::
InitGoogleTest
(
&
argc
,
argv
);
//cv::CommandLineParser parser(argc, (const char**)argv)
;
const
char
*
keys
=
"{ nvtest_output_level | nvtest_output_level | none | NVidia test verbosity level }"
;
std
::
string
outputLevel
=
"none"
;
//parser.get<std::string>("nvtest_output_level", "none");
cv
::
CommandLineParser
parser
(
argc
,
(
const
char
**
)
argv
,
keys
);
std
::
string
outputLevel
=
parser
.
get
<
std
::
string
>
(
"nvtest_output_level"
,
"none"
);
if
(
outputLevel
==
"none"
)
nvidiaTestOutputLevel
=
OutputLevelNone
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment