Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fa11f04a
Commit
fa11f04a
authored
Oct 11, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Oct 11, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1605 from alalek:ocl_fixes
parents
c0265c60
3b0823db
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
110 additions
and
950 deletions
+110
-950
surf.ocl.cpp
modules/nonfree/src/surf.ocl.cpp
+4
-16
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+5
-3
util.hpp
modules/ocl/include/opencv2/ocl/private/util.hpp
+25
-27
cl_context.cpp
modules/ocl/src/cl_context.cpp
+28
-16
cl_programcache.cpp
modules/ocl/src/cl_programcache.cpp
+16
-11
fft.cpp
modules/ocl/src/fft.cpp
+6
-7
gemm.cpp
modules/ocl/src/gemm.cpp
+2
-3
gftt.cpp
modules/ocl/src/gftt.cpp
+0
-2
mcwutil.cpp
modules/ocl/src/mcwutil.cpp
+1
-31
pyrlk_no_image.cl
modules/ocl/src/opencl/pyrlk_no_image.cl
+0
-764
pyrlk.cpp
modules/ocl/src/pyrlk.cpp
+23
-67
tvl1flow.cpp
modules/ocl/src/tvl1flow.cpp
+0
-3
No files found.
modules/nonfree/src/surf.ocl.cpp
View file @
fa11f04a
...
@@ -55,20 +55,11 @@ namespace cv
...
@@ -55,20 +55,11 @@ namespace cv
{
{
namespace
ocl
namespace
ocl
{
{
static
const
char
noImage2dOption
[]
=
"-D DISABLE_IMAGE2D"
;
static
bool
use_image2d
=
false
;
static
void
openCLExecuteKernelSURF
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
static
void
openCLExecuteKernelSURF
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
)
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
)
{
{
char
optBuf
[
100
]
=
{
0
};
char
optBuf
[
100
]
=
{
0
};
char
*
optBufPtr
=
optBuf
;
char
*
optBufPtr
=
optBuf
;
if
(
!
use_image2d
)
{
strcat
(
optBufPtr
,
noImage2dOption
);
optBufPtr
+=
strlen
(
noImage2dOption
);
}
cl_kernel
kernel
;
cl_kernel
kernel
;
kernel
=
openCLGetKernelFromSource
(
clCxt
,
source
,
kernelName
,
optBufPtr
);
kernel
=
openCLGetKernelFromSource
(
clCxt
,
source
,
kernelName
,
optBufPtr
);
size_t
wave_size
=
queryWaveFrontSize
(
kernel
);
size_t
wave_size
=
queryWaveFrontSize
(
kernel
);
...
@@ -149,13 +140,10 @@ public:
...
@@ -149,13 +140,10 @@ public:
counters
.
setTo
(
Scalar
::
all
(
0
));
counters
.
setTo
(
Scalar
::
all
(
0
));
integral
(
img
,
surf_
.
sum
);
integral
(
img
,
surf_
.
sum
);
use_image2d
=
support_image2d
();
if
(
use_image2d
)
bindImgTex
(
img
,
imgTex
);
{
bindImgTex
(
surf_
.
sum
,
sumTex
);
bindImgTex
(
img
,
imgTex
);
finish
();
bindImgTex
(
surf_
.
sum
,
sumTex
);
finish
();
}
maskSumTex
=
0
;
maskSumTex
=
0
;
...
...
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
fa11f04a
...
@@ -187,9 +187,9 @@ namespace cv
...
@@ -187,9 +187,9 @@ namespace cv
return
Context
::
getContext
()
->
getOpenCLCommandQueuePtr
();
return
Context
::
getContext
()
->
getOpenCLCommandQueuePtr
();
}
}
bool
CV_EXPORTS
supportsFeature
(
FEATURE_TYPE
featureType
);
CV_EXPORTS
bool
supportsFeature
(
FEATURE_TYPE
featureType
);
void
CV_EXPORTS
finish
();
CV_EXPORTS
void
finish
();
enum
BINARY_CACHE_MODE
enum
BINARY_CACHE_MODE
{
{
...
@@ -1739,7 +1739,7 @@ namespace cv
...
@@ -1739,7 +1739,7 @@ namespace cv
// output -
// output -
// keys = {1, 2, 3} (CV_8UC1)
// keys = {1, 2, 3} (CV_8UC1)
// values = {6,2, 10,5, 4,3} (CV_8UC2)
// values = {6,2, 10,5, 4,3} (CV_8UC2)
void
CV_EXPORTS
sortByKey
(
oclMat
&
keys
,
oclMat
&
values
,
int
method
,
bool
isGreaterThan
=
false
);
CV_EXPORTS
void
sortByKey
(
oclMat
&
keys
,
oclMat
&
values
,
int
method
,
bool
isGreaterThan
=
false
);
/*!Base class for MOG and MOG2!*/
/*!Base class for MOG and MOG2!*/
class
CV_EXPORTS
BackgroundSubtractor
class
CV_EXPORTS
BackgroundSubtractor
{
{
...
@@ -1938,6 +1938,7 @@ namespace cv
...
@@ -1938,6 +1938,7 @@ namespace cv
private
:
private
:
oclMat
samples_ocl
;
oclMat
samples_ocl
;
};
};
/*!*************** SVM *************!*/
/*!*************** SVM *************!*/
class
CV_EXPORTS
CvSVM_OCL
:
public
CvSVM
class
CV_EXPORTS
CvSVM_OCL
:
public
CvSVM
{
{
...
@@ -1957,6 +1958,7 @@ namespace cv
...
@@ -1957,6 +1958,7 @@ namespace cv
void
create_kernel
();
void
create_kernel
();
void
create_solver
();
void
create_solver
();
};
};
/*!*************** END *************!*/
/*!*************** END *************!*/
}
}
}
}
...
...
modules/ocl/include/opencv2/ocl/private/util.hpp
View file @
fa11f04a
...
@@ -77,6 +77,8 @@ inline cl_command_queue getClCommandQueue(const Context *ctx)
...
@@ -77,6 +77,8 @@ inline cl_command_queue getClCommandQueue(const Context *ctx)
return
*
(
cl_command_queue
*
)(
ctx
->
getOpenCLCommandQueuePtr
());
return
*
(
cl_command_queue
*
)(
ctx
->
getOpenCLCommandQueuePtr
());
}
}
CV_EXPORTS
cv
::
Mutex
&
getInitializationMutex
();
enum
openCLMemcpyKind
enum
openCLMemcpyKind
{
{
clMemcpyHostToDevice
=
0
,
clMemcpyHostToDevice
=
0
,
...
@@ -84,39 +86,39 @@ enum openCLMemcpyKind
...
@@ -84,39 +86,39 @@ enum openCLMemcpyKind
clMemcpyDeviceToDevice
clMemcpyDeviceToDevice
};
};
///////////////////////////OpenCL call wrappers////////////////////////////
///////////////////////////OpenCL call wrappers////////////////////////////
void
CV_EXPORTS
openCLMallocPitch
(
Context
*
clCxt
,
void
**
dev_ptr
,
size_t
*
pitch
,
CV_EXPORTS
void
openCLMallocPitch
(
Context
*
clCxt
,
void
**
dev_ptr
,
size_t
*
pitch
,
size_t
widthInBytes
,
size_t
height
);
size_t
widthInBytes
,
size_t
height
);
void
CV_EXPORTS
openCLMallocPitchEx
(
Context
*
clCxt
,
void
**
dev_ptr
,
size_t
*
pitch
,
CV_EXPORTS
void
openCLMallocPitchEx
(
Context
*
clCxt
,
void
**
dev_ptr
,
size_t
*
pitch
,
size_t
widthInBytes
,
size_t
height
,
DevMemRW
rw_type
,
DevMemType
mem_type
);
size_t
widthInBytes
,
size_t
height
,
DevMemRW
rw_type
,
DevMemType
mem_type
);
void
CV_EXPORTS
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
CV_EXPORTS
void
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
const
void
*
src
,
size_t
spitch
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
openCLMemcpyKind
kind
,
int
channels
=
-
1
);
size_t
width
,
size_t
height
,
openCLMemcpyKind
kind
,
int
channels
=
-
1
);
void
CV_EXPORTS
openCLCopyBuffer2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
int
dst_offset
,
CV_EXPORTS
void
openCLCopyBuffer2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
int
dst_offset
,
const
void
*
src
,
size_t
spitch
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
int
src_offset
);
size_t
width
,
size_t
height
,
int
src_offset
);
void
CV_EXPORTS
openCLFree
(
void
*
devPtr
);
CV_EXPORTS
void
openCLFree
(
void
*
devPtr
);
cl_mem
CV_EXPORTS
openCLCreateBuffer
(
Context
*
clCxt
,
size_t
flag
,
size_t
size
);
CV_EXPORTS
cl_mem
openCLCreateBuffer
(
Context
*
clCxt
,
size_t
flag
,
size_t
size
);
void
CV_EXPORTS
openCLReadBuffer
(
Context
*
clCxt
,
cl_mem
dst_buffer
,
void
*
host_buffer
,
size_t
size
);
CV_EXPORTS
void
openCLReadBuffer
(
Context
*
clCxt
,
cl_mem
dst_buffer
,
void
*
host_buffer
,
size_t
size
);
cl_kernel
CV_EXPORTS
openCLGetKernelFromSource
(
const
Context
*
clCxt
,
CV_EXPORTS
cl_kernel
openCLGetKernelFromSource
(
const
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
);
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
);
cl_kernel
CV_EXPORTS
openCLGetKernelFromSource
(
const
Context
*
clCxt
,
CV_EXPORTS
cl_kernel
openCLGetKernelFromSource
(
const
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
const
char
*
build_options
);
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
const
char
*
build_options
);
void
CV_EXPORTS
openCLVerifyKernel
(
const
Context
*
clCxt
,
cl_kernel
kernel
,
size_t
*
localThreads
);
CV_EXPORTS
void
openCLVerifyKernel
(
const
Context
*
clCxt
,
cl_kernel
kernel
,
size_t
*
localThreads
);
void
CV_EXPORTS
openCLExecuteKernel
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
CV_EXPORTS
void
openCLExecuteKernel
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
globalcols
,
int
globalrows
,
size_t
blockSize
=
16
,
int
kernel_expand_depth
=
-
1
,
int
kernel_expand_channel
=
-
1
);
int
globalcols
,
int
globalrows
,
size_t
blockSize
=
16
,
int
kernel_expand_depth
=
-
1
,
int
kernel_expand_channel
=
-
1
);
void
CV_EXPORTS
openCLExecuteKernel_
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
CV_EXPORTS
void
openCLExecuteKernel_
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
const
char
*
build_options
);
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
const
char
*
build_options
);
void
CV_EXPORTS
openCLExecuteKernel
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
CV_EXPORTS
void
openCLExecuteKernel
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
);
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
);
void
CV_EXPORTS
openCLExecuteKernel
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
CV_EXPORTS
void
openCLExecuteKernel
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
const
char
*
build_options
);
int
depth
,
const
char
*
build_options
);
cl_mem
CV_EXPORTS
load_constant
(
cl_context
context
,
cl_command_queue
command_queue
,
const
void
*
value
,
CV_EXPORTS
cl_mem
load_constant
(
cl_context
context
,
cl_command_queue
command_queue
,
const
void
*
value
,
const
size_t
size
);
const
size_t
size
);
cl_mem
CV_EXPORTS
openCLMalloc
(
cl_context
clCxt
,
size_t
size
,
cl_mem_flags
flags
,
void
*
host_ptr
);
CV_EXPORTS
cl_mem
openCLMalloc
(
cl_context
clCxt
,
size_t
size
,
cl_mem_flags
flags
,
void
*
host_ptr
);
enum
FLUSH_MODE
enum
FLUSH_MODE
{
{
...
@@ -125,9 +127,9 @@ enum FLUSH_MODE
...
@@ -125,9 +127,9 @@ enum FLUSH_MODE
DISABLE
DISABLE
};
};
void
CV_EXPORTS
openCLExecuteKernel2
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
CV_EXPORTS
void
openCLExecuteKernel2
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
FLUSH_MODE
finish_mode
=
DISABLE
);
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
FLUSH_MODE
finish_mode
=
DISABLE
);
void
CV_EXPORTS
openCLExecuteKernel2
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
CV_EXPORTS
void
openCLExecuteKernel2
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
char
*
build_options
,
FLUSH_MODE
finish_mode
=
DISABLE
);
int
depth
,
char
*
build_options
,
FLUSH_MODE
finish_mode
=
DISABLE
);
...
@@ -135,8 +137,8 @@ void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry
...
@@ -135,8 +137,8 @@ void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry
// note:
// note:
// 1. there is no memory management. User need to explicitly release the resource
// 1. there is no memory management. User need to explicitly release the resource
// 2. for faster clamping, there is no buffer padding for the constructed texture
// 2. for faster clamping, there is no buffer padding for the constructed texture
cl_mem
CV_EXPORTS
bindTexture
(
const
oclMat
&
mat
);
CV_EXPORTS
cl_mem
bindTexture
(
const
oclMat
&
mat
);
void
CV_EXPORTS
releaseTexture
(
cl_mem
&
texture
);
CV_EXPORTS
void
releaseTexture
(
cl_mem
&
texture
);
//Represents an image texture object
//Represents an image texture object
class
CV_EXPORTS
TextureCL
class
CV_EXPORTS
TextureCL
...
@@ -163,15 +165,11 @@ private:
...
@@ -163,15 +165,11 @@ private:
// bind oclMat to OpenCL image textures and retunrs an TextureCL object
// bind oclMat to OpenCL image textures and retunrs an TextureCL object
// note:
// note:
// for faster clamping, there is no buffer padding for the constructed texture
// for faster clamping, there is no buffer padding for the constructed texture
Ptr
<
TextureCL
>
CV_EXPORTS
bindTexturePtr
(
const
oclMat
&
mat
);
CV_EXPORTS
Ptr
<
TextureCL
>
bindTexturePtr
(
const
oclMat
&
mat
);
// returns whether the current context supports image2d_t format or not
bool
CV_EXPORTS
support_image2d
(
Context
*
clCxt
=
Context
::
getContext
());
bool
CV_EXPORTS
isCpuDevice
();
size_t
CV_EXPORTS
queryWaveFrontSize
(
cl_kernel
kernel
);
CV_EXPORTS
bool
isCpuDevice
(
);
CV_EXPORTS
size_t
queryWaveFrontSize
(
cl_kernel
kernel
);
inline
size_t
divUp
(
size_t
total
,
size_t
grain
)
inline
size_t
divUp
(
size_t
total
,
size_t
grain
)
...
...
modules/ocl/src/cl_context.cpp
View file @
fa11f04a
...
@@ -55,6 +55,21 @@
...
@@ -55,6 +55,21 @@
namespace
cv
{
namespace
cv
{
namespace
ocl
{
namespace
ocl
{
struct
__Module
{
__Module
();
~
__Module
();
cv
::
Mutex
initializationMutex
;
cv
::
Mutex
currentContextMutex
;
};
static
__Module
__module
;
cv
::
Mutex
&
getInitializationMutex
()
{
return
__module
.
initializationMutex
;
}
struct
PlatformInfoImpl
struct
PlatformInfoImpl
{
{
cl_platform_id
platform_id
;
cl_platform_id
platform_id
;
...
@@ -312,7 +327,6 @@ not_found:
...
@@ -312,7 +327,6 @@ not_found:
return
false
;
return
false
;
}
}
static
cv
::
Mutex
__initializedMutex
;
static
bool
__initialized
=
false
;
static
bool
__initialized
=
false
;
static
int
initializeOpenCLDevices
()
static
int
initializeOpenCLDevices
()
{
{
...
@@ -499,7 +513,6 @@ private:
...
@@ -499,7 +513,6 @@ private:
ContextImpl
&
operator
=
(
const
ContextImpl
&
);
// disabled
ContextImpl
&
operator
=
(
const
ContextImpl
&
);
// disabled
};
};
static
cv
::
Mutex
currentContextMutex
;
static
ContextImpl
*
currentContext
=
NULL
;
static
ContextImpl
*
currentContext
=
NULL
;
Context
*
Context
::
getContext
()
Context
*
Context
::
getContext
()
...
@@ -508,7 +521,7 @@ Context* Context::getContext()
...
@@ -508,7 +521,7 @@ Context* Context::getContext()
{
{
if
(
!
__initialized
||
!
__deviceSelected
)
if
(
!
__initialized
||
!
__deviceSelected
)
{
{
cv
::
AutoLock
lock
(
__initializedMutex
);
cv
::
AutoLock
lock
(
getInitializationMutex
()
);
if
(
!
__initialized
)
if
(
!
__initialized
)
{
{
if
(
initializeOpenCLDevices
()
==
0
)
if
(
initializeOpenCLDevices
()
==
0
)
...
@@ -604,7 +617,7 @@ void ContextImpl::cleanupContext(void)
...
@@ -604,7 +617,7 @@ void ContextImpl::cleanupContext(void)
fft_teardown
();
fft_teardown
();
clBlasTeardown
();
clBlasTeardown
();
cv
::
AutoLock
lock
(
currentContextMutex
);
cv
::
AutoLock
lock
(
__module
.
currentContextMutex
);
if
(
currentContext
)
if
(
currentContext
)
delete
currentContext
;
delete
currentContext
;
currentContext
=
NULL
;
currentContext
=
NULL
;
...
@@ -615,7 +628,7 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo)
...
@@ -615,7 +628,7 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo)
CV_Assert
(
deviceInfo
->
_id
>=
0
&&
deviceInfo
->
_id
<
(
int
)
global_devices
.
size
());
CV_Assert
(
deviceInfo
->
_id
>=
0
&&
deviceInfo
->
_id
<
(
int
)
global_devices
.
size
());
{
{
cv
::
AutoLock
lock
(
currentContextMutex
);
cv
::
AutoLock
lock
(
__module
.
currentContextMutex
);
if
(
currentContext
)
if
(
currentContext
)
{
{
if
(
currentContext
->
deviceInfo
.
_id
==
deviceInfo
->
_id
)
if
(
currentContext
->
deviceInfo
.
_id
==
deviceInfo
->
_id
)
...
@@ -640,7 +653,7 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo)
...
@@ -640,7 +653,7 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo)
ContextImpl
*
old
=
NULL
;
ContextImpl
*
old
=
NULL
;
{
{
cv
::
AutoLock
lock
(
currentContextMutex
);
cv
::
AutoLock
lock
(
__module
.
currentContextMutex
);
old
=
currentContext
;
old
=
currentContext
;
currentContext
=
ctx
;
currentContext
=
ctx
;
}
}
...
@@ -724,20 +737,19 @@ bool supportsFeature(FEATURE_TYPE featureType)
...
@@ -724,20 +737,19 @@ bool supportsFeature(FEATURE_TYPE featureType)
return
Context
::
getContext
()
->
supportsFeature
(
featureType
);
return
Context
::
getContext
()
->
supportsFeature
(
featureType
);
}
}
struct
__Module
__Module
::
__Module
()
{
/* moved to Context::getContext(): initializeOpenCLDevices(); */
}
__Module
::~
__Module
()
{
{
__Module
()
{
/* moved to Context::getContext(): initializeOpenCLDevices(); */
}
~
__Module
()
{
#if defined(WIN32) && defined(CVAPI_EXPORTS)
#if defined(WIN32) && defined(CVAPI_EXPORTS)
// nothing, see DllMain
// nothing, see DllMain
#else
#else
ContextImpl
::
cleanupContext
();
ContextImpl
::
cleanupContext
();
#endif
#endif
}
}
};
static
__Module
__module
;
}
// namespace ocl
}
// namespace ocl
}
// namespace cv
}
// namespace cv
...
...
modules/ocl/src/cl_programcache.cpp
View file @
fa11f04a
...
@@ -448,26 +448,30 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
...
@@ -448,26 +448,30 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
{
{
stringstream
src_sign
;
stringstream
src_sign
;
src_sign
<<
source
->
name
;
if
(
source
->
name
)
src_sign
<<
getClContext
(
ctx
);
if
(
NULL
!=
build_options
)
{
{
src_sign
<<
"_"
<<
build_options
;
src_sign
<<
source
->
name
;
}
src_sign
<<
getClContext
(
ctx
);
if
(
NULL
!=
build_options
)
{
src_sign
<<
"_"
<<
build_options
;
}
{
cv
::
AutoLock
lockCache
(
mutexCache
);
cl_program
program
=
ProgramCache
::
getProgramCache
()
->
progLookup
(
src_sign
.
str
());
if
(
!!
program
)
{
{
clRetainProgram
(
program
);
cv
::
AutoLock
lockCache
(
mutexCache
);
return
program
;
cl_program
program
=
ProgramCache
::
getProgramCache
()
->
progLookup
(
src_sign
.
str
());
if
(
!!
program
)
{
clRetainProgram
(
program
);
return
program
;
}
}
}
}
}
cv
::
AutoLock
lockCache
(
mutexFiles
);
cv
::
AutoLock
lockCache
(
mutexFiles
);
// second check
// second check
if
(
source
->
name
)
{
{
cv
::
AutoLock
lockCache
(
mutexCache
);
cv
::
AutoLock
lockCache
(
mutexCache
);
cl_program
program
=
ProgramCache
::
getProgramCache
()
->
progLookup
(
src_sign
.
str
());
cl_program
program
=
ProgramCache
::
getProgramCache
()
->
progLookup
(
src_sign
.
str
());
...
@@ -493,6 +497,7 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
...
@@ -493,6 +497,7 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
cl_program
program
=
programFileCache
.
getOrBuildProgram
(
ctx
,
source
,
all_build_options
);
cl_program
program
=
programFileCache
.
getOrBuildProgram
(
ctx
,
source
,
all_build_options
);
//Cache the binary for future use if build_options is null
//Cache the binary for future use if build_options is null
if
(
source
->
name
)
{
{
cv
::
AutoLock
lockCache
(
mutexCache
);
cv
::
AutoLock
lockCache
(
mutexCache
);
this
->
addProgram
(
src_sign
.
str
(),
program
);
this
->
addProgram
(
src_sign
.
str
(),
program
);
...
...
modules/ocl/src/fft.cpp
View file @
fa11f04a
...
@@ -90,8 +90,7 @@ namespace cv
...
@@ -90,8 +90,7 @@ namespace cv
protected
:
protected
:
PlanCache
();
PlanCache
();
~
PlanCache
();
~
PlanCache
();
friend
class
auto_ptr
<
PlanCache
>
;
static
PlanCache
*
planCache
;
static
auto_ptr
<
PlanCache
>
planCache
;
bool
started
;
bool
started
;
vector
<
FftPlan
*>
planStore
;
vector
<
FftPlan
*>
planStore
;
...
@@ -102,9 +101,9 @@ namespace cv
...
@@ -102,9 +101,9 @@ namespace cv
static
PlanCache
*
getPlanCache
()
static
PlanCache
*
getPlanCache
()
{
{
if
(
NULL
==
planCache
.
get
()
)
if
(
NULL
==
planCache
)
planCache
.
reset
(
new
PlanCache
()
);
planCache
=
new
PlanCache
(
);
return
planCache
.
get
()
;
return
planCache
;
}
}
// return a baked plan->
// return a baked plan->
// if there is one matched plan, return it
// if there is one matched plan, return it
...
@@ -118,7 +117,7 @@ namespace cv
...
@@ -118,7 +117,7 @@ namespace cv
};
};
}
}
}
}
auto_ptr
<
PlanCache
>
PlanCache
::
planCache
;
PlanCache
*
PlanCache
::
planCache
=
NULL
;
void
cv
::
ocl
::
fft_setup
()
void
cv
::
ocl
::
fft_setup
()
{
{
...
@@ -138,13 +137,13 @@ void cv::ocl::fft_teardown()
...
@@ -138,13 +137,13 @@ void cv::ocl::fft_teardown()
{
{
return
;
return
;
}
}
delete
pCache
.
setupData
;
for
(
size_t
i
=
0
;
i
<
pCache
.
planStore
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
pCache
.
planStore
.
size
();
i
++
)
{
{
delete
pCache
.
planStore
[
i
];
delete
pCache
.
planStore
[
i
];
}
}
pCache
.
planStore
.
clear
();
pCache
.
planStore
.
clear
();
openCLSafeCall
(
clAmdFftTeardown
(
)
);
openCLSafeCall
(
clAmdFftTeardown
(
)
);
delete
pCache
.
setupData
;
pCache
.
setupData
=
NULL
;
pCache
.
started
=
false
;
pCache
.
started
=
false
;
}
}
...
...
modules/ocl/src/gemm.cpp
View file @
fa11f04a
...
@@ -76,13 +76,12 @@ void cv::ocl::clBlasTeardown()
...
@@ -76,13 +76,12 @@ void cv::ocl::clBlasTeardown()
using
namespace
cv
;
using
namespace
cv
;
static
bool
clBlasInitialized
=
false
;
static
bool
clBlasInitialized
=
false
;
static
Mutex
cs
;
void
cv
::
ocl
::
clBlasSetup
()
void
cv
::
ocl
::
clBlasSetup
()
{
{
if
(
!
clBlasInitialized
)
if
(
!
clBlasInitialized
)
{
{
AutoLock
al
(
cs
);
AutoLock
lock
(
getInitializationMutex
()
);
if
(
!
clBlasInitialized
)
if
(
!
clBlasInitialized
)
{
{
openCLSafeCall
(
clAmdBlasSetup
());
openCLSafeCall
(
clAmdBlasSetup
());
...
@@ -93,7 +92,7 @@ void cv::ocl::clBlasSetup()
...
@@ -93,7 +92,7 @@ void cv::ocl::clBlasSetup()
void
cv
::
ocl
::
clBlasTeardown
()
void
cv
::
ocl
::
clBlasTeardown
()
{
{
AutoLock
al
(
cs
);
AutoLock
lock
(
getInitializationMutex
()
);
if
(
clBlasInitialized
)
if
(
clBlasInitialized
)
{
{
clAmdBlasTeardown
();
clAmdBlasTeardown
();
...
...
modules/ocl/src/gftt.cpp
View file @
fa11f04a
...
@@ -202,8 +202,6 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
...
@@ -202,8 +202,6 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
CV_Assert
(
qualityLevel
>
0
&&
minDistance
>=
0
&&
maxCorners
>=
0
);
CV_Assert
(
qualityLevel
>
0
&&
minDistance
>=
0
&&
maxCorners
>=
0
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
image
.
size
()));
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
image
.
size
()));
CV_DbgAssert
(
support_image2d
());
ensureSizeIsEnough
(
image
.
size
(),
CV_32F
,
eig_
);
ensureSizeIsEnough
(
image
.
size
(),
CV_32F
,
eig_
);
if
(
useHarrisDetector
)
if
(
useHarrisDetector
)
...
...
modules/ocl/src/mcwutil.cpp
View file @
fa11f04a
...
@@ -216,41 +216,11 @@ namespace cv
...
@@ -216,41 +216,11 @@ namespace cv
{
{
return
Ptr
<
TextureCL
>
(
new
TextureCL
(
bindTexture
(
mat
),
mat
.
rows
,
mat
.
cols
,
mat
.
type
()));
return
Ptr
<
TextureCL
>
(
new
TextureCL
(
bindTexture
(
mat
),
mat
.
rows
,
mat
.
cols
,
mat
.
type
()));
}
}
void
releaseTexture
(
cl_mem
&
texture
)
void
releaseTexture
(
cl_mem
&
texture
)
{
{
openCLFree
(
texture
);
openCLFree
(
texture
);
}
}
bool
support_image2d
(
Context
*
clCxt
)
{
const
cv
::
ocl
::
ProgramEntry
_kernel
=
{
"test_func"
,
"__kernel void test_func(image2d_t img) {}"
,
NULL
};
static
bool
_isTested
=
false
;
static
bool
_support
=
false
;
if
(
_isTested
)
{
return
_support
;
}
try
{
cv
::
ocl
::
openCLGetKernelFromSource
(
clCxt
,
&
_kernel
,
"test_func"
);
cv
::
ocl
::
finish
();
_support
=
true
;
}
catch
(
const
cv
::
Exception
&
e
)
{
if
(
e
.
code
==
-
217
)
{
_support
=
false
;
}
else
{
// throw e once again
throw
e
;
}
}
_isTested
=
true
;
return
_support
;
}
}
//namespace ocl
}
//namespace ocl
}
//namespace cv
}
//namespace cv
modules/ocl/src/opencl/pyrlk_no_image.cl
deleted
100644 → 0
View file @
c0265c60
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Sen
Liu,
sen@multicorewareinc.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
oclMaterials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
define
BUFFER
256
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
float
*smem1,
__local
float
*smem2,
__local
float
*smem3,
int
tid
)
{
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
smem3[tid]
=
val3
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
if
BUFFER
>
128
if
(
tid
<
128
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
128]
;
smem2[tid]
=
val2
+=
smem2[tid
+
128]
;
smem3[tid]
=
val3
+=
smem3[tid
+
128]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
#
if
BUFFER
>
64
if
(
tid
<
64
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
64]
;
smem2[tid]
=
val2
+=
smem2[tid
+
64]
;
smem3[tid]
=
val3
+=
smem3[tid
+
64]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
tid
<
32
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
32]
;
smem2[tid]
=
val2
+=
smem2[tid
+
32]
;
smem3[tid]
=
val3
+=
smem3[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
16]
;
smem2[tid]
=
val2
+=
smem2[tid
+
16]
;
smem3[tid]
=
val3
+=
smem3[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
volatile
__local
float
*vmem1
=
smem1
;
volatile
__local
float
*vmem2
=
smem2
;
volatile
__local
float
*vmem3
=
smem3
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
8]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
8]
;
vmem3[tid]
=
val3
+=
vmem3[tid
+
8]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
4]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
4]
;
vmem3[tid]
=
val3
+=
vmem3[tid
+
4]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
2]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
2]
;
vmem3[tid]
=
val3
+=
vmem3[tid
+
2]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
1]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
1]
;
vmem3[tid]
=
val3
+=
vmem3[tid
+
1]
;
}
}
void
reduce2
(
float
val1,
float
val2,
__local
float
*smem1,
__local
float
*smem2,
int
tid
)
{
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
if
BUFFER
>
128
if
(
tid
<
128
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
128]
;
smem2[tid]
=
val2
+=
smem2[tid
+
128]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
#
if
BUFFER
>
64
if
(
tid
<
64
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
64]
;
smem2[tid]
=
val2
+=
smem2[tid
+
64]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
tid
<
32
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
32]
;
smem2[tid]
=
val2
+=
smem2[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
16]
;
smem2[tid]
=
val2
+=
smem2[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
volatile
__local
float
*vmem1
=
smem1
;
volatile
__local
float
*vmem2
=
smem2
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
8]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
8]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
4]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
4]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
2]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
2]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
1]
;
vmem2[tid]
=
val2
+=
vmem2[tid
+
1]
;
}
}
void
reduce1
(
float
val1,
__local
float
*smem1,
int
tid
)
{
smem1[tid]
=
val1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
if
BUFFER
>
128
if
(
tid
<
128
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
128]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
#
if
BUFFER
>
64
if
(
tid
<
64
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
64]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
tid
<
32
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
volatile
__local
float
*vmem1
=
smem1
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
volatile
__local
float
*vmem1
=
smem1
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
8]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
4]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
2]
;
vmem1[tid]
=
val1
+=
vmem1[tid
+
1]
;
}
}
#
define
SCALE
(
1.0f
/
(
1
<<
20
))
#
define
THRESHOLD
0.01f
#
define
DIMENSION
21
float
readImage2Df_C1
(
__global
const
float
*image,
const
float
x,
const
float
y,
const
int
rows,
const
int
cols,
const
int
elemCntPerRow
)
{
float2
coor
=
(
float2
)(
x,
y
)
;
int
i0
=
clamp
((
int
)
floor
(
coor.x
)
,
0
,
cols
-
1
)
;
int
j0
=
clamp
((
int
)
floor
(
coor.y
)
,
0
,
rows
-
1
)
;
int
i1
=
clamp
((
int
)
floor
(
coor.x
)
+
1
,
0
,
cols
-
1
)
;
int
j1
=
clamp
((
int
)
floor
(
coor.y
)
+
1
,
0
,
rows
-
1
)
;
float
a
=
coor.x
-
floor
(
coor.x
)
;
float
b
=
coor.y
-
floor
(
coor.y
)
;
return
(
1
-
a
)
*
(
1
-
b
)
*
image[mad24
(
j0,
elemCntPerRow,
i0
)
]
+
a
*
(
1
-
b
)
*
image[mad24
(
j0,
elemCntPerRow,
i1
)
]
+
(
1
-
a
)
*
b
*
image[mad24
(
j1,
elemCntPerRow,
i0
)
]
+
a
*
b
*
image[mad24
(
j1,
elemCntPerRow,
i1
)
]
;
}
__kernel
void
lkSparse_C1_D5
(
__global
const
float
*I,
__global
const
float
*J,
__global
const
float2
*prevPts,
int
prevPtsStep,
__global
float2
*nextPts,
int
nextPtsStep,
__global
uchar
*status,
__global
float
*err,
const
int
level,
const
int
rows,
const
int
cols,
const
int
elemCntPerRow,
int
PATCH_X,
int
PATCH_Y,
int
cn,
int
c_winSize_x,
int
c_winSize_y,
int
c_iters,
char
calcErr
)
{
__local
float
smem1[BUFFER]
;
__local
float
smem2[BUFFER]
;
__local
float
smem3[BUFFER]
;
float2
c_halfWin
=
(
float2
)((
c_winSize_x
-
1
)
>>
1
,
(
c_winSize_y
-
1
)
>>
1
)
;
const
int
tid
=
mad24
(
get_local_id
(
1
)
,
get_local_size
(
0
)
,
get_local_id
(
0
))
;
float2
prevPt
=
prevPts[get_group_id
(
0
)
]
*
(
1.0f
/
(
1
<<
level
))
;
if
(
prevPt.x
<
0
|
| prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
if (tid == 0 && level == 0)
{
status[get_group_id(0)] = 0;
}
return;
}
prevPt -= c_halfWin;
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A12 = 0;
float A22 = 0;
float I_patch[1][3];
float dIdx_patch[1][3];
float dIdy_patch[1][3];
for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i)
{
for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j)
{
float x = (prevPt.x + xBase);
float y = (prevPt.y + yBase);
I_patch[i][j] = readImage2Df_C1(I, x, y, rows, cols, elemCntPerRow);
float dIdx = 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) -
(3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow));
float dIdy = 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) -
(3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow));
dIdx_patch[i][j] = dIdx;
dIdy_patch[i][j] = dIdy;
A11 += dIdx * dIdx;
A12 += dIdx * dIdy;
A22 += dIdy * dIdy;
}
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
float D = A11 * A22 - A12 * A12;
if (D < 1.192092896e-07f)
{
if (tid == 0 && level == 0)
{
status[get_group_id(0)] = 0;
}
return;
}
D = 1.f / D;
A11 *= D;
A12 *= D;
A22 *= D;
float2 nextPt = nextPts[get_group_id(0)];
nextPt = nextPt * 2.0f - c_halfWin;
for (int k = 0; k < c_iters; ++k)
{
if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows)
{
if (tid == 0 && level == 0)
{
status[get_group_id(0)] = 0;
}
return;
}
float b1 = 0;
float b2 = 0;
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
{
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
{
float diff = (readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f;
b1 += diff * dIdx_patch[i][j];
b2 += diff * dIdy_patch[i][j];
}
}
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
b1 = smem1[0];
b2 = smem2[0];
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.y = A12 * b1 - A11 * b2;
nextPt += delta;
//if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
// break;
}
float errval = 0.0f;
if (calcErr)
{
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
{
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
{
float diff = readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j];
errval += fabs(diff);
}
}
reduce1(errval, smem1, tid);
}
if (tid == 0)
{
nextPt += c_halfWin;
nextPts[get_group_id(0)] = nextPt;
if (calcErr)
{
err[get_group_id(0)] = smem1[0] / (c_winSize_x * c_winSize_y);
}
}
}
float4 readImage2Df_C4(__global const float4 *image, const float x, const float y, const int rows, const int cols, const int elemCntPerRow)
{
float2 coor = (float2)(x, y);
int i0 = clamp((int)floor(coor.x), 0, cols - 1);
int j0 = clamp((int)floor(coor.y), 0, rows - 1);
int i1 = clamp((int)floor(coor.x) + 1, 0, cols - 1);
int j1 = clamp((int)floor(coor.y) + 1, 0, rows - 1);
float a = coor.x - floor(coor.x);
float b = coor.y - floor(coor.y);
return (1 - a) * (1 - b) * image[mad24(j0, elemCntPerRow, i0)]
+ a * (1 - b) * image[mad24(j0, elemCntPerRow, i1)]
+ (1 - a) * b * image[mad24(j1, elemCntPerRow, i0)]
+ a * b * image[mad24(j1, elemCntPerRow, i1)];
}
__kernel void lkSparse_C4_D5(__global const float *I, __global const float *J,
__global const float2 *prevPts, int prevPtsStep, __global float2 *nextPts, int nextPtsStep, __global uchar *status, __global float *err,
const int level, const int rows, const int cols, const int elemCntPerRow,
int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
__local float smem1[BUFFER];
__local float smem2[BUFFER];
__local float smem3[BUFFER];
float2 c_halfWin = (float2)((c_winSize_x - 1) >> 1, (c_winSize_y - 1) >> 1);
const int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0));
float2 prevPt = prevPts[get_group_id(0)] * (1.0f / (1 << level));
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
if (tid == 0 && level == 0)
{
status[get_group_id(0)] = 0;
}
return;
}
prevPt -= c_halfWin;
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A12 = 0;
float A22 = 0;
float4 I_patch[1][3];
float4 dIdx_patch[1][3];
float4 dIdy_patch[1][3];
__global float4 *ptrI = (__global float4 *)I;
for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i)
{
for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j)
{
float x = (prevPt.x + xBase);
float y = (prevPt.y + yBase);
I_patch[i][j] = readImage2Df_C4(ptrI, x, y, rows, cols, elemCntPerRow);
float4 dIdx = 3.0f * readImage2Df_C4(ptrI, x + 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y + 1, rows, cols, elemCntPerRow) -
(3.0f * readImage2Df_C4(ptrI, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x - 1, y + 1, rows, cols, elemCntPerRow));
float4 dIdy = 3.0f * readImage2Df_C4(ptrI, x - 1, y + 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y + 1, rows, cols, elemCntPerRow) -
(3.0f * readImage2Df_C4(ptrI, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y - 1, rows, cols, elemCntPerRow));
dIdx_patch[i][j] = dIdx;
dIdy_patch[i][j] = dIdy;
A11 += (dIdx * dIdx).x + (dIdx * dIdx).y + (dIdx * dIdx).z;
A12 += (dIdx * dIdy).x + (dIdx * dIdy).y + (dIdx * dIdy).z;
A22 += (dIdy * dIdy).x + (dIdy * dIdy).y + (dIdy * dIdy).z;
}
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
float D = A11 * A22 - A12 * A12;
//pD[get_group_id(0)] = D;
if (D < 1.192092896e-07f)
{
if (tid == 0 && level == 0)
{
status[get_group_id(0)] = 0;
}
return;
}
D = 1.f / D;
A11 *= D;
A12 *= D;
A22 *= D;
float2 nextPt = nextPts[get_group_id(0)];
nextPt = nextPt * 2.0f - c_halfWin;
__global float4 *ptrJ = (__global float4 *)J;
for (int k = 0; k < c_iters; ++k)
{
if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows)
{
if (tid == 0 && level == 0)
{
status[get_group_id(0)] = 0;
}
return;
}
float b1 = 0;
float b2 = 0;
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
{
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
{
float4 diff = (readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f;
b1 += (diff * dIdx_patch[i][j]).x + (diff * dIdx_patch[i][j]).y + (diff * dIdx_patch[i][j]).z;
b2 += (diff * dIdy_patch[i][j]).x + (diff * dIdy_patch[i][j]).y + (diff * dIdy_patch[i][j]).z;
}
}
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
b1 = smem1[0];
b2 = smem2[0];
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.y = A12 * b1 - A11 * b2;
nextPt += delta;
//if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
// break;
}
float errval = 0.0f;
if (calcErr)
{
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
{
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
{
float4 diff = readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j];
errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
}
}
reduce1(errval, smem1, tid);
}
if (tid == 0)
{
nextPt += c_halfWin;
nextPts[get_group_id(0)] = nextPt;
if (calcErr)
{
err[get_group_id(0)] = smem1[0] / (3 * c_winSize_x * c_winSize_y);
}
}
}
int readImage2Di_C1(__global const int *image, float2 coor, int2 size, const int elemCntPerRow)
{
int i = clamp((int)floor(coor.x), 0, size.x - 1);
int j = clamp((int)floor(coor.y), 0, size.y - 1);
return image[mad24(j, elemCntPerRow, i)];
}
__kernel void lkDense_C1_D0(__global const int *I, __global const int *J, __global float *u, int uStep, __global float *v, int vStep, __global const float *prevU, int prevUStep, __global const float *prevV, int prevVStep,
const int rows, const int cols, /*__global float* err, int errStep, int cn,*/
const int elemCntPerRow, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
int c_halfWin_x = (c_winSize_x - 1) / 2;
int c_halfWin_y = (c_winSize_y - 1) / 2;
const int patchWidth = get_local_size(0) + 2 * c_halfWin_x;
const int patchHeight = get_local_size(1) + 2 * c_halfWin_y;
__local int smem[8192];
__local int *I_patch = smem;
__local int *dIdx_patch = I_patch + patchWidth * patchHeight;
__local int *dIdy_patch = dIdx_patch + patchWidth * patchHeight;
const int xBase = get_group_id(0) * get_local_size(0);
const int yBase = get_group_id(1) * get_local_size(1);
int2 size = (int2)(cols, rows);
for (int i = get_local_id(1); i < patchHeight; i += get_local_size(1))
{
for (int j = get_local_id(0); j < patchWidth; j += get_local_size(0))
{
float x = xBase - c_halfWin_x + j + 0.5f;
float y = yBase - c_halfWin_y + i + 0.5f;
I_patch[i * patchWidth + j] = readImage2Di_C1(I, (float2)(x, y), size, elemCntPerRow);
// Sharr Deriv
dIdx_patch[i * patchWidth + j] = 3 * readImage2Di_C1(I, (float2)(x + 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x + 1, y), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y + 1), size, elemCntPerRow) -
(3 * readImage2Di_C1(I, (float2)(x - 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x - 1, y), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x - 1, y + 1), size, elemCntPerRow));
dIdy_patch[i * patchWidth + j] = 3 * readImage2Di_C1(I, (float2)(x - 1, y + 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x, y + 1), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y + 1), size, elemCntPerRow) -
(3 * readImage2Di_C1(I, (float2)(x - 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x, y - 1), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y - 1), size, elemCntPerRow));
}
}
barrier(CLK_LOCAL_MEM_FENCE);
// extract the patch from the first image, compute covariation matrix of derivatives
const int x = get_global_id(0);
const int y = get_global_id(1);
if (x >= cols || y >= rows)
{
return;
}
int A11i = 0;
int A12i = 0;
int A22i = 0;
for (int i = 0; i < c_winSize_y; ++i)
{
for (int j = 0; j < c_winSize_x; ++j)
{
int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
A11i += dIdx * dIdx;
A12i += dIdx * dIdy;
A22i += dIdy * dIdy;
}
}
float A11 = A11i;
float A12 = A12i;
float A22 = A22i;
float D = A11 * A22 - A12 * A12;
//if (calcErr && GET_MIN_EIGENVALS)
// (err + y * errStep)[x] = minEig;
if (D < 1.192092896e-07f)
{
//if (calcErr)
// err(y, x) = 3.402823466e+38f;
return;
}
D = 1.f / D;
A11 *= D;
A12 *= D;
A22 *= D;
float2 nextPt;
nextPt.x = x + prevU[y / 2 * prevUStep / 4 + x / 2] * 2.0f;
nextPt.y = y + prevV[y / 2 * prevVStep / 4 + x / 2] * 2.0f;
for (int k = 0; k < c_iters; ++k)
{
if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 |
|
nextPt.y
>=
rows
)
{
//if
(
calcErr
)
//
err
(
y,
x
)
=
3.402823466e+38f
;
return
;
}
int
b1
=
0
;
int
b2
=
0
;
for
(
int
i
=
0
; i < c_winSize_y; ++i)
{
for
(
int
j
=
0
; j < c_winSize_x; ++j)
{
int
iI
=
I_patch[
(
get_local_id
(
1
)
+
i
)
*
patchWidth
+
get_local_id
(
0
)
+
j]
;
int
iJ
=
readImage2Di_C1
(
J,
(
float2
)(
nextPt.x
-
c_halfWin_x
+
j
+
0.5f,
nextPt.y
-
c_halfWin_y
+
i
+
0.5f
)
,
size,
elemCntPerRow
)
;
int
diff
=
(
iJ
-
iI
)
*
32
;
int
dIdx
=
dIdx_patch[
(
get_local_id
(
1
)
+
i
)
*
patchWidth
+
(
get_local_id
(
0
)
+
j
)
]
;
int
dIdy
=
dIdy_patch[
(
get_local_id
(
1
)
+
i
)
*
patchWidth
+
(
get_local_id
(
0
)
+
j
)
]
;
b1
+=
diff
*
dIdx
;
b2
+=
diff
*
dIdy
;
}
}
float2
delta
;
delta.x
=
A12
*
b2
-
A22
*
b1
;
delta.y
=
A12
*
b1
-
A11
*
b2
;
nextPt.x
+=
delta.x
;
nextPt.y
+=
delta.y
;
if
(
fabs
(
delta.x
)
<
0.01f
&&
fabs
(
delta.y
)
<
0.01f
)
{
break
;
}
}
u[y
*
uStep
/
4
+
x]
=
nextPt.x
-
x
;
v[y
*
vStep
/
4
+
x]
=
nextPt.y
-
y
;
if
(
calcErr
)
{
int
errval
=
0
;
for
(
int
i
=
0
; i < c_winSize_y; ++i)
{
for
(
int
j
=
0
; j < c_winSize_x; ++j)
{
int
iI
=
I_patch[
(
get_local_id
(
1
)
+
i
)
*
patchWidth
+
get_local_id
(
0
)
+
j]
;
int
iJ
=
readImage2Di_C1
(
J,
(
float2
)(
nextPt.x
-
c_halfWin_x
+
j
+
0.5f,
nextPt.y
-
c_halfWin_y
+
i
+
0.5f
)
,
size,
elemCntPerRow
)
;
errval
+=
abs
(
iJ
-
iI
)
;
}
}
//err[y
*
errStep
/
4
+
x]
=
static_cast<float>
(
errval
)
/
(
c_winSize_x
*
c_winSize_y
)
;
}
}
modules/ocl/src/pyrlk.cpp
View file @
fa11f04a
...
@@ -82,18 +82,16 @@ static void lkSparse_run(oclMat &I, oclMat &J,
...
@@ -82,18 +82,16 @@ static void lkSparse_run(oclMat &I, oclMat &J,
int
level
,
dim3
patch
,
Size
winSize
,
int
iters
)
int
level
,
dim3
patch
,
Size
winSize
,
int
iters
)
{
{
Context
*
clCxt
=
I
.
clCxt
;
Context
*
clCxt
=
I
.
clCxt
;
int
elemCntPerRow
=
I
.
step
/
I
.
elemSize
();
string
kernelName
=
"lkSparse"
;
string
kernelName
=
"lkSparse"
;
bool
isImageSupported
=
support_image2d
();
size_t
localThreads
[
3
]
=
{
8
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
8
,
isImageSupported
?
8
:
32
,
1
};
size_t
globalThreads
[
3
]
=
{
8
*
ptcount
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
8
*
ptcount
,
isImageSupported
?
8
:
32
,
1
};
int
cn
=
I
.
oclchannels
();
int
cn
=
I
.
oclchannels
();
char
calcErr
=
level
==
0
?
1
:
0
;
char
calcErr
=
level
==
0
?
1
:
0
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
cl_mem
ITex
=
isImageSupported
?
bindTexture
(
I
)
:
(
cl_mem
)
I
.
data
;
cl_mem
ITex
=
bindTexture
(
I
)
;
cl_mem
JTex
=
isImageSupported
?
bindTexture
(
J
)
:
(
cl_mem
)
J
.
data
;
cl_mem
JTex
=
bindTexture
(
J
)
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
ITex
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
ITex
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
JTex
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
JTex
));
...
@@ -106,8 +104,6 @@ static void lkSparse_run(oclMat &I, oclMat &J,
...
@@ -106,8 +104,6 @@ static void lkSparse_run(oclMat &I, oclMat &J,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
level
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
level
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
cols
));
if
(
!
isImageSupported
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
elemCntPerRow
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
patch
.
x
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
patch
.
x
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
patch
.
y
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
patch
.
y
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cn
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cn
));
...
@@ -120,32 +116,23 @@ static void lkSparse_run(oclMat &I, oclMat &J,
...
@@ -120,32 +116,23 @@ static void lkSparse_run(oclMat &I, oclMat &J,
if
(
is_cpu
)
if
(
is_cpu
)
{
{
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
(
char
*
)
" -D CPU"
);
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
(
char
*
)
" -D CPU"
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
}
else
else
{
{
if
(
isImageSupported
)
stringstream
idxStr
;
{
idxStr
<<
kernelName
<<
"_C"
<<
I
.
oclchannels
()
<<
"_D"
<<
I
.
depth
();
stringstream
idxStr
;
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
pyrlk
,
idxStr
.
str
());
idxStr
<<
kernelName
<<
"_C"
<<
I
.
oclchannels
()
<<
"_D"
<<
I
.
depth
();
int
wave_size
=
(
int
)
queryWaveFrontSize
(
kernel
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
pyrlk
,
idxStr
.
str
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
int
wave_size
=
(
int
)
queryWaveFrontSize
(
kernel
);
openCLSafeCall
(
clReleaseKernel
(
kernel
));
static
char
opt
[
32
]
=
{
0
};
sprintf
(
opt
,
"-D WAVE_SIZE=%d"
,
wave_size
);
static
char
opt
[
32
]
=
{
0
};
sprintf
(
opt
,
"-D WAVE_SIZE=%d"
,
wave_size
);
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
opt
);
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
opt
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
pyrlk_no_image
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
());
}
}
}
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
}
void
cv
::
ocl
::
PyrLKOpticalFlow
::
sparse
(
const
oclMat
&
prevImg
,
const
oclMat
&
nextImg
,
const
oclMat
&
prevPts
,
oclMat
&
nextPts
,
oclMat
&
status
,
oclMat
*
err
)
void
cv
::
ocl
::
PyrLKOpticalFlow
::
sparse
(
const
oclMat
&
prevImg
,
const
oclMat
&
nextImg
,
const
oclMat
&
prevPts
,
oclMat
&
nextPts
,
oclMat
&
status
,
oclMat
*
err
)
...
@@ -226,37 +213,19 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
...
@@ -226,37 +213,19 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
oclMat
&
prevU
,
oclMat
&
prevV
,
oclMat
*
err
,
Size
winSize
,
int
iters
)
oclMat
&
prevU
,
oclMat
&
prevV
,
oclMat
*
err
,
Size
winSize
,
int
iters
)
{
{
Context
*
clCxt
=
I
.
clCxt
;
Context
*
clCxt
=
I
.
clCxt
;
bool
isImageSupported
=
support_image2d
();
int
elemCntPerRow
=
I
.
step
/
I
.
elemSize
();
string
kernelName
=
"lkDense"
;
string
kernelName
=
"lkDense"
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
I
.
cols
,
I
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
I
.
cols
,
I
.
rows
,
1
};
bool
calcErr
;
cl_char
calcErr
=
err
?
1
:
0
;
if
(
err
)
{
calcErr
=
true
;
}
else
{
calcErr
=
false
;
}
cl_mem
ITex
;
cl_mem
ITex
;
cl_mem
JTex
;
cl_mem
JTex
;
if
(
isImageSupported
)
ITex
=
bindTexture
(
I
);
{
JTex
=
bindTexture
(
J
);
ITex
=
bindTexture
(
I
);
JTex
=
bindTexture
(
J
);
}
else
{
ITex
=
(
cl_mem
)
I
.
data
;
JTex
=
(
cl_mem
)
J
.
data
;
}
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
@@ -273,28 +242,15 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
...
@@ -273,28 +242,15 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
prevV
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
prevV
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
cols
));
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step ));
if
(
!
isImageSupported
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
elemCntPerRow
)
);
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
winSize
.
width
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
winSize
.
width
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
winSize
.
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
winSize
.
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
iters
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
iters
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
calcErr
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
calcErr
));
if
(
isImageSupported
)
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
());
{
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
());
releaseTexture
(
ITex
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
releaseTexture
(
JTex
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
pyrlk_no_image
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
());
}
}
}
void
cv
::
ocl
::
PyrLKOpticalFlow
::
dense
(
const
oclMat
&
prevImg
,
const
oclMat
&
nextImg
,
oclMat
&
u
,
oclMat
&
v
,
oclMat
*
err
)
void
cv
::
ocl
::
PyrLKOpticalFlow
::
dense
(
const
oclMat
&
prevImg
,
const
oclMat
&
nextImg
,
oclMat
&
u
,
oclMat
&
v
,
oclMat
*
err
)
...
...
modules/ocl/src/tvl1flow.cpp
View file @
fa11f04a
...
@@ -411,9 +411,6 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
...
@@ -411,9 +411,6 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
void
ocl_tvl1flow
::
warpBackward
(
const
oclMat
&
I0
,
const
oclMat
&
I1
,
oclMat
&
I1x
,
oclMat
&
I1y
,
oclMat
&
u1
,
oclMat
&
u2
,
oclMat
&
I1w
,
oclMat
&
I1wx
,
oclMat
&
I1wy
,
oclMat
&
grad
,
oclMat
&
rho
)
void
ocl_tvl1flow
::
warpBackward
(
const
oclMat
&
I0
,
const
oclMat
&
I1
,
oclMat
&
I1x
,
oclMat
&
I1y
,
oclMat
&
u1
,
oclMat
&
u2
,
oclMat
&
I1w
,
oclMat
&
I1wx
,
oclMat
&
I1wy
,
oclMat
&
grad
,
oclMat
&
rho
)
{
{
Context
*
clCxt
=
I0
.
clCxt
;
Context
*
clCxt
=
I0
.
clCxt
;
const
bool
isImgSupported
=
support_image2d
(
clCxt
);
CV_Assert
(
isImgSupported
);
int
u1ElementSize
=
u1
.
elemSize
();
int
u1ElementSize
=
u1
.
elemSize
();
int
u1Step
=
u1
.
step
/
u1ElementSize
;
int
u1Step
=
u1
.
step
/
u1ElementSize
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment