Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1283d62e
Commit
1283d62e
authored
Sep 08, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ocl: Kernel::runProfiling()
parent
d9ab3149
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
62 additions
and
15 deletions
+62
-15
ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+6
-0
ocl.cpp
modules/core/src/ocl.cpp
+56
-15
No files found.
modules/core/include/opencv2/core/ocl.hpp
View file @
1283d62e
...
...
@@ -573,6 +573,12 @@ public:
size_t
localsize
[],
bool
sync
,
const
Queue
&
q
=
Queue
());
bool
runTask
(
bool
sync
,
const
Queue
&
q
=
Queue
());
/** @brief Similar to synchronized run() call with returning of kernel execution time
* Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE)
* @return Execution time in nanoseconds or negative number on error
*/
int64
runProfiling
(
int
dims
,
size_t
globalsize
[],
size_t
localsize
[],
const
Queue
&
q
=
Queue
());
size_t
workGroupSize
()
const
;
size_t
preferedWorkGroupSizeMultiple
()
const
;
bool
compileWorkGroupSize
(
size_t
wsz
[])
const
;
...
...
modules/core/src/ocl.cpp
View file @
1283d62e
...
...
@@ -2094,6 +2094,9 @@ struct Kernel::Impl
release
();
}
bool
run
(
int
dims
,
size_t
_globalsize
[],
size_t
_localsize
[],
bool
sync
,
int64
*
timeNS
,
const
Queue
&
q
);
~
Impl
()
{
if
(
handle
)
...
...
@@ -2321,19 +2324,15 @@ int Kernel::set(int i, const KernelArg& arg)
return
i
+
1
;
}
bool
Kernel
::
run
(
int
dims
,
size_t
_globalsize
[],
size_t
_localsize
[],
bool
sync
,
const
Queue
&
q
)
{
CV_INSTRUMENT_REGION_OPENCL_RUN
(
p
->
name
.
c_str
());
if
(
!
p
||
!
p
->
handle
||
p
->
isInProgress
)
if
(
!
p
)
return
false
;
cl_command_queue
qq
=
getQueue
(
q
);
size_t
globalsize
[
CV_MAX_DIM
]
=
{
1
,
1
,
1
};
size_t
total
=
1
;
CV_Assert
(
_globalsize
!=
0
);
CV_Assert
(
_globalsize
!=
NULL
);
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
size_t
val
=
_localsize
?
_localsize
[
i
]
:
...
...
@@ -2345,12 +2344,28 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
globalsize
[
i
]
=
divUp
(
_globalsize
[
i
],
(
unsigned
int
)
val
)
*
val
;
}
CV_Assert
(
total
>
0
);
if
(
p
->
haveTempDstUMats
)
return
p
->
run
(
dims
,
globalsize
,
_localsize
,
sync
,
NULL
,
q
);
}
bool
Kernel
::
Impl
::
run
(
int
dims
,
size_t
globalsize
[],
size_t
localsize
[],
bool
sync
,
int64
*
timeNS
,
const
Queue
&
q
)
{
CV_INSTRUMENT_REGION_OPENCL_RUN
(
p
->
name
.
c_str
());
if
(
!
handle
||
isInProgress
)
return
false
;
cl_command_queue
qq
=
getQueue
(
q
);
if
(
haveTempDstUMats
)
sync
=
true
;
if
(
timeNS
)
sync
=
true
;
cl_event
asyncEvent
=
0
;
cl_int
retval
=
clEnqueueNDRangeKernel
(
qq
,
p
->
handle
,
(
cl_uint
)
dims
,
NULL
,
globalsize
,
_
localsize
,
0
,
0
,
sync
?
0
:
&
asyncEvent
);
cl_int
retval
=
clEnqueueNDRangeKernel
(
qq
,
handle
,
(
cl_uint
)
dims
,
NULL
,
globalsize
,
localsize
,
0
,
0
,
(
sync
&&
!
timeNS
)
?
0
:
&
asyncEvent
);
#if CV_OPENCL_SHOW_RUN_ERRORS
if
(
retval
!=
CL_SUCCESS
)
{
...
...
@@ -2358,16 +2373,31 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
fflush
(
stdout
);
}
#endif
if
(
sync
||
retval
!=
CL_SUCCESS
)
if
(
sync
||
retval
!=
CL_SUCCESS
)
{
CV_OclDbgAssert
(
clFinish
(
qq
)
==
CL_SUCCESS
);
p
->
cleanupUMats
();
if
(
timeNS
)
{
if
(
retval
==
CL_SUCCESS
)
{
clWaitForEvents
(
1
,
&
asyncEvent
);
cl_ulong
startTime
,
stopTime
;
CV_Assert
(
CL_SUCCESS
==
clGetEventProfilingInfo
(
asyncEvent
,
CL_PROFILING_COMMAND_START
,
sizeof
(
startTime
),
&
startTime
,
NULL
));
CV_Assert
(
CL_SUCCESS
==
clGetEventProfilingInfo
(
asyncEvent
,
CL_PROFILING_COMMAND_END
,
sizeof
(
stopTime
),
&
stopTime
,
NULL
));
*
timeNS
=
(
int64
)(
stopTime
-
startTime
);
}
else
{
*
timeNS
=
-
1
;
}
}
cleanupUMats
();
}
else
{
p
->
addref
();
p
->
isInProgress
=
true
;
CV_OclDbgAssert
(
clSetEventCallback
(
asyncEvent
,
CL_COMPLETE
,
oclCleanupCallback
,
p
)
==
CL_SUCCESS
);
addref
();
isInProgress
=
true
;
CV_OclDbgAssert
(
clSetEventCallback
(
asyncEvent
,
CL_COMPLETE
,
oclCleanupCallback
,
this
)
==
CL_SUCCESS
);
}
if
(
asyncEvent
)
clReleaseEvent
(
asyncEvent
);
...
...
@@ -2398,6 +2428,17 @@ bool Kernel::runTask(bool sync, const Queue& q)
return
retval
==
CL_SUCCESS
;
}
int64
Kernel
::
runProfiling
(
int
dims
,
size_t
globalsize
[],
size_t
localsize
[],
const
Queue
&
q_
)
{
CV_Assert
(
p
&&
p
->
handle
&&
!
p
->
isInProgress
);
Queue
q
=
q_
.
ptr
()
?
q_
:
Queue
::
getDefault
();
CV_Assert
(
q
.
ptr
());
q
.
finish
();
// call clFinish() on base queue
Queue
profilingQueue
=
q
.
getProfilingQueue
();
int64
timeNs
=
-
1
;
bool
res
=
p
->
run
(
dims
,
globalsize
,
localsize
,
true
,
&
timeNs
,
profilingQueue
);
return
res
?
timeNs
:
-
1
;
}
size_t
Kernel
::
workGroupSize
()
const
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment