Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
af8fc3e2
Commit
af8fc3e2
authored
Sep 28, 2017
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #9604 from alalek:ocl_kernel_profiling
parents
22467599
e5890fc9
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
152 additions
and
26 deletions
+152
-26
ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+11
-1
ocl.cpp
modules/core/src/ocl.cpp
+120
-17
ocl_deprecated.hpp
modules/core/src/ocl_deprecated.hpp
+21
-8
No files found.
modules/core/include/opencv2/core/ocl.hpp
View file @
af8fc3e2
...
...
@@ -333,8 +333,12 @@ public:
void
*
ptr
()
const
;
static
Queue
&
getDefault
();
/// @brief Returns OpenCL command queue with enable profiling mode support
const
Queue
&
getProfilingQueue
()
const
;
struct
Impl
;
friend
struct
Impl
;
inline
Impl
*
getImpl
()
const
{
return
p
;
}
protected
:
struct
Impl
;
Impl
*
p
;
};
...
...
@@ -569,6 +573,12 @@ public:
size_t
localsize
[],
bool
sync
,
const
Queue
&
q
=
Queue
());
bool
runTask
(
bool
sync
,
const
Queue
&
q
=
Queue
());
/** @brief Similar to synchronized run() call with returning of kernel execution time
* Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE)
* @return Execution time in nanoseconds or negative number on error
*/
int64
runProfiling
(
int
dims
,
size_t
globalsize
[],
size_t
localsize
[],
const
Queue
&
q
=
Queue
());
size_t
workGroupSize
()
const
;
size_t
preferedWorkGroupSizeMultiple
()
const
;
bool
compileWorkGroupSize
(
size_t
wsz
[])
const
;
...
...
modules/core/src/ocl.cpp
View file @
af8fc3e2
...
...
@@ -1840,9 +1840,35 @@ void initializeContextFromHandle(Context& ctx, void* platform, void* _context, v
struct
Queue
::
Impl
{
Impl
(
const
Context
&
c
,
const
Device
&
d
)
inline
void
__init
(
)
{
refcount
=
1
;
handle
=
0
;
isProfilingQueue_
=
false
;
}
Impl
(
cl_command_queue
q
)
{
__init
();
handle
=
q
;
cl_command_queue_properties
props
=
0
;
cl_int
result
=
clGetCommandQueueInfo
(
handle
,
CL_QUEUE_PROPERTIES
,
sizeof
(
cl_command_queue_properties
),
&
props
,
NULL
);
CV_Assert
(
result
&&
"clGetCommandQueueInfo(CL_QUEUE_PROPERTIES)"
);
isProfilingQueue_
=
!!
(
props
&
CL_QUEUE_PROFILING_ENABLE
);
}
Impl
(
cl_command_queue
q
,
bool
isProfilingQueue
)
{
__init
();
handle
=
q
;
isProfilingQueue_
=
isProfilingQueue
;
}
Impl
(
const
Context
&
c
,
const
Device
&
d
,
bool
withProfiling
=
false
)
{
__init
();
const
Context
*
pc
=
&
c
;
cl_context
ch
=
(
cl_context
)
pc
->
ptr
();
if
(
!
ch
)
...
...
@@ -1854,8 +1880,10 @@ struct Queue::Impl
if
(
!
dh
)
dh
=
(
cl_device_id
)
pc
->
device
(
0
).
ptr
();
cl_int
retval
=
0
;
handle
=
clCreateCommandQueue
(
ch
,
dh
,
0
,
&
retval
);
cl_command_queue_properties
props
=
withProfiling
?
CL_QUEUE_PROFILING_ENABLE
:
0
;
handle
=
clCreateCommandQueue
(
ch
,
dh
,
props
,
&
retval
);
CV_OclDbgAssert
(
retval
==
CL_SUCCESS
);
isProfilingQueue_
=
withProfiling
;
}
~
Impl
()
...
...
@@ -1873,9 +1901,37 @@ struct Queue::Impl
}
}
const
cv
::
ocl
::
Queue
&
getProfilingQueue
(
const
cv
::
ocl
::
Queue
&
self
)
{
if
(
isProfilingQueue_
)
return
self
;
if
(
profiling_queue_
.
ptr
())
return
profiling_queue_
;
cl_context
ctx
=
0
;
CV_Assert
(
CL_SUCCESS
==
clGetCommandQueueInfo
(
handle
,
CL_QUEUE_CONTEXT
,
sizeof
(
cl_context
),
&
ctx
,
NULL
));
cl_device_id
device
=
0
;
CV_Assert
(
CL_SUCCESS
==
clGetCommandQueueInfo
(
handle
,
CL_QUEUE_DEVICE
,
sizeof
(
cl_device_id
),
&
device
,
NULL
));
cl_int
result
=
CL_SUCCESS
;
cl_command_queue_properties
props
=
CL_QUEUE_PROFILING_ENABLE
;
cl_command_queue
q
=
clCreateCommandQueue
(
ctx
,
device
,
props
,
&
result
);
CV_Assert
(
result
==
CL_SUCCESS
&&
"clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)"
);
Queue
queue
;
queue
.
p
=
new
Impl
(
q
,
true
);
profiling_queue_
=
queue
;
return
profiling_queue_
;
}
IMPLEMENT_REFCOUNTABLE
();
cl_command_queue
handle
;
bool
isProfilingQueue_
;
cv
::
ocl
::
Queue
profiling_queue_
;
};
Queue
::
Queue
()
...
...
@@ -1929,6 +1985,12 @@ void Queue::finish()
}
}
const
Queue
&
Queue
::
getProfilingQueue
()
const
{
CV_Assert
(
p
);
return
p
->
getProfilingQueue
(
*
this
);
}
void
*
Queue
::
ptr
()
const
{
return
p
?
p
->
handle
:
0
;
...
...
@@ -2032,6 +2094,9 @@ struct Kernel::Impl
release
();
}
bool
run
(
int
dims
,
size_t
_globalsize
[],
size_t
_localsize
[],
bool
sync
,
int64
*
timeNS
,
const
Queue
&
q
);
~
Impl
()
{
if
(
handle
)
...
...
@@ -2259,19 +2324,15 @@ int Kernel::set(int i, const KernelArg& arg)
return
i
+
1
;
}
bool
Kernel
::
run
(
int
dims
,
size_t
_globalsize
[],
size_t
_localsize
[],
bool
sync
,
const
Queue
&
q
)
{
CV_INSTRUMENT_REGION_OPENCL_RUN
(
p
->
name
.
c_str
());
if
(
!
p
||
!
p
->
handle
||
p
->
isInProgress
)
if
(
!
p
)
return
false
;
cl_command_queue
qq
=
getQueue
(
q
);
size_t
globalsize
[
CV_MAX_DIM
]
=
{
1
,
1
,
1
};
size_t
total
=
1
;
CV_Assert
(
_globalsize
!=
0
);
CV_Assert
(
_globalsize
!=
NULL
);
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
size_t
val
=
_localsize
?
_localsize
[
i
]
:
...
...
@@ -2283,12 +2344,28 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
globalsize
[
i
]
=
divUp
(
_globalsize
[
i
],
(
unsigned
int
)
val
)
*
val
;
}
CV_Assert
(
total
>
0
);
if
(
p
->
haveTempDstUMats
)
return
p
->
run
(
dims
,
globalsize
,
_localsize
,
sync
,
NULL
,
q
);
}
bool
Kernel
::
Impl
::
run
(
int
dims
,
size_t
globalsize
[],
size_t
localsize
[],
bool
sync
,
int64
*
timeNS
,
const
Queue
&
q
)
{
CV_INSTRUMENT_REGION_OPENCL_RUN
(
p
->
name
.
c_str
());
if
(
!
handle
||
isInProgress
)
return
false
;
cl_command_queue
qq
=
getQueue
(
q
);
if
(
haveTempDstUMats
)
sync
=
true
;
if
(
timeNS
)
sync
=
true
;
cl_event
asyncEvent
=
0
;
cl_int
retval
=
clEnqueueNDRangeKernel
(
qq
,
p
->
handle
,
(
cl_uint
)
dims
,
NULL
,
globalsize
,
_
localsize
,
0
,
0
,
sync
?
0
:
&
asyncEvent
);
cl_int
retval
=
clEnqueueNDRangeKernel
(
qq
,
handle
,
(
cl_uint
)
dims
,
NULL
,
globalsize
,
localsize
,
0
,
0
,
(
sync
&&
!
timeNS
)
?
0
:
&
asyncEvent
);
#if CV_OPENCL_SHOW_RUN_ERRORS
if
(
retval
!=
CL_SUCCESS
)
{
...
...
@@ -2296,16 +2373,31 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
fflush
(
stdout
);
}
#endif
if
(
sync
||
retval
!=
CL_SUCCESS
)
if
(
sync
||
retval
!=
CL_SUCCESS
)
{
CV_OclDbgAssert
(
clFinish
(
qq
)
==
CL_SUCCESS
);
p
->
cleanupUMats
();
if
(
timeNS
)
{
if
(
retval
==
CL_SUCCESS
)
{
clWaitForEvents
(
1
,
&
asyncEvent
);
cl_ulong
startTime
,
stopTime
;
CV_Assert
(
CL_SUCCESS
==
clGetEventProfilingInfo
(
asyncEvent
,
CL_PROFILING_COMMAND_START
,
sizeof
(
startTime
),
&
startTime
,
NULL
));
CV_Assert
(
CL_SUCCESS
==
clGetEventProfilingInfo
(
asyncEvent
,
CL_PROFILING_COMMAND_END
,
sizeof
(
stopTime
),
&
stopTime
,
NULL
));
*
timeNS
=
(
int64
)(
stopTime
-
startTime
);
}
else
{
*
timeNS
=
-
1
;
}
}
cleanupUMats
();
}
else
{
p
->
addref
();
p
->
isInProgress
=
true
;
CV_OclDbgAssert
(
clSetEventCallback
(
asyncEvent
,
CL_COMPLETE
,
oclCleanupCallback
,
p
)
==
CL_SUCCESS
);
addref
();
isInProgress
=
true
;
CV_OclDbgAssert
(
clSetEventCallback
(
asyncEvent
,
CL_COMPLETE
,
oclCleanupCallback
,
this
)
==
CL_SUCCESS
);
}
if
(
asyncEvent
)
clReleaseEvent
(
asyncEvent
);
...
...
@@ -2336,6 +2428,17 @@ bool Kernel::runTask(bool sync, const Queue& q)
return
retval
==
CL_SUCCESS
;
}
int64
Kernel
::
runProfiling
(
int
dims
,
size_t
globalsize
[],
size_t
localsize
[],
const
Queue
&
q_
)
{
CV_Assert
(
p
&&
p
->
handle
&&
!
p
->
isInProgress
);
Queue
q
=
q_
.
ptr
()
?
q_
:
Queue
::
getDefault
();
CV_Assert
(
q
.
ptr
());
q
.
finish
();
// call clFinish() on base queue
Queue
profilingQueue
=
q
.
getProfilingQueue
();
int64
timeNs
=
-
1
;
bool
res
=
p
->
run
(
dims
,
globalsize
,
localsize
,
true
,
&
timeNs
,
profilingQueue
);
return
res
?
timeNs
:
-
1
;
}
size_t
Kernel
::
workGroupSize
()
const
{
...
...
modules/core/src/ocl_deprecated.hpp
View file @
af8fc3e2
...
...
@@ -759,15 +759,15 @@ OCL_FUNC_P(cl_mem, clCreateBuffer,
/*
OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
*/
OCL_FUNC
(
cl_int
,
clGetCommandQueueInfo
,
(cl_command_queue command_queue,
cl_command_queue_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret),
(command_queue, param_name, param_value_size, param_value, param_value_size_ret))
(
cl_command_queue
command_queue
,
cl_command_queue_info
param_name
,
size_t
param_value_size
,
void
*
param_value
,
size_t
*
param_value_size_ret
),
(
command_queue
,
param_name
,
param_value_size
,
param_value
,
param_value_size_ret
))
/*
OCL_FUNC_P(cl_mem, clCreateSubBuffer,
(cl_mem buffer,
cl_mem_flags flags,
...
...
@@ -1202,6 +1202,19 @@ OCL_FUNC(cl_int, clSetEventCallback,
OCL_FUNC
(
cl_int
,
clReleaseEvent
,
(
cl_event
event
),
(
event
))
OCL_FUNC
(
cl_int
,
clWaitForEvents
,
(
cl_uint
num_events
,
const
cl_event
*
event_list
),
(
num_events
,
event_list
))
OCL_FUNC
(
cl_int
,
clGetEventProfilingInfo
,
(
cl_event
event
,
cl_profiling_info
param_name
,
size_t
param_value_size
,
void
*
param_value
,
size_t
*
param_value_size_ret
),
(
event
,
param_name
,
param_value_size
,
param_value
,
param_value_size_ret
))
}
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment