submodule / opencv / Commits / 278fb617

Commit 278fb617, authored Oct 22, 2013 by Vadim Pisarevsky (parent 29f37fc1)

    the first draft of transparent API and new UMat class.

Showing 3 changed files, with 4006 additions and 0 deletions:

    ocl.hpp      modules/core/include/opencv2/core/ocl.hpp    +462   -0
    ocl.cpp      modules/core/src/ocl.cpp                      +2901  -0
    umatrix.cpp  modules/core/src/umatrix.cpp                  +643   -0
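The one-line commit message is the only description: this commit introduces the first draft of OpenCV's "transparent API" — a new cv::ocl module that detects and loads OpenCL at run time, plus the new UMat class (implemented in umatrix.cpp, not shown in full below). As rough orientation, the following sketch shows how the control functions declared in the new ocl.hpp are meant to be driven; it is not part of the diff, and the exact dispatch behaviour of UMat-based functions is an assumption here.

// Illustrative sketch only (not part of this commit).
#include "opencv2/core/ocl.hpp"

int main()
{
    if (cv::ocl::haveOpenCL())        // true only if an OpenCL runtime was found and loaded
        cv::ocl::setUseOpenCL(true);  // opt this thread in to the OpenCL-backed code paths
    // ... ordinary OpenCV calls; cv::ocl::finish() waits on the default queue ...
    return 0;
}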
modules/core/include/opencv2/core/ocl.hpp (new file, 0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_OPENCL_HPP__
#define __OPENCV_OPENCL_HPP__
#include "opencv2/core.hpp"
namespace cv { namespace ocl {

CV_EXPORTS bool haveOpenCL();
CV_EXPORTS bool useOpenCL();
CV_EXPORTS void setUseOpenCL(bool flag);
CV_EXPORTS void finish();

class CV_EXPORTS Context;
class CV_EXPORTS Device;
class CV_EXPORTS Kernel;
class CV_EXPORTS Program;
class CV_EXPORTS ProgramSource;
class CV_EXPORTS Queue;
class CV_EXPORTS Device
{
public:
    Device();
    explicit Device(void* d);
    Device(const Device& d);
    Device& operator = (const Device& d);
    ~Device();

    void set(void* d);

    enum
    {
        TYPE_DEFAULT     = (1 << 0),
        TYPE_CPU         = (1 << 1),
        TYPE_GPU         = (1 << 2),
        TYPE_ACCELERATOR = (1 << 3),
        TYPE_DGPU        = TYPE_GPU + (1 << 16),
        TYPE_IGPU        = TYPE_GPU + (1 << 17),
        TYPE_ALL         = 0xFFFFFFFF
    };

    String name() const;
    String extensions() const;
    String vendor() const;
    String OpenCL_C_Version() const;
    String OpenCLVersion() const;
    String driverVersion() const;
    void* ptr() const;

    int type() const;

    int addressBits() const;
    bool available() const;
    bool compilerAvailable() const;
    bool linkerAvailable() const;

    enum
    {
        FP_DENORM = (1 << 0),
        FP_INF_NAN = (1 << 1),
        FP_ROUND_TO_NEAREST = (1 << 2),
        FP_ROUND_TO_ZERO = (1 << 3),
        FP_ROUND_TO_INF = (1 << 4),
        FP_FMA = (1 << 5),
        FP_SOFT_FLOAT = (1 << 6),
        FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = (1 << 7)
    };
    int doubleFPConfig() const;
    int singleFPConfig() const;
    int halfFPConfig() const;

    bool endianLittle() const;
    bool errorCorrectionSupport() const;

    enum
    {
        EXEC_KERNEL = (1 << 0),
        EXEC_NATIVE_KERNEL = (1 << 1)
    };
    int executionCapabilities() const;

    size_t globalMemCacheSize() const;

    enum
    {
        NO_CACHE = 0,
        READ_ONLY_CACHE = 1,
        READ_WRITE_CACHE = 2
    };
    int globalMemCacheType() const;
    int globalMemCacheLineSize() const;
    size_t globalMemSize() const;

    size_t localMemSize() const;
    enum
    {
        NO_LOCAL_MEM = 0,
        LOCAL_IS_LOCAL = 1,
        LOCAL_IS_GLOBAL = 2
    };
    int localMemType() const;
    bool hostUnifiedMemory() const;

    bool imageSupport() const;
    size_t image2DMaxWidth() const;
    size_t image2DMaxHeight() const;
    size_t image3DMaxWidth() const;
    size_t image3DMaxHeight() const;
    size_t image3DMaxDepth() const;
    size_t imageMaxBufferSize() const;
    size_t imageMaxArraySize() const;

    int maxClockFrequency() const;
    int maxComputeUnits() const;
    int maxConstantArgs() const;
    size_t maxConstantBufferSize() const;
    size_t maxMemAllocSize() const;
    size_t maxParameterSize() const;
    int maxReadImageArgs() const;
    int maxWriteImageArgs() const;
    int maxSamplers() const;

    size_t maxWorkGroupSize() const;
    int maxWorkItemDims() const;
    void maxWorkItemSizes(size_t*) const;
    int memBaseAddrAlign() const;

    int nativeVectorWidthChar() const;
    int nativeVectorWidthShort() const;
    int nativeVectorWidthInt() const;
    int nativeVectorWidthLong() const;
    int nativeVectorWidthFloat() const;
    int nativeVectorWidthDouble() const;
    int nativeVectorWidthHalf() const;

    int preferredVectorWidthChar() const;
    int preferredVectorWidthShort() const;
    int preferredVectorWidthInt() const;
    int preferredVectorWidthLong() const;
    int preferredVectorWidthFloat() const;
    int preferredVectorWidthDouble() const;
    int preferredVectorWidthHalf() const;

    size_t printfBufferSize() const;
    size_t profilingTimerResolution() const;

    static const Device& getDefault();

protected:
    struct Impl;
    Impl* p;
};
class CV_EXPORTS Context
{
public:
    Context();
    explicit Context(int dtype);
    ~Context();
    Context(const Context& c);
    Context& operator = (const Context& c);

    bool create(int dtype);
    size_t ndevices() const;
    const Device& device(size_t idx) const;
    int dtype() const;
    Program getProg(const ProgramSource& prog,
                    const String& buildopt, String& errmsg);

    static Context& getDefault();
    void* ptr() const;

protected:
    struct Impl;
    Impl* p;
};
class CV_EXPORTS Queue
{
public:
    Queue();
    explicit Queue(const Context& c, const Device& d=Device());
    ~Queue();
    Queue(const Queue& q);
    Queue& operator = (const Queue& q);

    bool create(const Context& c=Context(), const Device& d=Device());
    void finish();
    void* ptr() const;
    static Queue& getDefault();

protected:
    struct Impl;
    Impl* p;
};
class CV_EXPORTS KernelArg
{
public:
    enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8 };
    KernelArg(int _flags, UMat* _m, void* _obj=0, size_t _sz=0);

    static KernelArg Local() { return KernelArg(LOCAL, 0); }
    static KernelArg ReadOnly(const UMat& m) { return KernelArg(READ_ONLY, (UMat*)&m); }
    static KernelArg WriteOnly(const UMat& m) { return KernelArg(WRITE_ONLY, (UMat*)&m); }
    static KernelArg Constant(const Mat& m);
    template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
    { return KernelArg(CONSTANT, 0, (void*)arr, n); }

    int flags;
    UMat* m;
    void* obj;
    size_t sz;
};
class CV_EXPORTS Kernel
{
public:
    class CV_EXPORTS Callback
    {
    public:
        virtual ~Callback() {}
        virtual void operator()() = 0;
    };

    Kernel();
    Kernel(const char* kname, const Program& prog);
    Kernel(const char* kname, const ProgramSource& prog,
           const String& buildopts, String& errmsg);
    ~Kernel();
    Kernel(const Kernel& k);
    Kernel& operator = (const Kernel& k);

    bool create(const char* kname, const Program& prog);
    bool create(const char* kname, const ProgramSource& prog,
                const String& buildopts, String& errmsg);

    int set(int i, const void* value, size_t sz);
    int set(int i, const UMat& m);
    int set(int i, const KernelArg& arg);
    template<typename _Tp> int set(int i, const _Tp& value)
    { return set(i, &value, sizeof(value)); }

    template<typename _Tp1>
    Kernel& args(_Tp1 a1)
    {
        set(0, a1); return *this;
    }

    template<typename _Tp1, typename _Tp2>
    Kernel& args(_Tp1 a1, _Tp2 a2)
    {
        int i = set(0, a1); set(i, a2); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3)
    {
        int i = set(0, a1); i = set(i, a2); set(i, a3); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3); set(i, a4); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3);
        i = set(i, a4); set(i, a5); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3);
        i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
             typename _Tp5, typename _Tp6, typename _Tp7>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
        i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4,
                 _Tp5 a5, _Tp6 a6, _Tp7 a7, _Tp8 a8)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
        i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5,
                 _Tp6 a6, _Tp7 a7, _Tp8 a8, _Tp9 a9)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
        i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
             typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5,
                 _Tp6 a6, _Tp7 a7, _Tp8 a8, _Tp9 a9, _Tp10 a10)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10);
        return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
             typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10,
             typename _Tp11>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6,
                 _Tp7 a7, _Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10);
        set(i, a11); return *this;
    }

    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
             typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10,
             typename _Tp11, typename _Tp12>
    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6,
                 _Tp7 a7, _Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11, _Tp12 a12)
    {
        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10);
        i = set(i, a11); set(i, a12); return *this;
    }

    void run(int dims, size_t offset[], size_t globalsize[],
             size_t localsize[], bool sync,
             const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
             const Queue& q=Queue());
    void runTask(bool sync,
                 const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
                 const Queue& q=Queue());

    size_t workGroupSize() const;
    bool compileWorkGroupSize(size_t wsz[]) const;
    size_t localMemSize() const;

    void* ptr() const;
    struct Impl;

protected:
    Impl* p;
};
class CV_EXPORTS Program
{
public:
    Program();
    Program(const ProgramSource& src,
            const String& buildflags, String& errmsg);
    explicit Program(const String& buf);
    Program(const Program& prog);
    Program& operator = (const Program& prog);
    ~Program();

    bool create(const ProgramSource& src,
                const String& buildflags, String& errmsg);
    bool read(const String& buf, const String& buildflags);
    bool write(String& buf) const;

    const ProgramSource& source() const;
    void* ptr() const;

    String getPrefix() const;
    static String getPrefix(const String& buildflags);

protected:
    struct Impl;
    Impl* p;
};
class CV_EXPORTS ProgramSource
{
public:
    typedef uint64 hash_t;

    ProgramSource();
    explicit ProgramSource(const String& prog);
    explicit ProgramSource(const char* prog);
    ~ProgramSource();
    ProgramSource(const ProgramSource& prog);
    ProgramSource& operator = (const ProgramSource& prog);

    const String& source() const;
    hash_t hash() const;

protected:
    struct Impl;
    Impl* p;
};

}}

#endif
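To make the declarations above concrete, here is a hypothetical use of ProgramSource, Kernel and KernelArg. It is not taken from the commit: the kernel source and name are made up, UMat::total() is assumed to exist, and the way ReadOnly/WriteOnly arguments are marshalled into kernel parameters is an assumption about the draft implementation.

// Hypothetical usage sketch (not part of this commit).
using namespace cv::ocl;

static bool negateInto(const cv::UMat& src, cv::UMat& dst)  // assumes both buffers are already allocated
{
    ProgramSource source(
        "__kernel void negate(__global const uchar* src, __global uchar* dst)\n"
        "{ size_t i = get_global_id(0); dst[i] = 255 - src[i]; }");
    String errmsg;
    Kernel k("negate", source, "", errmsg);   // built for the default Context
    if (!k.ptr())
        return false;                         // build failed; errmsg holds the log
    k.args(KernelArg::ReadOnly(src), KernelArg::WriteOnly(dst));
    size_t offset[1] = { 0 }, globalsize[1] = { src.total() };
    k.run(1, offset, globalsize, 0, true);    // synchronous run on the default Queue
    return true;
}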
modules/core/src/ocl.cpp (new file, 0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <map>
/*
Part of the file is an extract from the standard OpenCL headers from Khronos site.
Below is the original copyright.
*/
/*******************************************************************************
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#if 0 //defined __APPLE__
#define HAVE_OPENCL 1
#else
#undef HAVE_OPENCL
#endif
#define OPENCV_CL_NOT_IMPLEMENTED -1000
#ifdef HAVE_OPENCL
#if defined __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
static const bool g_haveOpenCL = true;
#else
extern
"C"
{
struct
_cl_platform_id
{
int
dummy
;
};
struct
_cl_device_id
{
int
dummy
;
};
struct
_cl_context
{
int
dummy
;
};
struct
_cl_command_queue
{
int
dummy
;
};
struct
_cl_mem
{
int
dummy
;
};
struct
_cl_program
{
int
dummy
;
};
struct
_cl_kernel
{
int
dummy
;
};
struct
_cl_event
{
int
dummy
;
};
struct
_cl_sampler
{
int
dummy
;
};
typedef
struct
_cl_platform_id
*
cl_platform_id
;
typedef
struct
_cl_device_id
*
cl_device_id
;
typedef
struct
_cl_context
*
cl_context
;
typedef
struct
_cl_command_queue
*
cl_command_queue
;
typedef
struct
_cl_mem
*
cl_mem
;
typedef
struct
_cl_program
*
cl_program
;
typedef
struct
_cl_kernel
*
cl_kernel
;
typedef
struct
_cl_event
*
cl_event
;
typedef
struct
_cl_sampler
*
cl_sampler
;
typedef
int
cl_int
;
typedef
unsigned
cl_uint
;
typedef
long
cl_long
;
typedef
unsigned
long
cl_ulong
;
typedef
cl_uint
cl_bool
;
/* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
typedef
cl_ulong
cl_bitfield
;
typedef
cl_bitfield
cl_device_type
;
typedef
cl_uint
cl_platform_info
;
typedef
cl_uint
cl_device_info
;
typedef
cl_bitfield
cl_device_fp_config
;
typedef
cl_uint
cl_device_mem_cache_type
;
typedef
cl_uint
cl_device_local_mem_type
;
typedef
cl_bitfield
cl_device_exec_capabilities
;
typedef
cl_bitfield
cl_command_queue_properties
;
typedef
intptr_t
cl_device_partition_property
;
typedef
cl_bitfield
cl_device_affinity_domain
;
typedef
intptr_t
cl_context_properties
;
typedef
cl_uint
cl_context_info
;
typedef
cl_uint
cl_command_queue_info
;
typedef
cl_uint
cl_channel_order
;
typedef
cl_uint
cl_channel_type
;
typedef
cl_bitfield
cl_mem_flags
;
typedef
cl_uint
cl_mem_object_type
;
typedef
cl_uint
cl_mem_info
;
typedef
cl_bitfield
cl_mem_migration_flags
;
typedef
cl_uint
cl_image_info
;
typedef
cl_uint
cl_buffer_create_type
;
typedef
cl_uint
cl_addressing_mode
;
typedef
cl_uint
cl_filter_mode
;
typedef
cl_uint
cl_sampler_info
;
typedef
cl_bitfield
cl_map_flags
;
typedef
cl_uint
cl_program_info
;
typedef
cl_uint
cl_program_build_info
;
typedef
cl_uint
cl_program_binary_type
;
typedef
cl_int
cl_build_status
;
typedef
cl_uint
cl_kernel_info
;
typedef
cl_uint
cl_kernel_arg_info
;
typedef
cl_uint
cl_kernel_arg_address_qualifier
;
typedef
cl_uint
cl_kernel_arg_access_qualifier
;
typedef
cl_bitfield
cl_kernel_arg_type_qualifier
;
typedef
cl_uint
cl_kernel_work_group_info
;
typedef
cl_uint
cl_event_info
;
typedef
cl_uint
cl_command_type
;
typedef
cl_uint
cl_profiling_info
;
typedef
struct
_cl_image_format
{
cl_channel_order
image_channel_order
;
cl_channel_type
image_channel_data_type
;
}
cl_image_format
;
typedef
struct
_cl_image_desc
{
cl_mem_object_type
image_type
;
size_t
image_width
;
size_t
image_height
;
size_t
image_depth
;
size_t
image_array_size
;
size_t
image_row_pitch
;
size_t
image_slice_pitch
;
cl_uint
num_mip_levels
;
cl_uint
num_samples
;
cl_mem
buffer
;
}
cl_image_desc
;
typedef
struct
_cl_buffer_region
{
size_t
origin
;
size_t
size
;
}
cl_buffer_region
;
//////////////////////////////////////////////////////////
#define CL_SUCCESS 0
#define CL_DEVICE_NOT_FOUND -1
#define CL_DEVICE_NOT_AVAILABLE -2
#define CL_COMPILER_NOT_AVAILABLE -3
#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
#define CL_OUT_OF_RESOURCES -5
#define CL_OUT_OF_HOST_MEMORY -6
#define CL_PROFILING_INFO_NOT_AVAILABLE -7
#define CL_MEM_COPY_OVERLAP -8
#define CL_IMAGE_FORMAT_MISMATCH -9
#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
#define CL_BUILD_PROGRAM_FAILURE -11
#define CL_MAP_FAILURE -12
#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
#define CL_COMPILE_PROGRAM_FAILURE -15
#define CL_LINKER_NOT_AVAILABLE -16
#define CL_LINK_PROGRAM_FAILURE -17
#define CL_DEVICE_PARTITION_FAILED -18
#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
#define CL_INVALID_VALUE -30
#define CL_INVALID_DEVICE_TYPE -31
#define CL_INVALID_PLATFORM -32
#define CL_INVALID_DEVICE -33
#define CL_INVALID_CONTEXT -34
#define CL_INVALID_QUEUE_PROPERTIES -35
#define CL_INVALID_COMMAND_QUEUE -36
#define CL_INVALID_HOST_PTR -37
#define CL_INVALID_MEM_OBJECT -38
#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
#define CL_INVALID_IMAGE_SIZE -40
#define CL_INVALID_SAMPLER -41
#define CL_INVALID_BINARY -42
#define CL_INVALID_BUILD_OPTIONS -43
#define CL_INVALID_PROGRAM -44
#define CL_INVALID_PROGRAM_EXECUTABLE -45
#define CL_INVALID_KERNEL_NAME -46
#define CL_INVALID_KERNEL_DEFINITION -47
#define CL_INVALID_KERNEL -48
#define CL_INVALID_ARG_INDEX -49
#define CL_INVALID_ARG_VALUE -50
#define CL_INVALID_ARG_SIZE -51
#define CL_INVALID_KERNEL_ARGS -52
#define CL_INVALID_WORK_DIMENSION -53
#define CL_INVALID_WORK_GROUP_SIZE -54
#define CL_INVALID_WORK_ITEM_SIZE -55
#define CL_INVALID_GLOBAL_OFFSET -56
#define CL_INVALID_EVENT_WAIT_LIST -57
#define CL_INVALID_EVENT -58
#define CL_INVALID_OPERATION -59
#define CL_INVALID_GL_OBJECT -60
#define CL_INVALID_BUFFER_SIZE -61
#define CL_INVALID_MIP_LEVEL -62
#define CL_INVALID_GLOBAL_WORK_SIZE -63
#define CL_INVALID_PROPERTY -64
#define CL_INVALID_IMAGE_DESCRIPTOR -65
#define CL_INVALID_COMPILER_OPTIONS -66
#define CL_INVALID_LINKER_OPTIONS -67
#define CL_INVALID_DEVICE_PARTITION_COUNT -68
/*#define CL_VERSION_1_0 1
#define CL_VERSION_1_1 1
#define CL_VERSION_1_2 1*/
#define CL_FALSE 0
#define CL_TRUE 1
#define CL_BLOCKING CL_TRUE
#define CL_NON_BLOCKING CL_FALSE
#define CL_PLATFORM_PROFILE 0x0900
#define CL_PLATFORM_VERSION 0x0901
#define CL_PLATFORM_NAME 0x0902
#define CL_PLATFORM_VENDOR 0x0903
#define CL_PLATFORM_EXTENSIONS 0x0904
#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
#define CL_DEVICE_TYPE_CPU (1 << 1)
#define CL_DEVICE_TYPE_GPU (1 << 2)
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
#define CL_DEVICE_TYPE 0x1000
#define CL_DEVICE_VENDOR_ID 0x1001
#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
#define CL_DEVICE_ADDRESS_BITS 0x100D
#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
#define CL_DEVICE_IMAGE_SUPPORT 0x1016
#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
#define CL_DEVICE_MAX_SAMPLERS 0x1018
#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
#define CL_DEVICE_ENDIAN_LITTLE 0x1026
#define CL_DEVICE_AVAILABLE 0x1027
#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
#define CL_DEVICE_NAME 0x102B
#define CL_DEVICE_VENDOR 0x102C
#define CL_DRIVER_VERSION 0x102D
#define CL_DEVICE_PROFILE 0x102E
#define CL_DEVICE_VERSION 0x102F
#define CL_DEVICE_EXTENSIONS 0x1030
#define CL_DEVICE_PLATFORM 0x1031
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
#define CL_DEVICE_LINKER_AVAILABLE 0x103E
#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
#define CL_DEVICE_PARENT_DEVICE 0x1042
#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
#define CL_DEVICE_PARTITION_TYPE 0x1046
#define CL_DEVICE_REFERENCE_COUNT 0x1047
#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
#define CL_FP_DENORM (1 << 0)
#define CL_FP_INF_NAN (1 << 1)
#define CL_FP_ROUND_TO_NEAREST (1 << 2)
#define CL_FP_ROUND_TO_ZERO (1 << 3)
#define CL_FP_ROUND_TO_INF (1 << 4)
#define CL_FP_FMA (1 << 5)
#define CL_FP_SOFT_FLOAT (1 << 6)
#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
#define CL_NONE 0x0
#define CL_READ_ONLY_CACHE 0x1
#define CL_READ_WRITE_CACHE 0x2
#define CL_LOCAL 0x1
#define CL_GLOBAL 0x2
#define CL_EXEC_KERNEL (1 << 0)
#define CL_EXEC_NATIVE_KERNEL (1 << 1)
#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
#define CL_CONTEXT_REFERENCE_COUNT 0x1080
#define CL_CONTEXT_DEVICES 0x1081
#define CL_CONTEXT_PROPERTIES 0x1082
#define CL_CONTEXT_NUM_DEVICES 0x1083
#define CL_CONTEXT_PLATFORM 0x1084
#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
#define CL_DEVICE_PARTITION_EQUALLY 0x1086
#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
#define CL_QUEUE_CONTEXT 0x1090
#define CL_QUEUE_DEVICE 0x1091
#define CL_QUEUE_REFERENCE_COUNT 0x1092
#define CL_QUEUE_PROPERTIES 0x1093
#define CL_MEM_READ_WRITE (1 << 0)
#define CL_MEM_WRITE_ONLY (1 << 1)
#define CL_MEM_READ_ONLY (1 << 2)
#define CL_MEM_USE_HOST_PTR (1 << 3)
#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
#define CL_MEM_COPY_HOST_PTR (1 << 5)
// reserved (1 << 6)
#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
#define CL_MEM_HOST_READ_ONLY (1 << 8)
#define CL_MEM_HOST_NO_ACCESS (1 << 9)
#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
#define CL_R 0x10B0
#define CL_A 0x10B1
#define CL_RG 0x10B2
#define CL_RA 0x10B3
#define CL_RGB 0x10B4
#define CL_RGBA 0x10B5
#define CL_BGRA 0x10B6
#define CL_ARGB 0x10B7
#define CL_INTENSITY 0x10B8
#define CL_LUMINANCE 0x10B9
#define CL_Rx 0x10BA
#define CL_RGx 0x10BB
#define CL_RGBx 0x10BC
#define CL_DEPTH 0x10BD
#define CL_DEPTH_STENCIL 0x10BE
#define CL_SNORM_INT8 0x10D0
#define CL_SNORM_INT16 0x10D1
#define CL_UNORM_INT8 0x10D2
#define CL_UNORM_INT16 0x10D3
#define CL_UNORM_SHORT_565 0x10D4
#define CL_UNORM_SHORT_555 0x10D5
#define CL_UNORM_INT_101010 0x10D6
#define CL_SIGNED_INT8 0x10D7
#define CL_SIGNED_INT16 0x10D8
#define CL_SIGNED_INT32 0x10D9
#define CL_UNSIGNED_INT8 0x10DA
#define CL_UNSIGNED_INT16 0x10DB
#define CL_UNSIGNED_INT32 0x10DC
#define CL_HALF_FLOAT 0x10DD
#define CL_FLOAT 0x10DE
#define CL_UNORM_INT24 0x10DF
#define CL_MEM_OBJECT_BUFFER 0x10F0
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
#define CL_MEM_OBJECT_IMAGE1D 0x10F4
#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
#define CL_MEM_TYPE 0x1100
#define CL_MEM_FLAGS 0x1101
#define CL_MEM_SIZE 0x1102
#define CL_MEM_HOST_PTR 0x1103
#define CL_MEM_MAP_COUNT 0x1104
#define CL_MEM_REFERENCE_COUNT 0x1105
#define CL_MEM_CONTEXT 0x1106
#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
#define CL_MEM_OFFSET 0x1108
#define CL_IMAGE_FORMAT 0x1110
#define CL_IMAGE_ELEMENT_SIZE 0x1111
#define CL_IMAGE_ROW_PITCH 0x1112
#define CL_IMAGE_SLICE_PITCH 0x1113
#define CL_IMAGE_WIDTH 0x1114
#define CL_IMAGE_HEIGHT 0x1115
#define CL_IMAGE_DEPTH 0x1116
#define CL_IMAGE_ARRAY_SIZE 0x1117
#define CL_IMAGE_BUFFER 0x1118
#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
#define CL_IMAGE_NUM_SAMPLES 0x111A
#define CL_ADDRESS_NONE 0x1130
#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
#define CL_ADDRESS_CLAMP 0x1132
#define CL_ADDRESS_REPEAT 0x1133
#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
#define CL_FILTER_NEAREST 0x1140
#define CL_FILTER_LINEAR 0x1141
#define CL_SAMPLER_REFERENCE_COUNT 0x1150
#define CL_SAMPLER_CONTEXT 0x1151
#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
#define CL_SAMPLER_ADDRESSING_MODE 0x1153
#define CL_SAMPLER_FILTER_MODE 0x1154
#define CL_MAP_READ (1 << 0)
#define CL_MAP_WRITE (1 << 1)
#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
#define CL_PROGRAM_REFERENCE_COUNT 0x1160
#define CL_PROGRAM_CONTEXT 0x1161
#define CL_PROGRAM_NUM_DEVICES 0x1162
#define CL_PROGRAM_DEVICES 0x1163
#define CL_PROGRAM_SOURCE 0x1164
#define CL_PROGRAM_BINARY_SIZES 0x1165
#define CL_PROGRAM_BINARIES 0x1166
#define CL_PROGRAM_NUM_KERNELS 0x1167
#define CL_PROGRAM_KERNEL_NAMES 0x1168
#define CL_PROGRAM_BUILD_STATUS 0x1181
#define CL_PROGRAM_BUILD_OPTIONS 0x1182
#define CL_PROGRAM_BUILD_LOG 0x1183
#define CL_PROGRAM_BINARY_TYPE 0x1184
#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
#define CL_BUILD_SUCCESS 0
#define CL_BUILD_NONE -1
#define CL_BUILD_ERROR -2
#define CL_BUILD_IN_PROGRESS -3
#define CL_KERNEL_FUNCTION_NAME 0x1190
#define CL_KERNEL_NUM_ARGS 0x1191
#define CL_KERNEL_REFERENCE_COUNT 0x1192
#define CL_KERNEL_CONTEXT 0x1193
#define CL_KERNEL_PROGRAM 0x1194
#define CL_KERNEL_ATTRIBUTES 0x1195
#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
#define CL_KERNEL_ARG_TYPE_NAME 0x1198
#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
#define CL_KERNEL_ARG_NAME 0x119A
#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
#define CL_KERNEL_ARG_TYPE_NONE 0
#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
#define CL_EVENT_COMMAND_QUEUE 0x11D0
#define CL_EVENT_COMMAND_TYPE 0x11D1
#define CL_EVENT_REFERENCE_COUNT 0x11D2
#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
#define CL_EVENT_CONTEXT 0x11D4
#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
#define CL_COMMAND_TASK 0x11F1
#define CL_COMMAND_NATIVE_KERNEL 0x11F2
#define CL_COMMAND_READ_BUFFER 0x11F3
#define CL_COMMAND_WRITE_BUFFER 0x11F4
#define CL_COMMAND_COPY_BUFFER 0x11F5
#define CL_COMMAND_READ_IMAGE 0x11F6
#define CL_COMMAND_WRITE_IMAGE 0x11F7
#define CL_COMMAND_COPY_IMAGE 0x11F8
#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
#define CL_COMMAND_MAP_BUFFER 0x11FB
#define CL_COMMAND_MAP_IMAGE 0x11FC
#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
#define CL_COMMAND_MARKER 0x11FE
#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
#define CL_COMMAND_READ_BUFFER_RECT 0x1201
#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
#define CL_COMMAND_USER 0x1204
#define CL_COMMAND_BARRIER 0x1205
#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
#define CL_COMMAND_FILL_BUFFER 0x1207
#define CL_COMMAND_FILL_IMAGE 0x1208
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3
#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
#define CL_PROFILING_COMMAND_QUEUED 0x1280
#define CL_PROFILING_COMMAND_SUBMIT 0x1281
#define CL_PROFILING_COMMAND_START 0x1282
#define CL_PROFILING_COMMAND_END 0x1283
#define CL_CALLBACK CV_STDCALL
static volatile bool g_haveOpenCL = false;
static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
#if defined(__APPLE__)
#include <dlfcn.h>
static void* initOpenCLAndLoad(const char* funcname)
{
    static bool initialized = false;
    static void* handle = 0;
    if( !handle )
    {
        if( !initialized )
        {
            handle = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY);
            initialized = true;
            g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
        }
        if( !handle )
            return 0;
    }

    return funcname ? dlsym(handle, funcname) : 0;
}
#elif defined WIN32 || defined _WIN32
#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
#define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
#endif
#include <windows.h>
#if (_WIN32_WINNT >= 0x0602)
#include <synchapi.h>
#endif
#undef small
#undef min
#undef max
#undef abs
static void* initOpenCLAndLoad(const char* funcname)
{
    static bool initialized = false;
    static HMODULE handle = 0;
    if( !handle )
    {
        if( !initialized )
        {
            handle = LoadLibraryA("OpenCL.dll");
            initialized = true;
            g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
        }
        if( !handle )
            return 0;
    }

    return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
}
#elif defined(__linux)
#include <dlfcn.h>
#include <stdio.h>
static void* initOpenCLAndLoad(const char* funcname)
{
    static bool initialized = false;
    static void* handle = 0;
    if( !handle )
    {
        if( !initialized )
        {
            handle = dlopen("libOpenCL.so", RTLD_LAZY);
            if( !handle )
                handle = dlopen("libCL.so", RTLD_LAZY);
            initialized = true;
            g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
        }
        if( !handle )
            return 0;
    }

    return funcname ? (void*)dlsym(handle, funcname) : 0;
}
#else
static void* initOpenCLAndLoad(const char*)
{
    return 0;
}
#endif
#define OCL_FUNC(rettype, funcname, argsdecl, args) \
typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
static rettype funcname argsdecl \
{ \
static funcname##_t funcname##_p = 0; \
if( !funcname##_p ) \
{ \
funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
if( !funcname##_p ) \
return OPENCV_CL_NOT_IMPLEMENTED; \
} \
return funcname##_p args; \
}
#define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
static rettype funcname argsdecl \
{ \
static funcname##_t funcname##_p = 0; \
if( !funcname##_p ) \
{ \
funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
if( !funcname##_p ) \
{ \
if( errcode_ret ) \
*errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
return 0; \
} \
} \
return funcname##_p args; \
}
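For reference, this is roughly what the OCL_FUNC invocation for clFinish further down expands to: each wrapper looks its symbol up through initOpenCLAndLoad() on first use and caches the resulting function pointer. The expansion below is illustrative only, not additional code in the commit.

typedef cl_int (CV_STDCALL *clFinish_t)(cl_command_queue command_queue);
static cl_int clFinish(cl_command_queue command_queue)
{
    static clFinish_t clFinish_p = 0;
    if( !clFinish_p )
    {
        clFinish_p = (clFinish_t)initOpenCLAndLoad("clFinish");
        if( !clFinish_p )
            return OPENCV_CL_NOT_IMPLEMENTED;   // runtime not present: report the stub error code
    }
    return clFinish_p(command_queue);
}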
OCL_FUNC(cl_int, clGetPlatformIDs,
    (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
    (num_entries, platforms, num_platforms))

OCL_FUNC(cl_int, clGetPlatformInfo,
    (cl_platform_id platform, cl_platform_info param_name, size_t param_value_size,
     void* param_value, size_t* param_value_size_ret),
    (platform, param_name, param_value_size, param_value, param_value_size_ret))

OCL_FUNC(cl_int, clGetDeviceInfo,
    (cl_device_id device, cl_device_info param_name, size_t param_value_size,
     void* param_value, size_t* param_value_size_ret),
    (device, param_name, param_value_size, param_value, param_value_size_ret))

OCL_FUNC(cl_int, clGetDeviceIDs,
    (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
     cl_device_id* devices, cl_uint* num_devices),
    (platform, device_type, num_entries, devices, num_devices))

OCL_FUNC_P(cl_context, clCreateContext,
    (const cl_context_properties* properties, cl_uint num_devices,
     const cl_device_id* devices,
     void (CL_CALLBACK* pfn_notify)(const char*, const void*, size_t, void*),
     void* user_data, cl_int* errcode_ret),
    (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))

OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
/*
OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
OCL_FUNC_P(cl_context, clCreateContextFromType,
(const cl_context_properties * properties,
cl_device_type device_type,
void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
void * user_data,
cl_int * errcode_ret),
(properties, device_type, pfn_notify, user_data, errcode_ret))
OCL_FUNC(cl_int, clGetContextInfo,
(cl_context context,
cl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret),
(context, param_name, param_value_size,
param_value, param_value_size_ret))
*/
OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
    (cl_context context, cl_device_id device,
     cl_command_queue_properties properties, cl_int* errcode_ret),
    (context, device, properties, errcode_ret))

OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))

OCL_FUNC_P(cl_mem, clCreateBuffer,
    (cl_context context, cl_mem_flags flags, size_t size,
     void* host_ptr, cl_int* errcode_ret),
    (context, flags, size, host_ptr, errcode_ret))
/*
OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
OCL_FUNC(cl_int, clGetCommandQueueInfo,
(cl_command_queue command_queue,
cl_command_queue_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret),
(command_queue, param_name, param_value_size, param_value, param_value_size_ret))
OCL_FUNC_P(cl_mem, clCreateSubBuffer,
(cl_mem buffer,
cl_mem_flags flags,
cl_buffer_create_type buffer_create_type,
const void * buffer_create_info,
cl_int * errcode_ret),
(buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
OCL_FUNC_P(cl_mem, clCreateImage,
(cl_context context,
cl_mem_flags flags,
const cl_image_format * image_format,
const cl_image_desc * image_desc,
void * host_ptr,
cl_int * errcode_ret),
(context, flags, image_format, image_desc, host_ptr, errcode_ret))
OCL_FUNC(cl_int, clGetSupportedImageFormats,
(cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint num_entries,
cl_image_format * image_formats,
cl_uint * num_image_formats),
(context, flags, image_type, num_entries, image_formats, num_image_formats))
OCL_FUNC(cl_int, clGetMemObjectInfo,
(cl_mem memobj,
cl_mem_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret),
(memobj, param_name, param_value_size, param_value, param_value_size_ret))
OCL_FUNC(cl_int, clGetImageInfo,
(cl_mem image,
cl_image_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret),
(image, param_name, param_value_size, param_value, param_value_size_ret))
OCL_FUNC(cl_int, clCreateKernelsInProgram,
(cl_program program,
cl_uint num_kernels,
cl_kernel * kernels,
cl_uint * num_kernels_ret),
(program, num_kernels, kernels, num_kernels_ret))
OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
OCL_FUNC(cl_int, clGetKernelArgInfo,
(cl_kernel kernel,
cl_uint arg_indx,
cl_kernel_arg_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret),
(kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
OCL_FUNC(cl_int, clEnqueueReadImage,
(cl_command_queue command_queue,
cl_mem image,
cl_bool blocking_read,
const size_t * origin[3],
const size_t * region[3],
size_t row_pitch,
size_t slice_pitch,
void * ptr,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, image, blocking_read, origin, region,
row_pitch, slice_pitch,
ptr,
num_events_in_wait_list,
event_wait_list,
event))
OCL_FUNC(cl_int, clEnqueueWriteImage,
(cl_command_queue command_queue,
cl_mem image,
cl_bool blocking_write,
const size_t * origin[3],
const size_t * region[3],
size_t input_row_pitch,
size_t input_slice_pitch,
const void * ptr,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, image, blocking_write, origin, region, input_row_pitch,
input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
OCL_FUNC(cl_int, clEnqueueFillImage,
(cl_command_queue command_queue,
cl_mem image,
const void * fill_color,
const size_t * origin[3],
const size_t * region[3],
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, image, fill_color, origin, region,
num_events_in_wait_list, event_wait_list, event))
OCL_FUNC(cl_int, clEnqueueCopyImage,
(cl_command_queue command_queue,
cl_mem src_image,
cl_mem dst_image,
const size_t * src_origin[3],
const size_t * dst_origin[3],
const size_t * region[3],
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, src_image, dst_image, src_origin, dst_origin,
region, num_events_in_wait_list, event_wait_list, event))
OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
(cl_command_queue command_queue,
cl_mem src_image,
cl_mem dst_buffer,
const size_t * src_origin[3],
const size_t * region[3],
size_t dst_offset,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
num_events_in_wait_list, event_wait_list, event))
OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
(cl_command_queue command_queue,
cl_mem src_buffer,
cl_mem dst_image,
size_t src_offset,
const size_t * dst_origin[3],
const size_t * region[3],
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, src_buffer, dst_image, src_offset, dst_origin,
region, num_events_in_wait_list, event_wait_list, event))
OCL_FUNC_P(void*, clEnqueueMapImage,
(cl_command_queue command_queue,
cl_mem image,
cl_bool blocking_map,
cl_map_flags map_flags,
const size_t * origin[3],
const size_t * region[3],
size_t * image_row_pitch,
size_t * image_slice_pitch,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event,
cl_int * errcode_ret),
(command_queue, image, blocking_map, map_flags, origin, region,
image_row_pitch, image_slice_pitch, num_events_in_wait_list,
event_wait_list, event, errcode_ret))
OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
OCL_FUNC(cl_int, clGetKernelInfo,
(cl_kernel kernel,
cl_kernel_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret),
(kernel, param_name, param_value_size, param_value, param_value_size_ret))
OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
*/
OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))

OCL_FUNC_P(cl_program, clCreateProgramWithSource,
    (cl_context context, cl_uint count, const char** strings,
     const size_t* lengths, cl_int* errcode_ret),
    (context, count, strings, lengths, errcode_ret))

OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
    (cl_context context, cl_uint num_devices, const cl_device_id* device_list,
     const size_t* lengths, const unsigned char** binaries,
     cl_int* binary_status, cl_int* errcode_ret),
    (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))

OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))

OCL_FUNC(cl_int, clBuildProgram,
    (cl_program program, cl_uint num_devices, const cl_device_id* device_list,
     const char* options, void (CL_CALLBACK* pfn_notify)(cl_program, void*),
     void* user_data),
    (program, num_devices, device_list, options, pfn_notify, user_data))

OCL_FUNC(cl_int, clGetProgramInfo,
    (cl_program program, cl_program_info param_name, size_t param_value_size,
     void* param_value, size_t* param_value_size_ret),
    (program, param_name, param_value_size, param_value, param_value_size_ret))

OCL_FUNC(cl_int, clGetProgramBuildInfo,
    (cl_program program, cl_device_id device, cl_program_build_info param_name,
     size_t param_value_size, void* param_value, size_t* param_value_size_ret),
    (program, device, param_name, param_value_size, param_value, param_value_size_ret))

OCL_FUNC_P(cl_kernel, clCreateKernel,
    (cl_program program, const char* kernel_name, cl_int* errcode_ret),
    (program, kernel_name, errcode_ret))

OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))

OCL_FUNC(cl_int, clSetKernelArg,
    (cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void* arg_value),
    (kernel, arg_index, arg_size, arg_value))

OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
    (cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name,
     size_t param_value_size, void* param_value, size_t* param_value_size_ret),
    (kernel, device, param_name, param_value_size, param_value, param_value_size_ret))

OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))

OCL_FUNC(cl_int, clEnqueueReadBuffer,
    (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
     size_t offset, size_t size, void* ptr,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, buffer, blocking_read, offset, size, ptr,
     num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueReadBufferRect,
    (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
     const size_t* buffer_offset, const size_t* host_offset, const size_t* region,
     size_t buffer_row_pitch, size_t buffer_slice_pitch,
     size_t host_row_pitch, size_t host_slice_pitch, void* ptr,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, buffer, blocking_read, buffer_offset, host_offset, region,
     buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr,
     num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueWriteBuffer,
    (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
     size_t offset, size_t size, const void* ptr,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, buffer, blocking_write, offset, size, ptr,
     num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
    (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
     const size_t* buffer_offset, const size_t* host_offset, const size_t* region,
     size_t buffer_row_pitch, size_t buffer_slice_pitch,
     size_t host_row_pitch, size_t host_slice_pitch, const void* ptr,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, buffer, blocking_write, buffer_offset, host_offset, region,
     buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr,
     num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueFillBuffer,
    (cl_command_queue command_queue, cl_mem buffer,
     const void* pattern, size_t pattern_size, size_t offset, size_t size,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, buffer, pattern, pattern_size, offset, size,
     num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueCopyBuffer,
    (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
     size_t src_offset, size_t dst_offset, size_t size,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, src_buffer, dst_buffer, src_offset, dst_offset, size,
     num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
    (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
     const size_t* src_origin, const size_t* dst_origin, const size_t* region,
     size_t src_row_pitch, size_t src_slice_pitch,
     size_t dst_row_pitch, size_t dst_slice_pitch,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, src_buffer, dst_buffer, src_origin, dst_origin, region,
     src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
     num_events_in_wait_list, event_wait_list, event))

OCL_FUNC_P(void*, clEnqueueMapBuffer,
    (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map,
     cl_map_flags map_flags, size_t offset, size_t size,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
     cl_event* event, cl_int* errcode_ret),
    (command_queue, buffer, blocking_map, map_flags, offset, size,
     num_events_in_wait_list, event_wait_list, event, errcode_ret))

OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
    (cl_command_queue command_queue, cl_mem memobj, void* mapped_ptr,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
    (cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
     const size_t* global_work_offset, const size_t* global_work_size,
     const size_t* local_work_size,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, kernel, work_dim, global_work_offset, global_work_size,
     local_work_size, num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clEnqueueTask,
    (cl_command_queue command_queue, cl_kernel kernel,
     cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event),
    (command_queue, kernel, num_events_in_wait_list, event_wait_list, event))

OCL_FUNC(cl_int, clSetEventCallback,
    (cl_event event, cl_int command_exec_callback_type,
     void (CL_CALLBACK* pfn_event_notify)(cl_event event, cl_int event_command_exec_status, void* user_data),
     void* user_data),
    (event, command_exec_callback_type, pfn_event_notify, user_data))

OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
}
#endif
namespace cv { namespace ocl {

// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
{
    static uint64 table[256];
    static bool initialized = false;

    if( !initialized )
    {
        for( int i = 0; i < 256; i++ )
        {
            uint64 c = i;
            for( int j = 0; j < 8; j++ )
                c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
            table[i] = c;
        }
        initialized = true;
    }

    uint64 crc = ~crc0;
    for( size_t idx = 0; idx < size; idx++ )
        crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);

    return ~crc;
}
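crc64() gives the module a cheap, stable 64-bit digest of a byte string; presumably it backs ProgramSource::hash() so that compiled programs can be cached and looked up by their source text. A minimal illustration (not in the commit):

// Illustration only: hashing an OpenCL source string with crc64().
const char* kernelSrc = "__kernel void noop() {}";
uint64 h = crc64((const uchar*)kernelSrc, strlen(kernelSrc));
// h could then serve as one part of a HashKey in a program cache.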
struct HashKey
{
    typedef uint64 part;
    HashKey(part _a, part _b) : a(_a), b(_b) {}
    part a, b;
};

inline bool operator == (const HashKey& h1, const HashKey& h2)
{
    return h1.a == h2.a && h1.b == h2.b;
}

inline bool operator < (const HashKey& h1, const HashKey& h2)
{
    return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
}
bool haveOpenCL()
{
    initOpenCLAndLoad(0);
    return g_haveOpenCL;
}

bool useOpenCL()
{
    TLSData* data = TLSData::get();
    if( data->useOpenCL < 0 )
        data->useOpenCL = (int)haveOpenCL();
    return data->useOpenCL > 0;
}

void finish()
{
    Queue::getDefault().finish();
}
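useOpenCL() reads a per-thread flag out of TLSData and lazily initialises it from haveOpenCL(), so enabling or disabling the OpenCL path is a per-thread decision. A small sketch of how a caller might scope that (not in the commit; setUseOpenCL() is declared in ocl.hpp but its body is not shown here):

// Sketch: temporarily forcing the CPU path on the current thread.
bool wasEnabled = cv::ocl::useOpenCL();
cv::ocl::setUseOpenCL(false);     // subsequent calls fall back to the non-OpenCL branches
// ... run code that should not touch the OpenCL device ...
cv::ocl::setUseOpenCL(wasEnabled);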
#define IMPLEMENT_REFCOUNTABLE() \
void addref() { CV_XADD(&refcount, 1); } \
void release() { if( CV_XADD(&refcount, -1) == 1 ) delete this; } \
int refcount
class Platform
{
public:
    Platform();
    ~Platform();
    Platform(const Platform& p);
    Platform& operator = (const Platform& p);

    void* ptr() const;
    static Platform& getDefault();
protected:
    struct Impl;
    Impl* p;
};

struct Platform::Impl
{
    Impl()
    {
        refcount = 1;
        handle = 0;
        initialized = false;
    }

    ~Impl() {}

    void init()
    {
        if( !initialized )
        {
            //cl_uint num_entries
            cl_uint n = 0;
            if( clGetPlatformIDs(1, &handle, &n) < 0 || n == 0 )
                handle = 0;
            if( handle != 0 )
            {
                char buf[1000];
                size_t len = 0;
                clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len);
                buf[len] = '\0';
                vendor = String(buf);
            }
            initialized = true;
        }
    }

    IMPLEMENT_REFCOUNTABLE();

    cl_platform_id handle;
    String vendor;
    bool initialized;
};
Platform::Platform()
{
    p = 0;
}

Platform::~Platform()
{
    if(p)
        p->release();
}

Platform::Platform(const Platform& pl)
{
    p = (Impl*)pl.p;
    if(p)
        p->addref();
}

Platform& Platform::operator = (const Platform& pl)
{
    Impl* newp = (Impl*)pl.p;
    if(newp)
        newp->addref();
    if(p)
        p->release();
    p = newp;
    return *this;
}

void* Platform::ptr() const
{
    return p ? p->handle : 0;
}

Platform& Platform::getDefault()
{
    static Platform p;
    if( !p.p )
    {
        p.p = new Impl;
        p.p->init();
    }
    return p;
}
///////////////////////////////////////////////////////////////////////////////////
struct Device::Impl
{
    Impl(void* d)
    {
        handle = (cl_device_id)d;
    }

    template<typename _TpCL, typename _TpOut>
    _TpOut getProp(cl_device_info prop) const
    {
        _TpCL temp = _TpCL();
        size_t sz = 0;

        return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 &&
            sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
    }

    String getStrProp(cl_device_info prop) const
    {
        char buf[1024];
        size_t sz = 0;
        return clGetDeviceInfo(handle, prop, sizeof(buf) - 16, buf, &sz) >= 0 &&
            sz < sizeof(buf) ? String(buf) : String();
    }

    IMPLEMENT_REFCOUNTABLE();
    cl_device_id handle;
};
Device
::
Device
()
{
p
=
0
;
}
Device
::
Device
(
void
*
d
)
{
p
=
0
;
set
(
d
);
}
Device
::
Device
(
const
Device
&
d
)
{
p
=
d
.
p
;
if
(
p
)
p
->
addref
();
}
Device
&
Device
::
operator
=
(
const
Device
&
d
)
{
Impl
*
newp
=
(
Impl
*
)
d
.
p
;
if
(
newp
)
newp
->
addref
();
if
(
p
)
p
->
release
();
p
=
newp
;
return
*
this
;
}
Device
::~
Device
()
{
if
(
p
)
p
->
release
();
}
void
Device
::
set
(
void
*
d
)
{
if
(
p
)
p
->
release
();
p
=
new
Impl
(
d
);
}
void
*
Device
::
ptr
()
const
{
return
p
?
p
->
handle
:
0
;
}
String
Device
::
name
()
const
{
return
p
?
p
->
getStrProp
(
CL_DEVICE_NAME
)
:
String
();
}
String
Device
::
extensions
()
const
{
return
p
?
p
->
getStrProp
(
CL_DEVICE_EXTENSIONS
)
:
String
();
}
String
Device
::
vendor
()
const
{
return
p
?
p
->
getStrProp
(
CL_DEVICE_VENDOR
)
:
String
();
}
String
Device
::
OpenCL_C_Version
()
const
{
return
p
?
p
->
getStrProp
(
CL_DEVICE_OPENCL_C_VERSION
)
:
String
();
}
String
Device
::
OpenCLVersion
()
const
{
return
p
?
p
->
getStrProp
(
CL_DEVICE_EXTENSIONS
)
:
String
();
}
String
Device
::
driverVersion
()
const
{
return
p
?
p
->
getStrProp
(
CL_DEVICE_EXTENSIONS
)
:
String
();
}
int
Device
::
type
()
const
{
return
p
?
p
->
getProp
<
cl_device_type
,
int
>
(
CL_DEVICE_TYPE
)
:
0
;
}
int
Device
::
addressBits
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_ADDRESS_BITS
)
:
0
;
}
bool
Device
::
available
()
const
{
return
p
?
p
->
getProp
<
cl_bool
,
bool
>
(
CL_DEVICE_AVAILABLE
)
:
0
;
}
bool
Device
::
compilerAvailable
()
const
{
return
p
?
p
->
getProp
<
cl_bool
,
bool
>
(
CL_DEVICE_COMPILER_AVAILABLE
)
:
0
;
}
bool
Device
::
linkerAvailable
()
const
{
return
p
?
p
->
getProp
<
cl_bool
,
bool
>
(
CL_DEVICE_LINKER_AVAILABLE
)
:
0
;
}
int
Device
::
doubleFPConfig
()
const
{
return
p
?
p
->
getProp
<
cl_device_fp_config
,
int
>
(
CL_DEVICE_DOUBLE_FP_CONFIG
)
:
0
;
}
int
Device
::
singleFPConfig
()
const
{
return
p
?
p
->
getProp
<
cl_device_fp_config
,
int
>
(
CL_DEVICE_SINGLE_FP_CONFIG
)
:
0
;
}
int
Device
::
halfFPConfig
()
const
{
return
p
?
p
->
getProp
<
cl_device_fp_config
,
int
>
(
CL_DEVICE_HALF_FP_CONFIG
)
:
0
;
}
bool
Device
::
endianLittle
()
const
{
return
p
?
p
->
getProp
<
cl_bool
,
bool
>
(
CL_DEVICE_ENDIAN_LITTLE
)
:
0
;
}
bool
Device
::
errorCorrectionSupport
()
const
{
return
p
?
p
->
getProp
<
cl_bool
,
bool
>
(
CL_DEVICE_ERROR_CORRECTION_SUPPORT
)
:
0
;
}
int
Device
::
executionCapabilities
()
const
{
return
p
?
p
->
getProp
<
cl_device_exec_capabilities
,
int
>
(
CL_DEVICE_EXECUTION_CAPABILITIES
)
:
0
;
}
size_t
Device
::
globalMemCacheSize
()
const
{
return
p
?
p
->
getProp
<
cl_ulong
,
size_t
>
(
CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
)
:
0
;
}
int
Device
::
globalMemCacheType
()
const
{
return
p
?
p
->
getProp
<
cl_device_mem_cache_type
,
int
>
(
CL_DEVICE_GLOBAL_MEM_CACHE_TYPE
)
:
0
;
}
int
Device
::
globalMemCacheLineSize
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
)
:
0
;
}
size_t
Device
::
globalMemSize
()
const
{
return
p
?
p
->
getProp
<
cl_ulong
,
size_t
>
(
CL_DEVICE_GLOBAL_MEM_SIZE
)
:
0
;
}
size_t
Device
::
localMemSize
()
const
{
return
p
?
p
->
getProp
<
cl_ulong
,
size_t
>
(
CL_DEVICE_LOCAL_MEM_SIZE
)
:
0
;
}
int
Device
::
localMemType
()
const
{
return
p
?
p
->
getProp
<
cl_device_local_mem_type
,
int
>
(
CL_DEVICE_LOCAL_MEM_TYPE
)
:
0
;
}
bool
Device
::
hostUnifiedMemory
()
const
{
return
p
?
p
->
getProp
<
cl_bool
,
bool
>
(
CL_DEVICE_HOST_UNIFIED_MEMORY
)
:
0
;
}
bool
Device
::
imageSupport
()
const
{
return
p
?
p
->
getProp
<
cl_bool
,
bool
>
(
CL_DEVICE_IMAGE_SUPPORT
)
:
0
;
}
size_t
Device
::
image2DMaxWidth
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_IMAGE2D_MAX_WIDTH
)
:
0
;
}
size_t
Device
::
image2DMaxHeight
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_IMAGE2D_MAX_HEIGHT
)
:
0
;
}
size_t
Device
::
image3DMaxWidth
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_IMAGE3D_MAX_WIDTH
)
:
0
;
}
size_t
Device
::
image3DMaxHeight
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_IMAGE3D_MAX_HEIGHT
)
:
0
;
}
size_t
Device
::
image3DMaxDepth
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_IMAGE3D_MAX_DEPTH
)
:
0
;
}
size_t
Device
::
imageMaxBufferSize
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_IMAGE_MAX_BUFFER_SIZE
)
:
0
;
}
size_t
Device
::
imageMaxArraySize
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_IMAGE_MAX_ARRAY_SIZE
)
:
0
;
}
int
Device
::
maxClockFrequency
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MAX_CLOCK_FREQUENCY
)
:
0
;
}
int
Device
::
maxComputeUnits
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MAX_COMPUTE_UNITS
)
:
0
;
}
int
Device
::
maxConstantArgs
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MAX_CONSTANT_ARGS
)
:
0
;
}
size_t
Device
::
maxConstantBufferSize
()
const
{
return
p
?
p
->
getProp
<
cl_ulong
,
size_t
>
(
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
)
:
0
;
}
size_t
Device
::
maxMemAllocSize
()
const
{
return
p
?
p
->
getProp
<
cl_ulong
,
size_t
>
(
CL_DEVICE_MAX_MEM_ALLOC_SIZE
)
:
0
;
}
size_t
Device
::
maxParameterSize
()
const
{
return
p
?
p
->
getProp
<
cl_ulong
,
size_t
>
(
CL_DEVICE_MAX_PARAMETER_SIZE
)
:
0
;
}
int
Device
::
maxReadImageArgs
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MAX_READ_IMAGE_ARGS
)
:
0
;
}
int
Device
::
maxWriteImageArgs
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MAX_WRITE_IMAGE_ARGS
)
:
0
;
}
int
Device
::
maxSamplers
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MAX_SAMPLERS
)
:
0
;
}
size_t
Device
::
maxWorkGroupSize
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_MAX_WORK_GROUP_SIZE
)
:
0
;
}
int
Device
::
maxWorkItemDims
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
)
:
0
;
}
void
Device
::
maxWorkItemSizes
(
size_t
*
sizes
)
const
{
if
(
p
)
{
const
int
MAX_DIMS
=
32
;
size_t
retsz
=
0
;
clGetDeviceInfo
(
p
->
handle
,
CL_DEVICE_MAX_WORK_ITEM_SIZES
,
MAX_DIMS
*
sizeof
(
sizes
[
0
]),
&
sizes
[
0
],
&
retsz
);
}
}
int
Device
::
memBaseAddrAlign
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_MEM_BASE_ADDR_ALIGN
)
:
0
;
}
int
Device
::
nativeVectorWidthChar
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR
)
:
0
;
}
int
Device
::
nativeVectorWidthShort
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT
)
:
0
;
}
int
Device
::
nativeVectorWidthInt
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_NATIVE_VECTOR_WIDTH_INT
)
:
0
;
}
int
Device
::
nativeVectorWidthLong
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG
)
:
0
;
}
int
Device
::
nativeVectorWidthFloat
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT
)
:
0
;
}
int
Device
::
nativeVectorWidthDouble
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
)
:
0
;
}
int
Device
::
nativeVectorWidthHalf
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF
)
:
0
;
}
int
Device
::
preferredVectorWidthChar
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR
)
:
0
;
}
int
Device
::
preferredVectorWidthShort
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT
)
:
0
;
}
int
Device
::
preferredVectorWidthInt
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT
)
:
0
;
}
int
Device
::
preferredVectorWidthLong
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG
)
:
0
;
}
int
Device
::
preferredVectorWidthFloat
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT
)
:
0
;
}
int
Device
::
preferredVectorWidthDouble
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE
)
:
0
;
}
int
Device
::
preferredVectorWidthHalf
()
const
{
return
p
?
p
->
getProp
<
cl_uint
,
int
>
(
CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
)
:
0
;
}
size_t
Device
::
printfBufferSize
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_PRINTF_BUFFER_SIZE
)
:
0
;
}
size_t
Device
::
profilingTimerResolution
()
const
{
return
p
?
p
->
getProp
<
size_t
,
size_t
>
(
CL_DEVICE_PROFILING_TIMER_RESOLUTION
)
:
0
;
}
const
Device
&
Device
::
getDefault
()
{
const
Context
&
ctx
=
Context
::
getDefault
();
int
idx
=
TLSData
::
get
()
->
device
;
return
ctx
.
device
(
idx
);
}
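/*
 Usage sketch, not part of this commit: the accessors above fall back to 0 or an
 empty String when no device is attached, so they can be queried unconditionally.

     const cv::ocl::Device& dev = cv::ocl::Device::getDefault();
     printf("device: %s, compute units: %d, local mem: %d KB\n",
            dev.name().c_str(), dev.maxComputeUnits(),
            (int)(dev.localMemSize()/1024));
*/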
/////////////////////////////////////////////////////////////////////////////////////////
struct Context::Impl
{
    Impl(int dtype0)
    {
        refcount = 1;
        handle = 0;

        cl_int retval = 0;
        cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
        cl_context_properties prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pl, 0};

        cl_uint i, nd0 = 0, nd = 0;
        int dtype = dtype0 & 15;
        clGetDeviceIDs( pl, dtype, 0, 0, &nd0 );
        if(retval < 0)
            return;
        AutoBuffer<void*> dlistbuf(nd0*2+1);
        cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
        cl_device_id* dlist_new = dlist + nd0;
        clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 );
        String name0;

        for(i = 0; i < nd0; i++)
        {
            Device d(dlist[i]);
            if( !d.available() || !d.compilerAvailable() )
                continue;
            if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
                continue;
            if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
                continue;
            String name = d.name();
            if( nd != 0 && name != name0 )
                continue;
            name0 = name;
            dlist_new[nd++] = dlist[i];
        }

        if(nd == 0)
            return;

        // !!! in the current implementation force the number of devices to 1 !!!
        nd = 1;

        handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
        bool ok = handle != 0 && retval >= 0;
        if( ok )
        {
            devices.resize(nd);
            for( i = 0; i < nd; i++ )
                devices[i].set(dlist_new[i]);
        }
    }

    ~Impl()
    {
        if(handle)
            clReleaseContext(handle);
        devices.clear();
    }

    Program getProg(const ProgramSource& src,
                    const String& buildflags, String& errmsg)
    {
        String prefix = Program::getPrefix(buildflags);
        HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
        phash_t::iterator it = phash.find(k);
        if( it != phash.end() )
            return it->second;
        //String filename = format("%08x%08x_%08x%08x.clb2",
        Program prog(src, buildflags, errmsg);
        phash.insert(std::pair<HashKey,Program>(k, prog));
        return prog;
    }

    IMPLEMENT_REFCOUNTABLE();

    cl_context handle;
    std::vector<Device> devices;
    int dtype;
    bool initialized;

    typedef ProgramSource::hash_t hash_t;

    struct HashKey
    {
        HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
        bool operator < (const HashKey& k) const
        { return a < k.a || (a == k.a && b < k.b); }
        bool operator == (const HashKey& k) const
        { return a == k.a && b == k.b; }
        bool operator != (const HashKey& k) const
        { return a != k.a || b != k.b; }
        hash_t a, b;
    };
    typedef std::map<HashKey, Program> phash_t;
    phash_t phash;
};

Context::Context()
{
    p = 0;
}

Context::Context(int dtype)
{
    p = 0;
    create(dtype);
}

int Context::dtype() const
{
    return p ? p->dtype : 0;
}

bool Context::create(int dtype0)
{
    if( !haveOpenCL() )
        return false;
    if(p)
        p->release();
    p = new Impl(dtype0);
    if(!p->handle)
    {
        delete p;
        p = 0;
    }
    return p != 0;
}

Context::~Context()
{
    p->release();
}

Context::Context(const Context& c)
{
    p = (Impl*)c.p;
    if(p)
        p->addref();
}

Context& Context::operator = (const Context& c)
{
    Impl* newp = (Impl*)c.p;
    if(newp)
        newp->addref();
    if(p)
        p->release();
    p = newp;
    return *this;
}

void* Context::ptr() const
{
    return p->handle;
}

size_t Context::ndevices() const
{
    return p ? p->devices.size() : 0;
}

const Device& Context::device(size_t idx) const
{
    static Device dummy;
    return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
}

Context& Context::getDefault()
{
    static Context ctx;
    if( !ctx.p->handle && haveOpenCL() )
    {
        // do not create new Context right away.
        // First, try to retrieve existing context of the same type.
        // In its turn, Platform::getContext() may call Context::create()
        // if there is no such context.
        ctx.create(Device::TYPE_ACCELERATOR);
        if(!ctx.p->handle)
            ctx.create(Device::TYPE_DGPU);
        if(!ctx.p->handle)
            ctx.create(Device::TYPE_IGPU);
        if(!ctx.p->handle)
            ctx.create(Device::TYPE_CPU);
    }

    return ctx;
}

Program Context::getProg(const ProgramSource& prog,
                         const String& buildopts, String& errmsg)
{
    return p ? p->getProg(prog, buildopts, errmsg) : Program();
}
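/*
 Usage sketch, not part of this commit: programs are built lazily through the
 default context and memoized by the (source hash, build-flags prefix hash) pair
 maintained in Context::Impl::phash above.

     cv::ocl::ProgramSource src("__kernel void noop() {}");
     cv::String errmsg;
     cv::ocl::Program prog = cv::ocl::Context::getDefault().getProg(src, "", errmsg);
     // prog.ptr() == 0 and errmsg holds the build log if compilation failed
*/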
struct Queue::Impl
{
    Impl(const Context& c, const Device& d)
    {
        refcount = 1;
        const Context* pc = &c;
        cl_context ch = (cl_context)pc->ptr();
        if( !ch )
        {
            pc = &Context::getDefault();
            ch = (cl_context)pc->ptr();
        }
        cl_device_id dh = (cl_device_id)d.ptr();
        if( !dh )
            dh = (cl_device_id)pc->device(0).ptr();
        cl_int retval = 0;
        handle = clCreateCommandQueue(ch, dh, 0, &retval);
    }

    ~Impl()
    {
        if(handle)
        {
            clFinish(handle);
            clReleaseCommandQueue(handle);
        }
    }

    IMPLEMENT_REFCOUNTABLE();

    cl_command_queue handle;
    bool initialized;
};

Queue::Queue()
{
    p = 0;
}

Queue::Queue(const Context& c, const Device& d)
{
    p = 0;
    create(c, d);
}

Queue::Queue(const Queue& q)
{
    p = q.p;
    if(p)
        p->addref();
}

Queue& Queue::operator = (const Queue& q)
{
    Impl* newp = (Impl*)q.p;
    if(newp)
        newp->addref();
    if(p)
        p->release();
    p = newp;
    return *this;
}

Queue::~Queue()
{
    if(p)
        p->release();
}

bool Queue::create(const Context& c, const Device& d)
{
    if(p)
        p->release();
    p = new Impl(c, d);
    return p->handle != 0;
}

void Queue::finish()
{
    if(p && p->handle)
        clFinish(p->handle);
}

void* Queue::ptr() const
{
    return p ? p->handle : 0;
}

Queue& Queue::getDefault()
{
    Queue& q = TLSData::get()->oclQueue;
    if( !q.p->handle )
        q.create(Context::getDefault());
    return q;
}

static cl_command_queue getQueue(const Queue& q)
{
    cl_command_queue qq = (cl_command_queue)q.ptr();
    if(!qq)
        qq = (cl_command_queue)Queue::getDefault().ptr();
    return qq;
}
KernelArg::KernelArg(int _flags, UMat* _m, void* _obj, size_t _sz)
    : flags(_flags), m(_m), obj(_obj), sz(_sz)
{
}

KernelArg KernelArg::Constant(const Mat& m)
{
    CV_Assert(m.isContinuous());
    return KernelArg(CONSTANT, 0, m.data, m.total()*m.elemSize());
}
struct Kernel::Impl
{
    Impl(const char* kname, const Program& prog)
    {
        e = 0; refcount = 1;
        cl_program ph = (cl_program)prog.ptr();
        cl_int retval = 0;
        handle = ph != 0 ?
            clCreateKernel(ph, kname, &retval) : 0;
    }

    void finit()
    {
        if( !f.empty() )
            f->operator()();
        if( e )
        {
            clReleaseEvent(e);
            e = 0;
        }
        release();
    }

    ~Impl()
    {
        if(handle)
            clReleaseKernel(handle);
    }

    IMPLEMENT_REFCOUNTABLE();

    cl_kernel handle;
    cl_event e;
    Ptr<Kernel::Callback> f;
};

}}

extern "C"
{
static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
{
    ((cv::ocl::Kernel::Impl*)p)->finit();
}

}

namespace cv { namespace ocl {

Kernel::Kernel()
{
    p = 0;
}

Kernel::Kernel(const char* kname, const Program& prog)
{
    p = 0;
    create(kname, prog);
}

Kernel::Kernel(const char* kname, const ProgramSource& src,
               const String& buildopts, String& errmsg)
{
    p = 0;
    create(kname, src, buildopts, errmsg);
}

Kernel::Kernel(const Kernel& k)
{
    p = k.p;
    if(p)
        p->addref();
}

Kernel& Kernel::operator = (const Kernel& k)
{
    Impl* newp = (Impl*)k.p;
    if(newp)
        newp->addref();
    if(p)
        p->release();
    p = newp;
    return *this;
}

Kernel::~Kernel()
{
    if(p)
        p->release();
}

bool Kernel::create(const char* kname, const Program& prog)
{
    if(p)
        p->release();
    p = new Impl(kname, prog);
    if(p->handle == 0)
    {
        p->release();
        p = 0;
    }
    return p != 0;
}

bool Kernel::create(const char* kname, const ProgramSource& src,
                    const String& buildopts, String& errmsg)
{
    if(p)
    {
        p->release();
        p = 0;
    }
    const Program& prog = Context::getDefault().getProg(src, buildopts, errmsg);
    return create(kname, prog);
}

void* Kernel::ptr() const
{
    return p ? p->handle : 0;
}

int Kernel::set(int i, const void* value, size_t sz)
{
    CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
    return i+1;
}

int Kernel::set(int i, const UMat& m)
{
    return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
}

int Kernel::set(int i, const KernelArg& arg)
{
    CV_Assert( p && p->handle );
    if( arg.m )
    {
        int dims = arg.m->dims;
        void* h = arg.m->handle(((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
                                ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0));
        clSetKernelArg(p->handle, (cl_uint)i, sizeof(cl_mem), &h);
        clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t), &arg.m->offset);
        if( dims <= 2 )
        {
            clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t), &arg.m->step.p[0]);
            clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(arg.m->rows), &arg.m->rows);
            clSetKernelArg(p->handle, (cl_uint)(i+4), sizeof(arg.m->cols), &arg.m->cols);
            return i + 5;
        }
        else
        {
            clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t), &arg.m->offset);
            clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t)*(dims-1), &arg.m->step.p[0]);
            clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(cl_int)*dims, &arg.m->size.p[0]);
            return i + 4;
        }
    }
    else
    {
        clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
        return i+1;
    }
}

void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
                 bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
{
    CV_Assert(p && p->handle && p->e == 0);
    cl_command_queue qq = getQueue(q);
    clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
                           offset, globalsize, localsize, 0, 0,
                           sync ? 0 : &p->e);
    if( sync )
    {
        clFinish(qq);
        if( !cleanupCallback.empty() )
            cleanupCallback->operator()();
    }
    else
    {
        p->f = cleanupCallback;
        p->addref();
        clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
    }
}

void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
{
    CV_Assert(p && p->handle && p->e == 0);
    cl_command_queue qq = getQueue(q);
    clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
    if( sync )
    {
        clFinish(qq);
        if( !cleanupCallback.empty() )
            cleanupCallback->operator()();
    }
    else
    {
        p->f = cleanupCallback;
        p->addref();
        clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
    }
}

size_t Kernel::workGroupSize() const
{
    if(!p)
        return 0;
    size_t val = 0, retsz = 0;
    cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
    return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
                                    sizeof(val), &val, &retsz) >= 0 ? val : 0;
}

bool Kernel::compileWorkGroupSize(size_t wsz[]) const
{
    if(!p || !wsz)
        return 0;
    size_t retsz = 0;
    cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
    return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
                                    sizeof(wsz[0])*3, wsz, &retsz) >= 0;
}

size_t Kernel::localMemSize() const
{
    if(!p)
        return 0;
    size_t retsz = 0;
    cl_ulong val = 0;
    cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
    return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
                                    sizeof(val), &val, &retsz) >= 0 ? (size_t)val : 0;
}
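/*
 Usage sketch, not part of this commit; the kernel name "add_k" and the argument
 objects "abuf_arg"/"bbuf_arg" below are purely illustrative placeholders.

     cv::ocl::ProgramSource src(
         "__kernel void add_k(__global const float* a, __global float* b) "
         "{ int i = get_global_id(0); b[i] = a[i] + 1.f; }");
     cv::String errmsg;
     cv::ocl::Kernel k("add_k", src, "", errmsg);
     if( k.ptr() )
     {
         int idx = 0;
         idx = k.set(idx, abuf_arg);   // a KernelArg, a UMat, or a raw value via set(i, ptr, sz)
         idx = k.set(idx, bbuf_arg);
         size_t globalsize[] = { 1024 };
         k.run(1, 0, globalsize, 0, true,
               cv::Ptr<cv::ocl::Kernel::Callback>(), cv::ocl::Queue());
     }
*/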
////////////////////////////////////////////////////////////////////////////////////////
struct Program::Impl
{
    Impl(const ProgramSource& _src,
         const String& _buildflags, String& errmsg)
    {
        refcount = 1;
        const Context& ctx = Context::getDefault();
        src = _src;
        buildflags = _buildflags;
        const String& srcstr = src.source();
        const char* srcptr = srcstr.c_str();
        size_t srclen = srcstr.size();
        cl_int retval = 0;

        handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
        if( handle && retval >= 0 )
        {
            int i, n = ctx.ndevices();
            AutoBuffer<void*> deviceListBuf(n+1);
            void** deviceList = deviceListBuf;
            for( i = 0; i < n; i++ )
                deviceList[i] = ctx.device(i).ptr();
            retval = clBuildProgram(handle, n,
                                    (const cl_device_id*)deviceList,
                                    buildflags.c_str(), 0, 0);
            if( retval == CL_BUILD_PROGRAM_FAILURE )
            {
                char buf[1024];
                size_t retsz = 0;
                clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
                                      CL_PROGRAM_BUILD_LOG, sizeof(buf)-16, buf, &retsz);
                errmsg = String(buf);
            }
        }
    }

    Impl(const String& _buf, const String& _buildflags)
    {
        refcount = 1;
        handle = 0;
        buildflags = _buildflags;
        if(_buf.empty())
            return;
        String prefix0 = Program::getPrefix(buildflags);
        const Context& ctx = Context::getDefault();
        const Device& dev = Device::getDefault();
        const char* pos0 = _buf.c_str();
        char* pos1 = strchr(pos0, '\n');
        if(!pos1)
            return;
        char* pos2 = strchr(pos1+1, '\n');
        if(!pos2)
            return;
        char* pos3 = strchr(pos2+1, '\n');
        if(!pos3)
            return;
        size_t prefixlen = (pos3 - pos0)+1;
        String prefix(pos0, prefixlen);
        if( prefix != prefix0 )
            return;
        const uchar* bin = (uchar*)(pos3+1);
        void* devid = dev.ptr();
        size_t codelen = _buf.length() - prefixlen;
        cl_int binstatus = 0, retval = 0;
        handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
                                           &codelen, &bin, &binstatus, &retval);
    }

    String store()
    {
        if(!handle)
            return String();
        size_t progsz = 0, retsz = 0;
        String prefix = Program::getPrefix(buildflags);
        size_t prefixlen = prefix.length();
        if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) < 0)
            return String();
        AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
        uchar* buf = bufbuf;
        memcpy(buf, prefix.c_str(), prefixlen);
        buf += prefixlen;
        if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) < 0)
            return String();
        buf[progsz] = (uchar)'\0';
        return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
    }

    ~Impl()
    {
        if( handle )
            clReleaseProgram(handle);
    }

    IMPLEMENT_REFCOUNTABLE();

    ProgramSource src;
    String buildflags;
    cl_program handle;
};

Program::Program()
{
    p = 0;
}

Program::Program(const ProgramSource& src,
                 const String& buildflags, String& errmsg)
{
    p = 0;
    create(src, buildflags, errmsg);
}

Program::Program(const Program& prog)
{
    p = prog.p;
    if(p)
        p->addref();
}

Program& Program::operator = (const Program& prog)
{
    Impl* newp = (Impl*)prog.p;
    if(newp)
        newp->addref();
    if(p)
        p->release();
    p = newp;
    return *this;
}

Program::~Program()
{
    if(p)
        p->release();
}

bool Program::create(const ProgramSource& src,
                     const String& buildflags, String& errmsg)
{
    if(p)
        p->release();
    p = new Impl(src, buildflags, errmsg);
    if(!p->handle)
    {
        p->release();
        p = 0;
    }
    return p != 0;
}

const ProgramSource& Program::source() const
{
    static ProgramSource dummy;
    return p ? p->src : dummy;
}

void* Program::ptr() const
{
    return p ? p->handle : 0;
}

bool Program::read(const String& bin, const String& buildflags)
{
    if(p)
        p->release();
    p = new Impl(bin, buildflags);
    return p->handle != 0;
}

bool Program::write(String& bin) const
{
    if(!p)
        return false;
    bin = p->store();
    return !bin.empty();
}

String Program::getPrefix() const
{
    if(!p)
        return String();
    return getPrefix(p->buildflags);
}

String Program::getPrefix(const String& buildflags)
{
    const Context& ctx = Context::getDefault();
    const Device& dev = ctx.device(0);
    return format("name=%s\ndriver=%s\nbuildflags=%s\n",
                  dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
}
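/*
 Usage sketch, not part of this commit: a built Program can be serialized to a
 String (the device/driver/build-flags prefix from getPrefix() followed by the
 program binary) and restored later with read().

     cv::String blob;
     if( prog.write(blob) )                 // 'prog' is a successfully built Program
     {
         cv::ocl::Program cached;
         bool ok = cached.read(blob, "");   // fails if the stored prefix no longer matches
     }
*/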
////////////////////////////////////////////////////////////////////////////////////////
struct ProgramSource::Impl
{
    Impl(const char* _src)
    {
        init(String(_src));
    }
    Impl(const String& _src)
    {
        init(_src);
    }
    void init(const String& _src)
    {
        refcount = 1;
        src = _src;
        h = crc64((uchar*)src.c_str(), src.size());
    }

    IMPLEMENT_REFCOUNTABLE();
    String src;
    ProgramSource::hash_t h;
};

ProgramSource::ProgramSource()
{
    p = 0;
}

ProgramSource::ProgramSource(const char* prog)
{
    p = new Impl(prog);
}

ProgramSource::ProgramSource(const String& prog)
{
    p = new Impl(prog);
}

ProgramSource::~ProgramSource()
{
    if(p)
        p->release();
}

ProgramSource::ProgramSource(const ProgramSource& prog)
{
    p = prog.p;
    if(p)
        p->addref();
}

ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
{
    Impl* newp = (Impl*)prog.p;
    if(newp)
        newp->addref();
    if(p)
        p->release();
    p = newp;
    return *this;
}

const String& ProgramSource::source() const
{
    static String dummy;
    return p ? p->src : dummy;
}

ProgramSource::hash_t ProgramSource::hash() const
{
    return p ? p->h : 0;
}
//////////////////////////////////////////////////////////////////////////////////////////////
class OpenCLAllocator : public MatAllocator
{
public:
    OpenCLAllocator() {}

    UMatData* defaultAllocate(int dims, const int* sizes, int type, size_t* step) const
    {
        UMatData* u = Mat::getStdAllocator()->allocate(dims, sizes, type, step);
        u->urefcount = 1;
        u->refcount = 0;
        return u;
    }

    void getBestFlags(const Context& ctx, int& createFlags, int& flags0) const
    {
        const Device& dev = ctx.device(0);
        createFlags = CL_MEM_READ_WRITE;

        if( dev.hostUnifiedMemory() )
            flags0 = 0;
        else
            flags0 = UMatData::COPY_ON_MAP;
    }

    UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const
    {
        if(!useOpenCL())
            return defaultAllocate(dims, sizes, type, step);
        size_t total = CV_ELEM_SIZE(type);
        for( int i = dims-1; i >= 0; i-- )
        {
            if( step )
                step[i] = total;
            total *= sizes[i];
        }

        Context& ctx = Context::getDefault();
        int createFlags = 0, flags0 = 0;
        getBestFlags(ctx, createFlags, flags0);

        cl_int retval = 0;
        void* handle = clCreateBuffer((cl_context)ctx.ptr(),
                                      createFlags, total, 0, &retval);
        if( !handle || retval < 0 )
            return defaultAllocate(dims, sizes, type, step);
        UMatData* u = new UMatData(this);
        u->data = 0;
        u->size = total;
        u->handle = handle;
        u->urefcount = 1;
        u->flags = flags0;

        return u;
    }

    bool allocate(UMatData* u, int accessFlags) const
    {
        if(!u)
            return false;

        UMatDataAutoLock lock(u);

        if(u->handle == 0)
        {
            CV_Assert(u->origdata != 0);
            Context& ctx = Context::getDefault();
            int createFlags = 0, flags0 = 0;
            getBestFlags(ctx, createFlags, flags0);

            cl_context ctx_handle = (cl_context)ctx.ptr();
            cl_int retval = 0;
            int tempUMatFlags = UMatData::TEMP_UMAT;
            u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
                                       u->size, u->origdata, &retval);
            if((!u->handle || retval < 0) && !(accessFlags & ACCESS_FAST))
            {
                u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|createFlags,
                                           u->size, u->origdata, &retval);
                tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
            }
            if(!u->handle || retval < 0)
                return false;
            u->prevAllocator = u->currAllocator;
            u->currAllocator = this;
            u->flags |= tempUMatFlags;
        }
        if(accessFlags & ACCESS_WRITE)
            u->markHostCopyObsolete(true);
        CV_XADD(&u->urefcount, 1);
        return true;
    }

    void deallocate(UMatData* u) const
    {
        if(!u)
            return;

        // TODO: !!! when we add Shared Virtual Memory Support,
        // this function (as well as the others should be corrected)
        CV_Assert(u->handle != 0 && u->urefcount == 0);
        if(u->tempUMat())
        {
            if( u->hostCopyObsolete() && u->refcount > 0 && u->tempCopiedUMat() )
            {
                clEnqueueWriteBuffer((cl_command_queue)Queue::getDefault().ptr(),
                                     (cl_mem)u->handle, CL_TRUE, 0,
                                     u->size, u->origdata, 0, 0, 0);
            }
            u->markHostCopyObsolete(false);
            clReleaseMemObject((cl_mem)u->handle);
            u->currAllocator = u->prevAllocator;
            if(u->refcount == 0)
                u->currAllocator->deallocate(u);
        }
        else
        {
            if(u->data && u->copyOnMap())
                fastFree(u->data);
            clReleaseMemObject((cl_mem)u->handle);
            delete u;
        }
    }

    void map(UMatData* u, int accessFlags) const
    {
        if(!u)
            return;

        CV_Assert( u->handle != 0 );

        UMatDataAutoLock autolock(u);

        if(accessFlags & ACCESS_WRITE)
            u->markDeviceCopyObsolete(true);

        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();

        if( u->refcount == 0 )
        {
            if( !u->copyOnMap() )
            {
                CV_Assert(u->data == 0);
                // because there can be other map requests for the same UMat with different access flags,
                // we use the universal (read-write) access mode.
                cl_int retval = 0;
                u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
                                                     (CL_MAP_READ | CL_MAP_WRITE),
                                                     0, u->size, 0, 0, 0, &retval);
                if(u->data && retval >= 0)
                {
                    u->markHostCopyObsolete(false);
                    return;
                }

                // if map failed, switch to copy-on-map mode for the particular buffer
                u->flags |= UMatData::COPY_ON_MAP;
            }

            if(!u->data)
            {
                u->data = (uchar*)fastMalloc(u->size);
                u->markHostCopyObsolete(true);
            }
        }

        if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
        {
            CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
                                           u->size, u->data, 0, 0, 0) >= 0 );
            u->markHostCopyObsolete(false);
        }
    }

    void unmap(UMatData* u) const
    {
        if(!u)
            return;

        CV_Assert(u->handle != 0);

        UMatDataAutoLock autolock(u);

        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
        if( !u->copyOnMap() && u->data )
        {
            CV_Assert( clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0) >= 0 );
            u->data = 0;
        }
        else if( u->copyOnMap() && u->deviceCopyObsolete() )
        {
            CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
                                            u->size, u->data, 0, 0, 0) >= 0 );
        }
        u->markDeviceCopyObsolete(false);
        u->markHostCopyObsolete(false);
    }

    bool checkContinuous(int dims, const size_t sz[],
                         const size_t srcofs[], const size_t srcstep[],
                         const size_t dstofs[], const size_t dststep[],
                         size_t& total, size_t new_sz[],
                         size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
                         size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
    {
        bool iscontinuous = true;
        srcrawofs = srcofs ? srcofs[dims-1] : 0;
        dstrawofs = dstofs ? dstofs[dims-1] : 0;
        total = sz[dims-1];
        for( int i = dims-2; i >= 0; i-- )
        {
            if( i > 0 && (total != srcstep[i] || total != dststep[i]) )
                iscontinuous = false;
            total *= sz[i];
            if( srcofs )
                srcrawofs += srcofs[i]*srcstep[i];
            if( dstofs )
                dstrawofs += dstofs[i]*dststep[i];
        }

        if( !iscontinuous )
        {
            // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
            if( dims == 2 )
            {
                new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
                // we assume that new_... arrays are initialized by caller
                // with 0's, so there is no else branch
                if( srcofs )
                {
                    new_srcofs[0] = srcofs[1];
                    new_srcofs[1] = srcofs[0];
                    new_srcofs[2] = 0;
                }

                if( dstofs )
                {
                    new_dstofs[0] = dstofs[1];
                    new_dstofs[1] = dstofs[0];
                    new_dstofs[2] = 0;
                }

                new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
                new_dststep[0] = dststep[0]; new_dststep[1] = 0;
            }
            else
            {
                // we could check for dims == 3 here,
                // but from user perspective this one is more informative
                CV_Assert(dims <= 3);
                new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
                if( srcofs )
                {
                    new_srcofs[0] = srcofs[2];
                    new_srcofs[1] = srcofs[1];
                    new_srcofs[2] = srcofs[0];
                }

                if( dstofs )
                {
                    new_dstofs[0] = dstofs[2];
                    new_dstofs[1] = dstofs[1];
                    new_dstofs[2] = dstofs[0];
                }

                new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
                new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
            }
        }
        return iscontinuous;
    }

    void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
                  const size_t srcofs[], const size_t srcstep[],
                  const size_t dststep[]) const
    {
        if(!u)
            return;
        UMatDataAutoLock autolock(u);

        if( u->data && !u->hostCopyObsolete() )
        {
            Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
            return;
        }
        CV_Assert( u->handle != 0 );

        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();

        size_t total = 0, new_sz[] = {0, 0, 0};
        size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
        size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};

        bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
                                            total, new_sz,
                                            srcrawofs, new_srcofs, new_srcstep,
                                            dstrawofs, new_dstofs, new_dststep);
        if( iscontinuous )
        {
            CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
                                           srcrawofs, total, dstptr, 0, 0, 0) >= 0 );
        }
        else
        {
            CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
                                               new_srcofs, new_dstofs, new_sz,
                                               new_srcstep[0], new_srcstep[1],
                                               new_dststep[0], new_dststep[1],
                                               dstptr, 0, 0, 0) >= 0 );
        }
        clFinish(q);
    }

    void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
                const size_t dstofs[], const size_t dststep[],
                const size_t srcstep[]) const
    {
        if(!u)
            return;

        // there should be no user-visible CPU copies of the UMat which we are going to copy to
        CV_Assert(u->refcount == 0);

        size_t total = 0, new_sz[] = {0, 0, 0};
        size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
        size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};

        bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
                                            total, new_sz,
                                            srcrawofs, new_srcofs, new_srcstep,
                                            dstrawofs, new_dstofs, new_dststep);

        UMatDataAutoLock autolock(u);

        // if there is cached CPU copy of the GPU matrix,
        // we could use it as a destination.
        // we can do it in 2 cases:
        //    1. we overwrite the whole content
        //    2. we overwrite part of the matrix, but the GPU copy is out-of-date
        if( u->data && (u->hostCopyObsolete() <= u->deviceCopyObsolete() || total == u->size))
        {
            Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
            u->markHostCopyObsolete(false);
            u->markDeviceCopyObsolete(true);
            return;
        }

        CV_Assert( u->handle != 0 );
        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();

        if( iscontinuous )
        {
            CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
                                            CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 );
        }
        else
        {
            CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
                                                new_dstofs, new_srcofs, new_sz,
                                                new_dststep[0], new_dststep[1],
                                                new_srcstep[0], new_srcstep[1],
                                                srcptr, 0, 0, 0) >= 0 );
        }

        u->markHostCopyObsolete(true);
        u->markDeviceCopyObsolete(false);

        clFinish(q);
    }

    void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
              const size_t srcofs[], const size_t srcstep[],
              const size_t dstofs[], const size_t dststep[], bool sync) const
    {
        if(!src || !dst)
            return;

        size_t total = 0, new_sz[] = {0, 0, 0};
        size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
        size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};

        bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
                                            total, new_sz,
                                            srcrawofs, new_srcofs, new_srcstep,
                                            dstrawofs, new_dstofs, new_dststep);

        UMatDataAutoLock src_autolock(src);
        UMatDataAutoLock dst_autolock(dst);

        if( !src->handle || (src->data && src->hostCopyObsolete() <= src->deviceCopyObsolete()) )
        {
            upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
            return;
        }
        if( !dst->handle || (dst->data && dst->hostCopyObsolete() <= dst->deviceCopyObsolete()) )
        {
            download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
            dst->markHostCopyObsolete(false);
            dst->markDeviceCopyObsolete(true);
            return;
        }

        // there should be no user-visible CPU copies of the UMat which we are going to copy to
        CV_Assert(dst->refcount == 0);
        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();

        if( iscontinuous )
        {
            CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
                                           srcrawofs, dstrawofs, total, 0, 0, 0) >= 0 );
        }
        else
        {
            CV_Assert( clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
                                               new_srcofs, new_dstofs, new_sz,
                                               new_srcstep[0], new_srcstep[1],
                                               new_dststep[0], new_dststep[1],
                                               0, 0, 0) >= 0 );
        }

        dst->markHostCopyObsolete(true);
        dst->markDeviceCopyObsolete(false);

        if( sync )
            clFinish(q);
    }
};

MatAllocator* getOpenCLAllocator()
{
    static OpenCLAllocator allocator;
    return &allocator;
}

}}
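/*
 Note, not part of this commit: getOpenCLAllocator() is what UMat::getStdAllocator()
 returns in umatrix.cpp below, so UMat::create() goes through clCreateBuffer whenever
 useOpenCL() is true, and Mat <-> UMat transfers go through the map/unmap and
 download/upload members of this allocator. A minimal round trip, assuming an
 OpenCL device is available:

     cv::Mat m(480, 640, CV_8UC1, cv::Scalar(0));
     cv::UMat um = m.getUMat(cv::ACCESS_READ);   // wraps or copies the host buffer into a cl_mem
     cv::Mat m2 = um.getMat(cv::ACCESS_READ);    // maps the cl_mem back into host memory
*/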
modules/core/src/umatrix.cpp
0 → 100644
View file @
278fb617
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
///////////////////////////////// UMat implementation ///////////////////////////////
namespace cv {

// it should be a prime number for the best hash function
enum { UMAT_NLOCKS = 31 };
static Mutex umatLocks[UMAT_NLOCKS];

UMatData::UMatData(const MatAllocator* allocator)
{
    prevAllocator = currAllocator = allocator;
    urefcount = refcount = 0;
    data = origdata = 0;
    size = 0;
    flags = 0;
    handle = 0;
}

void UMatData::lock()
{
    umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
}

void UMatData::unlock()
{
    umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
}

MatAllocator* UMat::getStdAllocator()
{
    return ocl::getOpenCLAllocator();
}

void swap( UMat& a, UMat& b )
{
    std::swap(a.flags, b.flags);
    std::swap(a.dims, b.dims);
    std::swap(a.rows, b.rows);
    std::swap(a.cols, b.cols);
    std::swap(a.allocator, b.allocator);
    std::swap(a.u, b.u);
    std::swap(a.offset, b.offset);

    std::swap(a.size.p, b.size.p);
    std::swap(a.step.p, b.step.p);
    std::swap(a.step.buf[0], b.step.buf[0]);
    std::swap(a.step.buf[1], b.step.buf[1]);

    if( a.step.p == b.step.buf )
    {
        a.step.p = a.step.buf;
        a.size.p = &a.rows;
    }

    if( b.step.p == a.step.buf )
    {
        b.step.p = b.step.buf;
        b.size.p = &b.rows;
    }
}

static inline void setSize( UMat& m, int _dims, const int* _sz,
                            const size_t* _steps, bool autoSteps=false )
{
    CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
    if( m.dims != _dims )
    {
        if( m.step.p != m.step.buf )
        {
            fastFree(m.step.p);
            m.step.p = m.step.buf;
            m.size.p = &m.rows;
        }
        if( _dims > 2 )
        {
            m.step.p = (size_t*)fastMalloc( _dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]) );
            m.size.p = (int*)(m.step.p + _dims) + 1;
            m.size.p[-1] = _dims;
            m.rows = m.cols = -1;
        }
    }

    m.dims = _dims;
    if( !_sz )
        return;

    size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
    int i;
    for( i = _dims-1; i >= 0; i-- )
    {
        int s = _sz[i];
        CV_Assert( s >= 0 );
        m.size.p[i] = s;

        if( _steps )
            m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
        else if( autoSteps )
        {
            m.step.p[i] = total;
            int64 total1 = (int64)total*s;
            if( (uint64)total1 != (size_t)total1 )
                CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
            total = (size_t)total1;
        }
    }

    if( _dims == 1 )
    {
        m.dims = 2;
        m.cols = 1;
        m.step[1] = esz;
    }
}

static void updateContinuityFlag(UMat& m)
{
    int i, j;
    for( i = 0; i < m.dims; i++ )
    {
        if( m.size[i] > 1 )
            break;
    }

    for( j = m.dims-1; j > i; j-- )
    {
        if( m.step[j]*m.size[j] < m.step[j-1] )
            break;
    }

    uint64 t = (uint64)m.step[0]*m.size[0];
    if( j <= i && t == (size_t)t )
        m.flags |= UMat::CONTINUOUS_FLAG;
    else
        m.flags &= ~UMat::CONTINUOUS_FLAG;
}

static void finalizeHdr(UMat& m)
{
    updateContinuityFlag(m);
    int d = m.dims;
    if( d > 2 )
        m.rows = m.cols = -1;
}

UMat Mat::getUMat(int accessFlags) const
{
    UMat hdr;
    if(!u)
        return hdr;
    UMat::getStdAllocator()->allocate(u, accessFlags);
    setSize(hdr, dims, size.p, step.p);
    finalizeHdr(hdr);
    hdr.u = u;
    hdr.offset = data - datastart;
    return hdr;
}

void UMat::create(int d, const int* _sizes, int _type)
{
    int i;
    CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
    _type = CV_MAT_TYPE(_type);

    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
    {
        if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
            return;
        for( i = 0; i < d; i++ )
            if( size[i] != _sizes[i] )
                break;
        if( i == d && (d > 1 || size[1] == 1))
            return;
    }

    release();
    if( d == 0 )
        return;
    flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
    setSize(*this, d, _sizes, 0, true);
    offset = 0;

    if( total() > 0 )
    {
        MatAllocator *a = allocator, *a0 = getStdAllocator();
        if(!a)
            a = a0;
        try
        {
            u = a->allocate(dims, size, _type, step.p);
            CV_Assert(u != 0);
        }
        catch(...)
        {
            if(a != a0)
                u = a0->allocate(dims, size, _type, step.p);
            CV_Assert(u != 0);
        }
        CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
    }

    finalizeHdr(*this);
}

void UMat::copySize(const UMat& m)
{
    setSize(*this, m.dims, 0, 0);
    for( int i = 0; i < dims; i++ )
    {
        size[i] = m.size[i];
        step[i] = m.step[i];
    }
}

void UMat::deallocate()
{
    u->currAllocator->deallocate(u);
}


UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
{
    CV_Assert( m.dims >= 2 );
    if( m.dims > 2 )
    {
        AutoBuffer<Range> rs(m.dims);
        rs[0] = _rowRange;
        rs[1] = _colRange;
        for( int i = 2; i < m.dims; i++ )
            rs[i] = Range::all();
        *this = m(rs);
        return;
    }

    *this = m;
    if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
    {
        CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
        rows = _rowRange.size();
        offset += step*_rowRange.start;
        flags |= SUBMATRIX_FLAG;
    }

    if( _colRange != Range::all() && _colRange != Range(0,cols) )
    {
        CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
        cols = _colRange.size();
        offset += _colRange.start*elemSize();
        flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
        flags |= SUBMATRIX_FLAG;
    }

    if( rows == 1 )
        flags |= CONTINUOUS_FLAG;

    if( rows <= 0 || cols <= 0 )
    {
        release();
        rows = cols = 0;
    }
}

UMat::UMat(const UMat& m, const Rect& roi)
    : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
    allocator(m.allocator), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
{
    CV_Assert( m.dims <= 2 );
    flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
    flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0;

    size_t esz = CV_ELEM_SIZE(flags);
    offset += roi.x*esz;
    CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
               0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
    if( u )
        CV_XADD(&(u->urefcount), 1);
    if( roi.width < m.cols || roi.height < m.rows )
        flags |= SUBMATRIX_FLAG;

    step[0] = m.step[0]; step[1] = esz;

    if( rows <= 0 || cols <= 0 )
    {
        release();
        rows = cols = 0;
    }
}


UMat::UMat(const UMat& m, const Range* ranges)
    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
{
    int i, d = m.dims;

    CV_Assert(ranges);
    for( i = 0; i < d; i++ )
    {
        Range r = ranges[i];
        CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
    }
    *this = m;
    for( i = 0; i < d; i++ )
    {
        Range r = ranges[i];
        if( r != Range::all() && r != Range(0, size.p[i]))
        {
            size.p[i] = r.end - r.start;
            offset += r.start*step.p[i];
            flags |= SUBMATRIX_FLAG;
        }
    }
    updateContinuityFlag(*this);
}

UMat UMat::diag(int d) const
{
    CV_Assert( dims <= 2 );
    UMat m = *this;
    size_t esz = elemSize();
    int len;

    if( d >= 0 )
    {
        len = std::min(cols - d, rows);
        m.offset += esz*d;
    }
    else
    {
        len = std::min(rows + d, cols);
        m.offset -= step[0]*d;
    }
    CV_DbgAssert( len > 0 );

    m.size[0] = m.rows = len;
    m.size[1] = m.cols = 1;
    m.step[0] += (len > 1 ? esz : 0);

    if( m.rows > 1 )
        m.flags &= ~CONTINUOUS_FLAG;
    else
        m.flags |= CONTINUOUS_FLAG;

    if( size() != Size(1,1) )
        m.flags |= SUBMATRIX_FLAG;

    return m;
}

void UMat::locateROI( Size& wholeSize, Point& ofs ) const
{
    CV_Assert( dims <= 2 && step[0] > 0 );
    size_t esz = elemSize(), minstep;
    ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;

    if( delta1 == 0 )
        ofs.x = ofs.y = 0;
    else
    {
        ofs.y = (int)(delta1/step[0]);
        ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
        CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
    }
    minstep = (ofs.x + cols)*esz;
    wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
    wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
    wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
    wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
}

UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
{
    CV_Assert( dims <= 2 && step[0] > 0 );
    Size wholeSize; Point ofs;
    size_t esz = elemSize();
    locateROI( wholeSize, ofs );
    int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
    int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
    offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
    rows = row2 - row1; cols = col2 - col1;
    size.p[0] = rows; size.p[1] = cols;
    if( esz*cols == step[0] || rows == 1 )
        flags |= CONTINUOUS_FLAG;
    else
        flags &= ~CONTINUOUS_FLAG;
    return *this;
}

UMat UMat::reshape(int new_cn, int new_rows) const
{
    int cn = channels();
    UMat hdr = *this;

    if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
    {
        hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
        hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
        hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
        return hdr;
    }

    CV_Assert( dims <= 2 );

    if( new_cn == 0 )
        new_cn = cn;

    int total_width = cols * cn;

    if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
        new_rows = rows * total_width / new_cn;

    if( new_rows != 0 && new_rows != rows )
    {
        int total_size = total_width * rows;
        if( !isContinuous() )
            CV_Error( CV_BadStep,
            "The matrix is not continuous, thus its number of rows can not be changed" );

        if( (unsigned)new_rows > (unsigned)total_size )
            CV_Error( CV_StsOutOfRange, "Bad new number of rows" );

        total_width = total_size / new_rows;

        if( total_width * new_rows != total_size )
            CV_Error( CV_StsBadArg, "The total number of matrix elements "
                                    "is not divisible by the new number of rows" );

        hdr.rows = new_rows;
        hdr.step[0] = total_width * elemSize1();
    }

    int new_width = total_width / new_cn;

    if( new_width * new_cn != total_width )
        CV_Error( CV_BadNumChannels,
        "The total width is not divisible by the new number of channels" );

    hdr.cols = new_width;
    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
    hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
    return hdr;
}

UMat UMat::diag(const UMat& d)
{
    CV_Assert( d.cols == 1 || d.rows == 1 );
    int len = d.rows + d.cols - 1;
    UMat m(len, len, d.type(), Scalar(0));
    UMat md = m.diag();
    if( d.cols == 1 )
        d.copyTo(md);
    else
        transpose(d, md);
    return m;
}

int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
{
    return (depth() == _depth || _depth <= 0) &&
        (isContinuous() || !_requireContinuous) &&
        ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
                        (cols == _elemChannels && channels() == 1))) ||
        (dims == 3 && channels() == 1 && size.p[2] == _elemChannels &&
         (size.p[0] == 1 || size.p[1] == 1) &&
         (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
    ? (int)(total()*channels()/_elemChannels) : -1;
}


UMat UMat::cross(InputArray) const
{
    CV_Error(CV_StsNotImplemented, "");
    return UMat();
}


UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
{
    if(_newndims == dims)
    {
        if(_newsz == 0)
            return reshape(_cn);
        if(_newndims == 2)
            return reshape(_cn, _newsz[0]);
    }

    CV_Error(CV_StsNotImplemented, "");
    // TBD
    return UMat();
}


Mat UMat::getMat(int accessFlags) const
{
    if(!u)
        return Mat();
    u->currAllocator->map(u, accessFlags);
    CV_Assert(u->data != 0);
    Mat hdr(dims, size.p, type(), u->data + offset, step.p);
    hdr.refcount = &u->refcount;
    hdr.u = u;
    hdr.datastart = u->data;
    hdr.datalimit = hdr.dataend = u->data + u->size;
    CV_XADD(hdr.refcount, 1);
    return hdr;
}

void* UMat::handle(int accessFlags) const
{
    if( !u )
        return 0;

    // check flags: if CPU copy is newer, copy it back to GPU.
    if( u->deviceCopyObsolete() )
    {
        CV_Assert(u->refcount == 0);
        u->currAllocator->unmap(u);
    }
    else if( u->refcount > 0 && (accessFlags & ACCESS_WRITE) )
    {
        CV_Error(Error::StsError,
                 "it's not allowed to access UMat handle for writing "
                 "while it's mapped; call Mat::release() first for all its mappings");
    }
    return u->handle;
}

void UMat::ndoffset(size_t* ofs) const
{
    // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
    size_t t = offset;
    for( int i = 0; i < dims; i++ )
    {
        size_t s = step.p[i];
        ofs[i] = t / s;
        t -= ofs[i]*s;
    }
}

void UMat::copyTo(OutputArray _dst) const
{
    int dtype = _dst.type();
    if( _dst.fixedType() && dtype != type() )
    {
        CV_Assert( channels() == CV_MAT_CN(dtype) );
        convertTo( _dst, dtype );
        return;
    }

    if( empty() )
    {
        _dst.release();
        return;
    }

    size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM];
    for( i = 0; i < (size_t)dims; i++ )
        sz[i] = size.p[i];
    sz[dims-1] *= elemSize();
    ndoffset(srcofs);

    _dst.create( dims, size, type() );
    if( _dst.kind() == _InputArray::UMAT )
    {
        UMat dst = _dst.getUMat();
        void* srchandle = handle(ACCESS_READ);
        void* dsthandle = dst.handle(ACCESS_WRITE);
        if( srchandle == dsthandle && dst.offset == offset )
            return;
        ndoffset(dstofs);
        CV_Assert( u->currAllocator == dst.u->currAllocator );
        u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
    }
    else
    {
        Mat dst = _dst.getMat();
        u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p);
    }
}

void UMat::convertTo(OutputArray, int, double, double) const
{
    CV_Error(Error::StsNotImplemented, "");
}

UMat& UMat::operator = (const Scalar&)
{
    CV_Error(Error::StsNotImplemented, "");
    return *this;
}

}
/* End of file. */
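/*
 Usage sketch, not part of this commit: header-only operations such as ROI selection
 and reshape never touch the OpenCL buffer, they only adjust offset/step/size of a
 new header that shares the same UMatData. The constructors used below are the ones
 defined in this file; convertTo() and operator=(Scalar) are still
 CV_StsNotImplemented in this draft.

     cv::UMat big(100, 100, CV_32FC1, cv::Scalar(0));
     cv::UMat roi(big, cv::Rect(10, 10, 20, 20));   // shares big.u, offset adjusted
     cv::UMat flat = big.reshape(1, 1);             // 1 x 10000 header over the same data
*/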