Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6fae07ba
Commit
6fae07ba
authored
Sep 17, 2013
by
Alexander Smorkalov
Committed by
OpenCV Buildbot
Sep 17, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1460 from ilya-lavrenov:ocl_divUp
parents
14951dc3
58b84c2f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
41 additions
and
267 deletions
+41
-267
surf.ocl.cpp
modules/nonfree/src/surf.ocl.cpp
+0
-6
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+5
-0
arithm.cpp
modules/ocl/src/arithm.cpp
+0
-0
canny.cpp
modules/ocl/src/canny.cpp
+1
-3
filtering.cpp
modules/ocl/src/filtering.cpp
+1
-21
hog.cpp
modules/ocl/src/hog.cpp
+2
-6
imgproc.cpp
modules/ocl/src/imgproc.cpp
+3
-16
initialization.cpp
modules/ocl/src/initialization.cpp
+3
-13
matrix_operations.cpp
modules/ocl/src/matrix_operations.cpp
+1
-10
mcwutil.cpp
modules/ocl/src/mcwutil.cpp
+0
-6
optical_flow_farneback.cpp
modules/ocl/src/optical_flow_farneback.cpp
+6
-18
split_merge.cpp
modules/ocl/src/split_merge.cpp
+2
-113
stereo_csbp.cpp
modules/ocl/src/stereo_csbp.cpp
+10
-40
stereobm.cpp
modules/ocl/src/stereobm.cpp
+6
-11
stereobp.cpp
modules/ocl/src/stereobp.cpp
+1
-4
No files found.
modules/nonfree/src/surf.ocl.cpp
View file @
6fae07ba
...
...
@@ -82,12 +82,6 @@ namespace cv
}
}
static
inline
size_t
divUp
(
size_t
total
,
size_t
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
static
inline
int
calcSize
(
int
octave
,
int
layer
)
{
/* Wavelet size at first layer of first octave. */
...
...
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
6fae07ba
...
...
@@ -1887,6 +1887,11 @@ namespace cv
oclMat
temp4
;
oclMat
temp5
;
};
static
inline
size_t
divUp
(
size_t
total
,
size_t
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
}
}
#if defined _MSC_VER && _MSC_VER >= 1200
...
...
modules/ocl/src/arithm.cpp
View file @
6fae07ba
This diff is collapsed.
Click to expand it.
modules/ocl/src/canny.cpp
View file @
6fae07ba
...
...
@@ -360,14 +360,13 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
size_t
localThreads
[
3
]
=
{
128
,
1
,
1
};
#define DIVUP(a, b) ((a)+(b)-1)/(b)
int
count_i
[
1
]
=
{
0
};
while
(
count
>
0
)
{
openCLSafeCall
(
clEnqueueWriteBuffer
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
(
cl_mem
)
counter
,
1
,
0
,
sizeof
(
int
),
&
count_i
,
0
,
NULL
,
NULL
));
args
.
clear
();
size_t
globalThreads
[
3
]
=
{
std
::
min
(
count
,
65535u
)
*
128
,
DIVUP
(
count
,
65535
),
1
};
size_t
globalThreads
[
3
]
=
{
std
::
min
(
count
,
65535u
)
*
128
,
divUp
(
count
,
65535
),
1
};
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
map
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
st1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
st2
.
data
));
...
...
@@ -382,7 +381,6 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
openCLSafeCall
(
clEnqueueReadBuffer
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
(
cl_mem
)
counter
,
1
,
0
,
sizeof
(
int
),
&
count
,
0
,
NULL
,
NULL
));
std
::
swap
(
st1
,
st2
);
}
#undef DIVUP
}
void
canny
::
getEdges_gpu
(
oclMat
&
map
,
oclMat
&
dst
,
int
rows
,
int
cols
)
...
...
modules/ocl/src/filtering.cpp
View file @
6fae07ba
...
...
@@ -68,22 +68,12 @@ extern const char *filtering_adaptive_bilateral;
}
}
namespace
{
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
}
namespace
{
inline
void
normalizeAnchor
(
int
&
anchor
,
int
ksize
)
{
if
(
anchor
<
0
)
{
anchor
=
ksize
>>
1
;
}
CV_Assert
(
0
<=
anchor
&&
anchor
<
ksize
);
}
...
...
@@ -97,9 +87,7 @@ inline void normalizeAnchor(Point &anchor, const Size &ksize)
inline
void
normalizeROI
(
Rect
&
roi
,
const
Size
&
ksize
,
const
Point
&
anchor
,
const
Size
&
src_size
)
{
if
(
roi
==
Rect
(
0
,
0
,
-
1
,
-
1
))
{
roi
=
Rect
(
0
,
0
,
src_size
.
width
,
src_size
.
height
);
}
CV_Assert
(
ksize
.
height
>
0
&&
ksize
.
width
>
0
&&
((
ksize
.
height
&
1
)
==
1
)
&&
((
ksize
.
width
&
1
)
==
1
));
CV_Assert
((
anchor
.
x
==
-
1
&&
anchor
.
y
==
-
1
)
||
(
anchor
.
x
==
ksize
.
width
>>
1
&&
anchor
.
y
==
ksize
.
height
>>
1
));
...
...
@@ -112,10 +100,7 @@ inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8
int
scale
=
nDivisor
&&
(
kernel
.
depth
()
==
CV_32F
||
kernel
.
depth
()
==
CV_64F
)
?
256
:
1
;
if
(
nDivisor
)
{
*
nDivisor
=
scale
;
}
Mat
temp
(
kernel
.
size
(),
type
);
kernel
.
convertTo
(
temp
,
type
,
scale
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
...
...
@@ -125,9 +110,7 @@ inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8
int
count
=
cont_krnl
.
cols
>>
1
;
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
std
::
swap
(
cont_krnl
.
at
<
int
>
(
0
,
i
),
cont_krnl
.
at
<
int
>
(
0
,
cont_krnl
.
cols
-
1
-
i
));
}
}
gpu_krnl
.
upload
(
cont_krnl
);
...
...
@@ -627,8 +610,6 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, const oclMat &mat_kernel
int
localWidth
=
localThreads
[
0
]
+
paddingPixels
;
int
localHeight
=
localThreads
[
1
]
+
paddingPixels
;
// 260 = divup((localThreads[0] + filterWidth * 2), 4) * 4
// 6 = (ROWS_PER_GROUP_WHICH_IS_4 + filterWidth * 2)
size_t
localMemSize
=
ksize_3x3
?
260
*
6
*
src
.
elemSize
()
:
(
localWidth
*
localHeight
)
*
src
.
elemSize
();
int
vector_lengths
[
4
][
7
]
=
{{
4
,
4
,
4
,
4
,
4
,
4
,
4
},
...
...
@@ -1713,4 +1694,4 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
openCLExecuteKernel
(
Context
::
getContext
(),
&
filtering_adaptive_bilateral
,
kernelName
,
globalThreads
,
localThreads
,
args
,
cn
,
depth
,
build_options
);
}
\ No newline at end of file
}
modules/ocl/src/hog.cpp
View file @
6fae07ba
...
...
@@ -124,11 +124,6 @@ namespace cv
using
namespace
::
cv
::
ocl
::
device
;
static
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
cv
::
ocl
::
HOGDescriptor
::
HOGDescriptor
(
Size
win_size_
,
Size
block_size_
,
Size
block_stride_
,
Size
cell_size_
,
int
nbins_
,
double
win_sigma_
,
double
threshold_L2hys_
,
bool
gamma_correction_
,
int
nlevels_
)
...
...
@@ -1671,7 +1666,8 @@ void cv::ocl::device::hog::compute_hists(int nbins,
{
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
"-D CPU"
);
}
else
}
else
{
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
objdetect_hog
,
kernelName
);
int
wave_size
=
queryDeviceInfo
<
WAVEFRONT_SIZE
,
int
>
(
kernel
);
...
...
modules/ocl/src/imgproc.cpp
View file @
6fae07ba
...
...
@@ -1518,11 +1518,6 @@ namespace cv
// CLAHE
namespace
clahe
{
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
*
grain
;
}
static
void
calcLut
(
const
oclMat
&
src
,
oclMat
&
dst
,
const
int
tilesX
,
const
int
tilesY
,
const
cv
::
Size
tileSize
,
const
int
clipLimit
,
const
float
lutScale
)
...
...
@@ -1546,9 +1541,7 @@ namespace cv
size_t
globalThreads
[
3
]
=
{
tilesX
*
localThreads
[
0
],
tilesY
*
localThreads
[
1
],
1
};
bool
is_cpu
=
queryDeviceInfo
<
IS_CPU_DEVICE
,
bool
>
();
if
(
is_cpu
)
{
openCLExecuteKernel
(
Context
::
getContext
(),
&
imgproc_clahe
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
(
char
*
)
" -D CPU"
);
}
else
{
cl_kernel
kernel
=
openCLGetKernelFromSource
(
Context
::
getContext
(),
&
imgproc_clahe
,
kernelName
);
...
...
@@ -1583,7 +1576,7 @@ namespace cv
String
kernelName
=
"transform"
;
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
src
.
cols
,
localThreads
[
0
]),
divUp
(
src
.
rows
,
localThreads
[
1
])
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
src
.
rows
,
1
};
openCLExecuteKernel
(
Context
::
getContext
(),
&
imgproc_clahe
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
...
...
@@ -1801,10 +1794,7 @@ namespace cv
}
}
//////////////////////////////////convolve////////////////////////////////////////////////////
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
static
void
convolve_run
(
const
oclMat
&
src
,
const
oclMat
&
temp1
,
oclMat
&
dst
,
string
kernelName
,
const
char
**
kernelString
)
{
CV_Assert
(
src
.
depth
()
==
CV_32FC1
);
...
...
@@ -1826,10 +1816,7 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st
int
rows
=
dst
.
rows
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[
3
]
=
{
cols
,
rows
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
...
...
modules/ocl/src/initialization.cpp
View file @
6fae07ba
...
...
@@ -285,11 +285,6 @@ namespace cv
return
0
;
}
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
int
getDevice
(
std
::
vector
<
Info
>
&
oclinfo
,
int
devicetype
)
{
//TODO: cache oclinfo vector
...
...
@@ -707,11 +702,10 @@ namespace cv
if
(
localThreads
!=
NULL
)
{
globalThreads
[
0
]
=
divUp
(
globalThreads
[
0
],
localThreads
[
0
])
*
localThreads
[
0
]
;
globalThreads
[
1
]
=
divUp
(
globalThreads
[
1
],
localThreads
[
1
])
*
localThreads
[
1
]
;
globalThreads
[
2
]
=
divUp
(
globalThreads
[
2
],
localThreads
[
2
])
*
localThreads
[
2
]
;
globalThreads
[
0
]
=
alignSize
(
globalThreads
[
0
],
localThreads
[
0
])
;
globalThreads
[
1
]
=
alignSize
(
globalThreads
[
1
],
localThreads
[
1
])
;
globalThreads
[
2
]
=
alignSize
(
globalThreads
[
2
],
localThreads
[
2
])
;
//size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
cv
::
ocl
::
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
}
for
(
size_t
i
=
0
;
i
<
args
.
size
();
i
++
)
...
...
@@ -742,10 +736,6 @@ namespace cv
execute_time
=
(
double
)(
end_time
-
start_time
)
/
(
1000
*
1000
);
total_time
=
(
double
)(
end_time
-
queue_time
)
/
(
1000
*
1000
);
// cout << setiosflags(ios::left) << setw(15) << execute_time;
// cout << setiosflags(ios::left) << setw(15) << total_time - execute_time;
// cout << setiosflags(ios::left) << setw(15) << total_time << endl;
total_execute_time
+=
execute_time
;
total_kernel_time
+=
total_time
;
clReleaseEvent
(
event
);
...
...
modules/ocl/src/matrix_operations.cpp
View file @
6fae07ba
...
...
@@ -307,11 +307,6 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
m
.
adjustROI
(
-
ofs
.
y
,
ofs
.
y
+
rows
-
wholerows
,
-
ofs
.
x
,
ofs
.
x
+
cols
-
wholecols
);
}
/////////////////////common//////////////////////////////////////
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
...
...
@@ -331,11 +326,7 @@ static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask
char
compile_option
[
32
];
sprintf
(
compile_option
,
"-D GENTYPE=%s"
,
string_types
[
dst
.
oclchannels
()
-
1
][
dst
.
depth
()].
c_str
());
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
];
globalThreads
[
0
]
=
divUp
(
dst
.
cols
,
localThreads
[
0
])
*
localThreads
[
0
];
globalThreads
[
1
]
=
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
];
globalThreads
[
2
]
=
1
;
size_t
globalThreads
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
int
dststep_in_pixel
=
dst
.
step
/
dst
.
elemSize
(),
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
int
srcstep_in_pixel
=
src
.
step
/
src
.
elemSize
(),
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
...
...
modules/ocl/src/mcwutil.cpp
View file @
6fae07ba
...
...
@@ -71,12 +71,6 @@ namespace cv
{
namespace
ocl
{
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
// provide additional methods for the user to interact with the command queue after a task is fired
static
void
openCLExecuteKernel_2
(
Context
*
clCxt
,
const
char
**
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
...
...
modules/ocl/src/optical_flow_farneback.cpp
View file @
6fae07ba
...
...
@@ -73,11 +73,6 @@ oclMat gKer;
float
ig
[
4
];
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
inline
void
setGaussianBlurKernel
(
const
float
*
c_gKer
,
int
ksizeHalf
)
{
cv
::
Mat
t_gKer
(
1
,
ksizeHalf
+
1
,
CV_32FC1
,
const_cast
<
float
*>
(
c_gKer
));
...
...
@@ -88,7 +83,7 @@ static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
{
string
kernelName
(
"gaussianBlur"
);
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
src
.
cols
,
localThreads
[
0
])
*
localThreads
[
0
]
,
src
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
src
.
rows
,
1
};
int
smem_size
=
(
localThreads
[
0
]
+
2
*
ksizeHalf
)
*
sizeof
(
float
);
CV_Assert
(
dst
.
size
()
==
src
.
size
());
...
...
@@ -138,10 +133,7 @@ static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oc
{
string
kernelName
(
"updateMatrices"
);
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
flowx
.
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
flowx
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[
3
]
=
{
flowx
.
cols
,
flowx
.
rows
,
1
};
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
M
.
data
));
...
...
@@ -166,7 +158,7 @@ static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
string
kernelName
(
"boxFilter5"
);
int
height
=
src
.
rows
/
5
;
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
src
.
cols
,
localThreads
[
0
])
*
localThreads
[
0
]
,
height
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
height
,
1
};
int
smem_size
=
(
localThreads
[
0
]
+
2
*
ksizeHalf
)
*
5
*
sizeof
(
float
);
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
...
...
@@ -188,10 +180,7 @@ static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
string
kernelName
(
"updateFlow"
);
int
cols
=
divUp
(
flowx
.
cols
,
4
);
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
flowx
.
rows
,
localThreads
[
1
])
*
localThreads
[
0
],
1
};
size_t
globalThreads
[
3
]
=
{
cols
,
flowx
.
rows
,
1
};
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
flowx
.
data
));
...
...
@@ -211,9 +200,8 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
{
string
kernelName
(
"gaussianBlur5"
);
int
height
=
src
.
rows
/
5
;
int
width
=
src
.
cols
;
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
width
,
localThreads
[
0
])
*
localThreads
[
0
]
,
height
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
height
,
1
};
int
smem_size
=
(
localThreads
[
0
]
+
2
*
ksizeHalf
)
*
5
*
sizeof
(
float
);
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
...
...
@@ -222,7 +210,7 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
gKer
.
data
));
args
.
push_back
(
std
::
make_pair
(
smem_size
,
(
void
*
)
NULL
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
height
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
width
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
ksizeHalf
));
...
...
modules/ocl/src/split_merge.cpp
View file @
6fae07ba
...
...
@@ -73,61 +73,6 @@ namespace cv
{
namespace
split_merge
{
///////////////////////////////////////////////////////////
///////////////common/////////////////////////////////////
/////////////////////////////////////////////////////////
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
////////////////////////////////////////////////////////////////////////////
////////////////////merge//////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// static void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst)
// {
// Context *clCxt = mat_dst.clCxt;
// int channels = mat_dst.oclchannels();
// int depth = mat_dst.depth();
// string kernelName = "merge_vector";
// int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0},
// {4, 4, 2, 2, 1, 1, 1},
// {4, 4, 2, 2 , 1, 1, 1},
// {4, 4, 2, 2, 1, 1, 1}
// };
// size_t index = indexes[channels - 1][mat_dst.depth()];
// int cols = divUp(mat_dst.cols, index);
// size_t localThreads[3] = { 64, 4, 1 };
// size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
// divUp(mat_dst.rows, localThreads[1]) *localThreads[1],
// 1
// };
// vector<pair<size_t , const void *> > args;
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.rows));
// args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst.data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.step));
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[0].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[0].step));
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[1].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[1].step));
// if(n >= 3)
// {
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
// }
// if(n >= 4)
// {
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
// }
// openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth);
// }
static
void
merge_vector_run
(
const
oclMat
*
mat_src
,
size_t
n
,
oclMat
&
mat_dst
)
{
if
(
!
mat_dst
.
clCxt
->
supportsFeature
(
Context
::
CL_DOUBLE
)
&&
mat_dst
.
type
()
==
CV_64F
)
...
...
@@ -153,10 +98,7 @@ namespace cv
int
cols
=
divUp
(
mat_dst
.
cols
+
offset_cols
,
vector_length
);
size_t
localThreads
[
3
]
=
{
64
,
4
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
mat_dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[
3
]
=
{
cols
,
mat_dst
.
rows
,
1
};
int
dst_step1
=
mat_dst
.
cols
*
mat_dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
...
@@ -176,10 +118,6 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mat_src
[
2
].
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mat_src
[
2
].
offset
));
// if channel == 3, then the matrix will convert to channel =4
//if(n == 3)
// args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols));
if
(
n
==
3
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
mat_src
[
2
].
data
));
...
...
@@ -229,53 +167,6 @@ namespace cv
mat_dst
.
create
(
size
,
CV_MAKETYPE
(
depth
,
total_channels
));
merge_vector_run
(
mat_src
,
n
,
mat_dst
);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////split/////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
// static void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
// {
// Context *clCxt = mat_src.clCxt;
// int channels = mat_src.oclchannels();
// int depth = mat_src.depth();
// string kernelName = "split_vector";
// int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0},
// {8, 8, 8, 8, 4, 4, 2},
// {8, 8, 8, 8 , 4, 4, 4},
// {4, 4, 2, 2, 1, 1, 1}
// };
// size_t index = indexes[channels - 1][mat_dst[0].depth()];
// int cols = divUp(mat_src.cols, index);
// size_t localThreads[3] = { 64, 4, 1 };
// size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
// divUp(mat_src.rows, localThreads[1]) *localThreads[1],
// 1
// };
// vector<pair<size_t , const void *> > args;
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.rows));
// args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[0].step));
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[1].step));
// if(channels >= 3)
// {
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[2].step));
// }
// if(channels >= 4)
// {
// args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data));
// args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[3].step));
// }
// openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth);
// }
static
void
split_vector_run
(
const
oclMat
&
mat_src
,
oclMat
*
mat_dst
)
{
...
...
@@ -311,9 +202,7 @@ namespace cv
:
divUp
(
mat_src
.
cols
+
max_offset_cols
,
vector_length
);
size_t
localThreads
[
3
]
=
{
64
,
4
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
mat_src
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[
3
]
=
{
cols
,
mat_src
.
rows
,
1
};
int
dst_step1
=
mat_dst
[
0
].
cols
*
mat_dst
[
0
].
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
...
modules/ocl/src/stereo_csbp.cpp
View file @
6fae07ba
...
...
@@ -96,13 +96,6 @@ namespace cv
{
namespace
stereoCSBP
{
//////////////////////////////////////////////////////////////////////////
//////////////////////////////common////////////////////////////////////
////////////////////////////////////////////////////////////////////////
static
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
static
string
get_kernel_name
(
string
kernel_name
,
int
data_type
)
{
stringstream
idxStr
;
...
...
@@ -132,10 +125,7 @@ namespace cv
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[]
=
{
w
,
h
,
1
};
int
cdisp_step1
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
...
...
@@ -177,7 +167,7 @@ namespace cv
const
int
threadsNum
=
256
;
//size_t blockSize = threadsNum;
size_t
localThreads
[
3
]
=
{
win_size
,
1
,
threadsNum
/
win_size
};
size_t
globalThreads
[
3
]
=
{
w
*
localThreads
[
0
],
size_t
globalThreads
[
3
]
=
{
w
*
localThreads
[
0
],
h
*
divUp
(
rthis
.
ndisp
,
localThreads
[
2
])
*
localThreads
[
1
],
1
*
localThreads
[
2
]
};
...
...
@@ -222,10 +212,7 @@ namespace cv
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[]
=
{
w
,
h
,
1
};
int
disp_step
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
...
...
@@ -257,10 +244,7 @@ namespace cv
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[]
=
{
w
,
h
,
1
};
int
disp_step
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
...
...
@@ -291,14 +275,10 @@ namespace cv
init_data_cost_reduce_caller
(
left
,
right
,
temp
,
rthis
,
msg_step
,
h
,
w
,
level
);
if
(
rthis
.
use_local_init_data_cost
==
true
)
{
get_first_initial_local_caller
(
data_cost_selected
,
disp_selected_pyr
,
temp
,
rthis
,
h
,
w
,
nr_plane
,
msg_step
);
}
else
{
get_first_initial_global_caller
(
data_cost_selected
,
disp_selected_pyr
,
temp
,
rthis
,
h
,
w
,
nr_plane
,
msg_step
);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -317,12 +297,8 @@ namespace cv
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
w
,
h
,
1
};
int
disp_step1
=
msg_step1
*
h
;
int
disp_step2
=
msg_step2
*
h2
;
...
...
@@ -366,8 +342,8 @@ namespace cv
const
size_t
threadsNum
=
256
;
//size_t blockSize = threadsNum;
size_t
localThreads
[
3
]
=
{
win_size
,
1
,
threadsNum
/
win_size
};
size_t
globalThreads
[
3
]
=
{
w
*
localThreads
[
0
],
size_t
localThreads
[
3
]
=
{
win_size
,
1
,
threadsNum
/
win_size
};
size_t
globalThreads
[
3
]
=
{
w
*
localThreads
[
0
],
h
*
divUp
(
nr_plane
,
localThreads
[
2
])
*
localThreads
[
1
],
1
*
localThreads
[
2
]
};
...
...
@@ -431,10 +407,7 @@ namespace cv
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[]
=
{
w
,
h
,
1
};
int
disp_step1
=
msg_step1
*
h
;
int
disp_step2
=
msg_step2
*
h2
;
...
...
@@ -535,10 +508,7 @@ namespace cv
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
disp
.
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
disp
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
size_t
globalThreads
[]
=
{
disp
.
cols
,
disp
.
rows
,
1
};
int
step_size
=
disp
.
step
/
disp
.
elemSize
();
int
disp_step
=
disp
.
rows
*
msg_step
;
...
...
modules/ocl/src/stereobm.cpp
View file @
6fae07ba
...
...
@@ -96,10 +96,7 @@ static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterC
#define N_DISPARITIES 8
#define ROWSperTHREAD 21
#define BLOCK_W 128
static
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
////////////////////////////////////////////////////////////////////////////
///////////////////////////////stereoBM_GPU////////////////////////////////
////////////////////////////////////////////////////////////////////////////
...
...
@@ -117,11 +114,10 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
size_t
local_mem_size
=
(
N_DISPARITIES
*
(
BLOCK_W
+
2
*
winsz2
))
*
sizeof
(
cl_uint
);
//size_t blockSize = 1;
size_t
localThreads
[]
=
{
BLOCK_W
,
1
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
left
.
cols
-
maxdisp
-
2
*
winsz2
,
BLOCK_W
)
*
BLOCK_W
,
size_t
localThreads
[]
=
{
BLOCK_W
,
1
,
1
};
size_t
globalThreads
[]
=
{
left
.
cols
-
maxdisp
-
2
*
winsz2
,
divUp
(
left
.
rows
-
2
*
winsz2
,
ROWSperTHREAD
),
1
};
1
};
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
left
.
data
));
...
...
@@ -151,10 +147,9 @@ static void postfilter_textureness(oclMat &left, int winSize,
size_t
blockSize
=
1
;
size_t
localThreads
[]
=
{
BLOCK_W
,
blockSize
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
left
.
cols
,
BLOCK_W
)
*
BLOCK_W
,
size_t
globalThreads
[]
=
{
left
.
cols
,
divUp
(
left
.
rows
,
2
*
ROWSperTHREAD
),
1
};
1
};
size_t
local_mem_size
=
(
localThreads
[
0
]
+
localThreads
[
0
]
+
(
winSize
/
2
)
*
2
)
*
sizeof
(
float
);
...
...
modules/ocl/src/stereobp.cpp
View file @
6fae07ba
...
...
@@ -104,10 +104,7 @@ namespace cv
{
openCLFree
(
cl_con_struct
);
}
static
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
/////////////////////////////////////////////////////////////////////////////
///////////////////////////comp data////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment