Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b20bd470
Commit
b20bd470
authored
Sep 24, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactored and extended ocl::LUT
parent
311a7233
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
79 additions
and
193 deletions
+79
-193
arithm.cpp
modules/ocl/src/arithm.cpp
+31
-86
arithm_LUT.cl
modules/ocl/src/opencl/arithm_LUT.cl
+48
-107
No files found.
modules/ocl/src/arithm.cpp
View file @
b20bd470
...
...
@@ -793,100 +793,45 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
//////////////////////////////////////////////////////////////////////////////
////////////////////////////////// LUT //////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static
void
arithmetic_lut_run
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
string
kernelName
)
static
void
arithmetic_lut_run
(
const
oclMat
&
src
,
const
oclMat
&
lut
,
oclMat
&
dst
,
string
kernelName
)
{
Context
*
clCxt
=
src1
.
clCxt
;
int
channels
=
src1
.
oclchannels
();
int
rows
=
src1
.
rows
;
int
cols
=
src1
.
cols
;
//int step = src1.step;
int
src_step
=
src1
.
step
/
src1
.
elemSize
();
int
dst_step
=
dst
.
step
/
dst
.
elemSize
();
int
whole_rows
=
src1
.
wholerows
;
int
whole_cols
=
src1
.
wholecols
;
int
src_offset
=
src1
.
offset
/
src1
.
elemSize
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize
();
int
lut_offset
=
src2
.
offset
/
src2
.
elemSize
();
int
left_col
=
0
,
right_col
=
0
;
size_t
localSize
[]
=
{
16
,
16
,
1
};
//cl_kernel kernel = openCLGetKernelFromSource(clCxt,&arithm_LUT,kernelName);
size_t
globalSize
[]
=
{(
cols
+
localSize
[
0
]
-
1
)
/
localSize
[
0
]
*
localSize
[
0
],
(
rows
+
localSize
[
1
]
-
1
)
/
localSize
[
1
]
*
localSize
[
1
],
1
};
if
(
channels
==
1
&&
cols
>
6
)
{
left_col
=
4
-
(
dst_offset
&
3
);
left_col
&=
3
;
dst_offset
+=
left_col
;
src_offset
+=
left_col
;
cols
-=
left_col
;
right_col
=
cols
&
3
;
cols
-=
right_col
;
globalSize
[
0
]
=
(
cols
/
4
+
localSize
[
0
]
-
1
)
/
localSize
[
0
]
*
localSize
[
0
];
}
else
if
(
channels
==
1
)
{
left_col
=
cols
;
right_col
=
0
;
cols
=
0
;
globalSize
[
0
]
=
0
;
}
CV_Assert
(
clCxt
==
dst
.
clCxt
);
CV_Assert
(
src1
.
cols
==
dst
.
cols
);
CV_Assert
(
src1
.
rows
==
dst
.
rows
);
CV_Assert
(
src1
.
oclchannels
()
==
dst
.
oclchannels
());
// CV_Assert(src1.step == dst.step);
Context
*
clCxt
=
src
.
clCxt
;
int
sdepth
=
src
.
depth
();
int
src_step1
=
src
.
step1
(),
dst_step1
=
dst
.
step1
();
int
src_offset1
=
src
.
offset
/
src
.
elemSize1
(),
dst_offset1
=
dst
.
offset
/
dst
.
elemSize1
();
int
lut_offset1
=
lut
.
offset
/
lut
.
elemSize1
()
+
(
sdepth
==
CV_8U
?
0
:
128
)
*
lut
.
channels
();
int
cols1
=
src
.
cols
*
src
.
oclchannels
();
size_t
localSize
[]
=
{
16
,
16
,
1
};
size_t
globalSize
[]
=
{
lut
.
channels
()
==
1
?
cols1
:
src
.
cols
,
src
.
rows
,
1
};
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
std
::
string
buildOptions
=
format
(
"-D srcT=%s -D dstT=%s"
,
typeMap
[
sdepth
],
typeMap
[
dst
.
depth
()]);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
lut
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
lut_offset1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
whole_rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
whole_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
lut_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step
));
openCLExecuteKernel
(
clCxt
,
&
arithm_LUT
,
kernelName
,
globalSize
,
localSize
,
args
,
src1
.
oclchannels
(),
src1
.
depth
());
}
if
(
channels
==
1
&&
(
left_col
!=
0
||
right_col
!=
0
))
{
src_offset
=
src1
.
offset
;
dst_offset
=
dst
.
offset
;
localSize
[
0
]
=
1
;
localSize
[
1
]
=
256
;
globalSize
[
0
]
=
left_col
+
right_col
;
globalSize
[
1
]
=
(
rows
+
localSize
[
1
]
-
1
)
/
localSize
[
1
]
*
localSize
[
1
];
//kernel = openCLGetKernelFromSource(clCxt,&arithm_LUT,"LUT2");
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
left_col
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
whole_rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
lut_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step
));
openCLExecuteKernel
(
clCxt
,
&
arithm_LUT
,
"LUT2"
,
globalSize
,
localSize
,
args
,
src1
.
oclchannels
(),
src1
.
depth
());
}
openCLExecuteKernel
(
clCxt
,
&
arithm_LUT
,
kernelName
,
globalSize
,
localSize
,
args
,
lut
.
oclchannels
(),
-
1
,
buildOptions
.
c_str
());
}
void
cv
::
ocl
::
LUT
(
const
oclMat
&
src
,
const
oclMat
&
lut
,
oclMat
&
dst
)
{
int
cn
=
src
.
channels
();
CV_Assert
(
src
.
depth
()
==
CV_8U
);
CV_Assert
((
lut
.
oclchannels
()
==
1
||
lut
.
oclchannels
()
==
cn
)
&&
lut
.
rows
==
1
&&
lut
.
cols
==
256
);
int
cn
=
src
.
channels
(),
depth
=
src
.
depth
();
CV_Assert
(
depth
==
CV_8U
||
depth
==
CV_8S
);
CV_Assert
(
lut
.
channels
()
==
1
||
lut
.
channels
()
==
src
.
channels
());
CV_Assert
(
lut
.
rows
==
1
&&
lut
.
cols
==
256
);
dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
lut
.
depth
(),
cn
));
//oclMat _lut(lut);
string
kernelName
=
"LUT"
;
arithmetic_lut_run
(
src
,
lut
,
dst
,
kernelName
);
}
...
...
modules/ocl/src/opencl/arithm_LUT.cl
View file @
b20bd470
...
...
@@ -38,125 +38,66 @@
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
__kernel
void
LUT_C1_D0
(
__global
uchar
*dst,
__global
const
uchar
*src,
__constant
uchar
*table,
int
rows,
int
cols,
int
channels,
int
whole_rows,
int
whole_cols,
int
src_offset,
int
dst_offset,
int
lut_offset,
int
src_step,
int
dst_step
)
__kernel
void
LUT_C1
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
__global
dstT
*dst,
int
cols1,
int
rows,
int
src_offset1,
int
lut_offset1,
int
dst_offset1,
int
src_step1,
int
dst_step1
)
{
int
gidx
=
get_global_id
(
0
)
<<2
;
int
gidy
=
get_global_id
(
1
)
;
int
lidx
=
get_local_id
(
0
)
;
int
lidy
=
get_local_id
(
1
)
;
int
x1
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
__local
uchar
l[256]
;
l[
(
lidy<<4
)
+lidx]
=
table[
(
lidy<<4
)
+lidx+lut_offset]
;
//mem_fence
(
CLK_LOCAL_MEM_FENCE
)
;
//clamp
(
gidx,mask,cols-1
)
;
gidx
=
gidx
>=
cols-4?cols-4:gidx
;
gidy
=
gidy
>=
rows?rows-1:gidy
;
int
src_index
=
src_offset
+
mad24
(
gidy,src_step,gidx
)
;
int
dst_index
=
dst_offset
+
mad24
(
gidy,dst_step,gidx
)
;
uchar4
p,q
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
p.x
=
src[src_index]
;
p.y
=
src[src_index+1]
;
p.z
=
src[src_index+2]
;
p.w
=
src[src_index+3]
;
if
(
x1
<
cols1
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step1,
src_offset1
+
x1
)
;
int
dst_index
=
mad24
(
y,
dst_step1,
dst_offset1
+
x1
)
;
q.x
=
l[p.x]
;
q.y
=
l[p.y]
;
q.z
=
l[p.z]
;
q.w
=
l[p.w]
;
*
(
__global
uchar4*
)(
dst
+
dst_index
)
=
q
;
dst[dst_index]
=
lut[lut_offset1
+
src[src_index]]
;
}
}
__kernel
void
LUT2_C1_D0
(
__global
uchar
*dst,
__global
const
uchar
*src,
__constant
uchar
*table,
int
rows,
int
precols,
int
channels,
int
whole_rows,
int
cols,
int
src_offset,
int
dst_offset,
int
lut_offset,
int
src_step,
int
dst_step
)
__kernel
void
LUT_C2
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
__global
dstT
*dst,
int
cols1,
int
rows,
int
src_offset1,
int
lut_offset1,
int
dst_offset1,
int
src_step1,
int
dst_step1
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
//int
lidx
=
get_local_id
(
0
)
;
int
lidy
=
get_local_id
(
1
)
;
__local
uchar
l[256]
;
l[lidy]
=
table[lidy+lut_offset]
;
//mem_fence
(
CLK_LOCAL_MEM_FENCE
)
;
int
x1
=
get_global_id
(
0
)
<<
1
;
int
y
=
get_global_id
(
1
)
;
if
(
x1
<
cols1
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step1,
src_offset1
+
x1
)
;
int
dst_index
=
mad24
(
y,
dst_step1,
dst_offset1
+
x1
)
;
//clamp
(
gidx,mask,cols-1
)
;
gidx
=
gidx
>=
precols
?
cols+gidx
:
gidx
;
gidy
=
gidy
>=
rows?rows-1:gidy
;
int
src_index
=
src_offset
+
mad24
(
gidy,src_step,gidx
)
;
int
dst_index
=
dst_offset
+
mad24
(
gidy,dst_step,gidx
)
;
//uchar4
p,q
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
uchar
p
=
src[src_index]
;
uchar
q
=
l[p]
;
dst[dst_index]
=
q
;
dst[dst_index
]
=
lut[lut_offset1
+
(
src[src_index
]
<<
1
)
]
;
dst[dst_index
+
1]
=
x1
+
1
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
1]
<<
1
)
+
1]
:
dst[dst_index
+
1]
;
}
}
__kernel
void
LUT_C4_D0
(
__global
uchar4
*dst,
__global
uchar4
*src,
__constant
uchar
*table,
int
rows,
int
cols,
int
channels,
int
whole_rows,
int
whole_cols,
int
src_offset,
int
dst_offset,
int
lut_offset,
int
src_step,
int
dst_step
)
__kernel
void
LUT_C4
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
__global
dstT
*dst,
int
cols1,
int
rows,
int
src_offset1,
int
lut_offset1,
int
dst_offset1,
int
src_step1,
int
dst_step1
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gid
y
=
get_global_id
(
1
)
;
int
x1
=
get_global_id
(
0
)
<<
2
;
int
y
=
get_global_id
(
1
)
;
int
lidx
=
get_local_id
(
0
)
;
int
lidy
=
get_local_id
(
1
)
;
int
src_index
=
mad24
(
gidy,src_step,gidx+src_offset
)
;
int
dst_index
=
mad24
(
gidy,dst_step,gidx+dst_offset
)
;
__local
uchar
l[256]
;
l[lidy*16+lidx]
=
table[lidy*16+lidx+lut_offset]
;
//mem_fence
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
gidx<cols
&&
gidy<rows
)
if
(
x1
<
cols1
&&
y
<
rows
)
{
uchar4
p
=
src[src_index]
;
uchar4
q
;
q.x
=
l[p.x]
;
q.y
=
l[p.y
]
;
q.z
=
l[p.z
]
;
q.w
=
l[p.w
]
;
dst[dst_index
]
=
q
;
int
src_index
=
mad24
(
y,
src_step1,
src_offset1
+
x1
)
;
int
dst_index
=
mad24
(
y,
dst_step1,
dst_offset1
+
x1
)
;
dst[dst_index
]
=
lut[lut_offset1
+
(
src[src_index
]
<<
2
)
]
;
dst[dst_index
+
1]
=
x1
+
1
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
1]
<<
2
)
+
1]
:
dst[dst_index
+
1
]
;
dst[dst_index
+
2]
=
x1
+
2
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
2]
<<
2
)
+
2]
:
dst[dst_index
+
2
]
;
dst[dst_index
+
3]
=
x1
+
3
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
3]
<<
2
)
+
3]
:
dst[dst_index
+
3]
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment