Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b20bd470
Commit
b20bd470
authored
Sep 24, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactored and extended ocl::LUT
parent
311a7233
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
79 additions
and
193 deletions
+79
-193
arithm.cpp
modules/ocl/src/arithm.cpp
+31
-86
arithm_LUT.cl
modules/ocl/src/opencl/arithm_LUT.cl
+48
-107
No files found.
modules/ocl/src/arithm.cpp
View file @
b20bd470
...
@@ -793,100 +793,45 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
...
@@ -793,100 +793,45 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
////////////////////////////////// LUT //////////////////////////////////////
////////////////////////////////// LUT //////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static
void
arithmetic_lut_run
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
string
kernelName
)
static
void
arithmetic_lut_run
(
const
oclMat
&
src
,
const
oclMat
&
lut
,
oclMat
&
dst
,
string
kernelName
)
{
{
Context
*
clCxt
=
src1
.
clCxt
;
Context
*
clCxt
=
src
.
clCxt
;
int
channels
=
src1
.
oclchannels
();
int
sdepth
=
src
.
depth
();
int
rows
=
src1
.
rows
;
int
src_step1
=
src
.
step1
(),
dst_step1
=
dst
.
step1
();
int
cols
=
src1
.
cols
;
int
src_offset1
=
src
.
offset
/
src
.
elemSize1
(),
dst_offset1
=
dst
.
offset
/
dst
.
elemSize1
();
//int step = src1.step;
int
lut_offset1
=
lut
.
offset
/
lut
.
elemSize1
()
+
(
sdepth
==
CV_8U
?
0
:
128
)
*
lut
.
channels
();
int
src_step
=
src1
.
step
/
src1
.
elemSize
();
int
cols1
=
src
.
cols
*
src
.
oclchannels
();
int
dst_step
=
dst
.
step
/
dst
.
elemSize
();
int
whole_rows
=
src1
.
wholerows
;
size_t
localSize
[]
=
{
16
,
16
,
1
};
int
whole_cols
=
src1
.
wholecols
;
size_t
globalSize
[]
=
{
lut
.
channels
()
==
1
?
cols1
:
src
.
cols
,
src
.
rows
,
1
};
int
src_offset
=
src1
.
offset
/
src1
.
elemSize
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize
();
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
int
lut_offset
=
src2
.
offset
/
src2
.
elemSize
();
std
::
string
buildOptions
=
format
(
"-D srcT=%s -D dstT=%s"
,
typeMap
[
sdepth
],
typeMap
[
dst
.
depth
()]);
int
left_col
=
0
,
right_col
=
0
;
size_t
localSize
[]
=
{
16
,
16
,
1
};
//cl_kernel kernel = openCLGetKernelFromSource(clCxt,&arithm_LUT,kernelName);
size_t
globalSize
[]
=
{(
cols
+
localSize
[
0
]
-
1
)
/
localSize
[
0
]
*
localSize
[
0
],
(
rows
+
localSize
[
1
]
-
1
)
/
localSize
[
1
]
*
localSize
[
1
],
1
};
if
(
channels
==
1
&&
cols
>
6
)
{
left_col
=
4
-
(
dst_offset
&
3
);
left_col
&=
3
;
dst_offset
+=
left_col
;
src_offset
+=
left_col
;
cols
-=
left_col
;
right_col
=
cols
&
3
;
cols
-=
right_col
;
globalSize
[
0
]
=
(
cols
/
4
+
localSize
[
0
]
-
1
)
/
localSize
[
0
]
*
localSize
[
0
];
}
else
if
(
channels
==
1
)
{
left_col
=
cols
;
right_col
=
0
;
cols
=
0
;
globalSize
[
0
]
=
0
;
}
CV_Assert
(
clCxt
==
dst
.
clCxt
);
CV_Assert
(
src1
.
cols
==
dst
.
cols
);
CV_Assert
(
src1
.
rows
==
dst
.
rows
);
CV_Assert
(
src1
.
oclchannels
()
==
dst
.
oclchannels
());
// CV_Assert(src1.step == dst.step);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
lut
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
lut_offset1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
if
(
globalSize
[
0
]
!=
0
)
openCLExecuteKernel
(
clCxt
,
&
arithm_LUT
,
kernelName
,
globalSize
,
localSize
,
{
args
,
lut
.
oclchannels
(),
-
1
,
buildOptions
.
c_str
());
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
whole_rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
whole_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
lut_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step
));
openCLExecuteKernel
(
clCxt
,
&
arithm_LUT
,
kernelName
,
globalSize
,
localSize
,
args
,
src1
.
oclchannels
(),
src1
.
depth
());
}
if
(
channels
==
1
&&
(
left_col
!=
0
||
right_col
!=
0
))
{
src_offset
=
src1
.
offset
;
dst_offset
=
dst
.
offset
;
localSize
[
0
]
=
1
;
localSize
[
1
]
=
256
;
globalSize
[
0
]
=
left_col
+
right_col
;
globalSize
[
1
]
=
(
rows
+
localSize
[
1
]
-
1
)
/
localSize
[
1
]
*
localSize
[
1
];
//kernel = openCLGetKernelFromSource(clCxt,&arithm_LUT,"LUT2");
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
left_col
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
whole_rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
lut_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step
));
openCLExecuteKernel
(
clCxt
,
&
arithm_LUT
,
"LUT2"
,
globalSize
,
localSize
,
args
,
src1
.
oclchannels
(),
src1
.
depth
());
}
}
}
void
cv
::
ocl
::
LUT
(
const
oclMat
&
src
,
const
oclMat
&
lut
,
oclMat
&
dst
)
void
cv
::
ocl
::
LUT
(
const
oclMat
&
src
,
const
oclMat
&
lut
,
oclMat
&
dst
)
{
{
int
cn
=
src
.
channels
();
int
cn
=
src
.
channels
(),
depth
=
src
.
depth
();
CV_Assert
(
src
.
depth
()
==
CV_8U
);
CV_Assert
(
depth
==
CV_8U
||
depth
==
CV_8S
);
CV_Assert
((
lut
.
oclchannels
()
==
1
||
lut
.
oclchannels
()
==
cn
)
&&
lut
.
rows
==
1
&&
lut
.
cols
==
256
);
CV_Assert
(
lut
.
channels
()
==
1
||
lut
.
channels
()
==
src
.
channels
());
CV_Assert
(
lut
.
rows
==
1
&&
lut
.
cols
==
256
);
dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
lut
.
depth
(),
cn
));
dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
lut
.
depth
(),
cn
));
//oclMat _lut(lut);
string
kernelName
=
"LUT"
;
string
kernelName
=
"LUT"
;
arithmetic_lut_run
(
src
,
lut
,
dst
,
kernelName
);
arithmetic_lut_run
(
src
,
lut
,
dst
,
kernelName
);
}
}
...
...
modules/ocl/src/opencl/arithm_LUT.cl
View file @
b20bd470
...
@@ -38,125 +38,66 @@
...
@@ -38,125 +38,66 @@
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
__kernel
void
LUT_C1
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
void
LUT_C1_D0
(
__global
uchar
*dst,
__global
dstT
*dst,
__global
const
uchar
*src,
int
cols1,
int
rows,
__constant
uchar
*table,
int
src_offset1,
int
rows,
int
lut_offset1,
int
cols,
int
dst_offset1,
int
channels,
int
src_step1,
int
dst_step1
)
int
whole_rows,
int
whole_cols,
int
src_offset,
int
dst_offset,
int
lut_offset,
int
src_step,
int
dst_step
)
{
{
int
gidx
=
get_global_id
(
0
)
<<2
;
int
x1
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
int
lidx
=
get_local_id
(
0
)
;
int
lidy
=
get_local_id
(
1
)
;
__local
uchar
l[256]
;
if
(
x1
<
cols1
&&
y
<
rows
)
l[
(
lidy<<4
)
+lidx]
=
table[
(
lidy<<4
)
+lidx+lut_offset]
;
{
//mem_fence
(
CLK_LOCAL_MEM_FENCE
)
;
int
src_index
=
mad24
(
y,
src_step1,
src_offset1
+
x1
)
;
int
dst_index
=
mad24
(
y,
dst_step1,
dst_offset1
+
x1
)
;
//clamp
(
gidx,mask,cols-1
)
;
gidx
=
gidx
>=
cols-4?cols-4:gidx
;
gidy
=
gidy
>=
rows?rows-1:gidy
;
int
src_index
=
src_offset
+
mad24
(
gidy,src_step,gidx
)
;
int
dst_index
=
dst_offset
+
mad24
(
gidy,dst_step,gidx
)
;
uchar4
p,q
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
p.x
=
src[src_index]
;
p.y
=
src[src_index+1]
;
p.z
=
src[src_index+2]
;
p.w
=
src[src_index+3]
;
q.x
=
l[p.x]
;
dst[dst_index]
=
lut[lut_offset1
+
src[src_index]]
;
q.y
=
l[p.y]
;
}
q.z
=
l[p.z]
;
q.w
=
l[p.w]
;
*
(
__global
uchar4*
)(
dst
+
dst_index
)
=
q
;
}
}
__kernel
__kernel
void
LUT_C2
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
void
LUT2_C1_D0
(
__global
uchar
*dst,
__global
dstT
*dst,
__global
const
uchar
*src,
int
cols1,
int
rows,
__constant
uchar
*table,
int
src_offset1,
int
rows,
int
lut_offset1,
int
precols,
int
dst_offset1,
int
channels,
int
src_step1,
int
dst_step1
)
int
whole_rows,
int
cols,
int
src_offset,
int
dst_offset,
int
lut_offset,
int
src_step,
int
dst_step
)
{
{
int
gidx
=
get_global_id
(
0
)
;
int
x1
=
get_global_id
(
0
)
<<
1
;
int
gidy
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
//int
lidx
=
get_local_id
(
0
)
;
int
lidy
=
get_local_id
(
1
)
;
__local
uchar
l[256]
;
l[lidy]
=
table[lidy+lut_offset]
;
//mem_fence
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x1
<
cols1
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step1,
src_offset1
+
x1
)
;
int
dst_index
=
mad24
(
y,
dst_step1,
dst_offset1
+
x1
)
;
//clamp
(
gidx,mask,cols-1
)
;
dst[dst_index
]
=
lut[lut_offset1
+
(
src[src_index
]
<<
1
)
]
;
gidx
=
gidx
>=
precols
?
cols+gidx
:
gidx
;
dst[dst_index
+
1]
=
x1
+
1
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
1]
<<
1
)
+
1]
:
dst[dst_index
+
1]
;
gidy
=
gidy
>=
rows?rows-1:gidy
;
}
int
src_index
=
src_offset
+
mad24
(
gidy,src_step,gidx
)
;
int
dst_index
=
dst_offset
+
mad24
(
gidy,dst_step,gidx
)
;
//uchar4
p,q
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
uchar
p
=
src[src_index]
;
uchar
q
=
l[p]
;
dst[dst_index]
=
q
;
}
}
__kernel
__kernel
void
LUT_C4
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
void
LUT_C4_D0
(
__global
uchar4
*dst,
__global
dstT
*dst,
__global
uchar4
*src,
int
cols1,
int
rows,
__constant
uchar
*table,
int
src_offset1,
int
rows,
int
lut_offset1,
int
cols,
int
dst_offset1,
int
channels,
int
src_step1,
int
dst_step1
)
int
whole_rows,
int
whole_cols,
int
src_offset,
int
dst_offset,
int
lut_offset,
int
src_step,
int
dst_step
)
{
{
int
gidx
=
get_global_id
(
0
)
;
int
x1
=
get_global_id
(
0
)
<<
2
;
int
gid
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
int
lidx
=
get_local_id
(
0
)
;
if
(
x1
<
cols1
&&
y
<
rows
)
int
lidy
=
get_local_id
(
1
)
;
int
src_index
=
mad24
(
gidy,src_step,gidx+src_offset
)
;
int
dst_index
=
mad24
(
gidy,dst_step,gidx+dst_offset
)
;
__local
uchar
l[256]
;
l[lidy*16+lidx]
=
table[lidy*16+lidx+lut_offset]
;
//mem_fence
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
gidx<cols
&&
gidy<rows
)
{
{
uchar4
p
=
src[src_index]
;
int
src_index
=
mad24
(
y,
src_step1,
src_offset1
+
x1
)
;
uchar4
q
;
int
dst_index
=
mad24
(
y,
dst_step1,
dst_offset1
+
x1
)
;
q.x
=
l[p.x]
;
q.y
=
l[p.y
]
;
dst[dst_index
]
=
lut[lut_offset1
+
(
src[src_index
]
<<
2
)
]
;
q.z
=
l[p.z
]
;
dst[dst_index
+
1]
=
x1
+
1
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
1]
<<
2
)
+
1]
:
dst[dst_index
+
1
]
;
q.w
=
l[p.w
]
;
dst[dst_index
+
2]
=
x1
+
2
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
2]
<<
2
)
+
2]
:
dst[dst_index
+
2
]
;
dst[dst_index
]
=
q
;
dst[dst_index
+
3]
=
x1
+
3
<
cols1
?
lut[lut_offset1
+
(
src[src_index
+
3]
<<
2
)
+
3]
:
dst[dst_index
+
3]
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment