Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
493785a7
Commit
493785a7
authored
Aug 14, 2014
by
lluis
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
moves implementation to cpp file and removes using directive from header file
parent
3ab63082
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
102 additions
and
96 deletions
+102
-96
ocr.hpp
modules/text/include/opencv2/text/ocr.hpp
+9
-19
ocr_tesseract.cpp
modules/text/src/ocr_tesseract.cpp
+93
-77
No files found.
modules/text/include/opencv2/text/ocr.hpp
View file @
493785a7
...
...
@@ -46,7 +46,6 @@
#include <vector>
#include <string>
using
namespace
std
;
namespace
cv
{
...
...
@@ -65,29 +64,20 @@ class CV_EXPORTS BaseOCR
{
public
:
virtual
~
BaseOCR
()
{};
virtual
void
run
(
Mat
&
image
,
st
ring
&
output_text
,
vector
<
Rect
>*
component_rects
=
NULL
,
vector
<
string
>*
component_texts
=
NULL
,
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
)
=
0
;
virtual
void
run
(
Mat
&
image
,
st
d
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
)
=
0
;
};
class
CV_EXPORTS
OCRTesseract
:
public
BaseOCR
{
public
:
virtual
void
run
(
Mat
&
image
,
string
&
output_text
,
vector
<
Rect
>*
component_rects
=
NULL
,
vector
<
string
>*
component_texts
=
NULL
,
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
)
{
CV_Assert
(
(
image
.
type
()
==
CV_8UC1
)
||
(
image
.
type
()
==
CV_8UC1
)
);
CV_Assert
(
(
component_level
==
OCR_LEVEL_TEXTLINE
)
||
(
component_level
==
OCR_LEVEL_WORD
)
);
output_text
.
clear
();
if
(
component_rects
!=
NULL
)
component_rects
->
clear
();
if
(
component_texts
!=
NULL
)
component_texts
->
clear
();
if
(
component_confidences
!=
NULL
)
component_confidences
->
clear
();
}
static
Ptr
<
OCRTesseract
>
create
(
const
char
*
datapath
=
NULL
,
const
char
*
language
=
NULL
,
const
char
*
char_whitelist
=
NULL
,
int
oem
=
3
,
int
psmode
=
3
);
virtual
void
run
(
Mat
&
image
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
);
static
Ptr
<
OCRTesseract
>
create
(
const
char
*
datapath
=
NULL
,
const
char
*
language
=
NULL
,
const
char
*
char_whitelist
=
NULL
,
int
oem
=
3
,
int
psmode
=
3
);
};
...
...
modules/text/src/ocr_tesseract.cpp
View file @
493785a7
...
...
@@ -47,13 +47,28 @@
#include <iostream>
#include <fstream>
#include <queue>
using
namespace
std
;
namespace
cv
{
namespace
text
{
using
namespace
std
;
void
OCRTesseract
::
run
(
Mat
&
image
,
string
&
output_text
,
vector
<
Rect
>*
component_rects
,
vector
<
string
>*
component_texts
,
vector
<
float
>*
component_confidences
,
int
component_level
)
{
CV_Assert
(
(
image
.
type
()
==
CV_8UC1
)
||
(
image
.
type
()
==
CV_8UC1
)
);
CV_Assert
(
(
component_level
==
OCR_LEVEL_TEXTLINE
)
||
(
component_level
==
OCR_LEVEL_WORD
)
);
output_text
.
clear
();
if
(
component_rects
!=
NULL
)
component_rects
->
clear
();
if
(
component_texts
!=
NULL
)
component_texts
->
clear
();
if
(
component_confidences
!=
NULL
)
component_confidences
->
clear
();
}
class
OCRTesseractImpl
:
public
OCRTesseract
{
...
...
@@ -68,42 +83,42 @@ public:
{
#ifdef HAVE_TESSERACT
const
char
*
lang
=
"eng"
;
if
(
language
!=
NULL
)
lang
=
language
;
if
(
tess
.
Init
(
datapath
,
lang
,
(
tesseract
::
OcrEngineMode
)
oemode
))
{
cout
<<
"OCRTesseract: Could not initialize tesseract."
<<
endl
;
throw
1
;
}
const
char
*
lang
=
"eng"
;
if
(
language
!=
NULL
)
lang
=
language
;
if
(
tess
.
Init
(
datapath
,
lang
,
(
tesseract
::
OcrEngineMode
)
oemode
))
{
cout
<<
"OCRTesseract: Could not initialize tesseract."
<<
endl
;
throw
1
;
}
//cout << "OCRTesseract: tesseract version " << tess.Version() << endl;
//cout << "OCRTesseract: tesseract version " << tess.Version() << endl;
tesseract
::
PageSegMode
pagesegmode
=
(
tesseract
::
PageSegMode
)
psmode
;
tess
.
SetPageSegMode
(
pagesegmode
);
tesseract
::
PageSegMode
pagesegmode
=
(
tesseract
::
PageSegMode
)
psmode
;
tess
.
SetPageSegMode
(
pagesegmode
);
if
(
char_whitelist
!=
NULL
)
tess
.
SetVariable
(
"tessedit_char_whitelist"
,
char_whitelist
);
else
tess
.
SetVariable
(
"tessedit_char_whitelist"
,
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
);
if
(
char_whitelist
!=
NULL
)
tess
.
SetVariable
(
"tessedit_char_whitelist"
,
char_whitelist
);
else
tess
.
SetVariable
(
"tessedit_char_whitelist"
,
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
);
tess
.
SetVariable
(
"save_best_choices"
,
"T"
);
tess
.
SetVariable
(
"save_best_choices"
,
"T"
);
#else
cout
<<
"OCRTesseract("
<<
oemode
<<
psmode
<<
"): Tesseract not found."
<<
endl
;
if
(
datapath
!=
NULL
)
cout
<<
" "
<<
datapath
<<
endl
;
if
(
language
!=
NULL
)
cout
<<
" "
<<
language
<<
endl
;
if
(
char_whitelist
!=
NULL
)
cout
<<
" "
<<
char_whitelist
<<
endl
;
cout
<<
"OCRTesseract("
<<
oemode
<<
psmode
<<
"): Tesseract not found."
<<
endl
;
if
(
datapath
!=
NULL
)
cout
<<
" "
<<
datapath
<<
endl
;
if
(
language
!=
NULL
)
cout
<<
" "
<<
language
<<
endl
;
if
(
char_whitelist
!=
NULL
)
cout
<<
" "
<<
char_whitelist
<<
endl
;
#endif
}
~
OCRTesseractImpl
()
{
#ifdef HAVE_TESSERACT
tess
.
End
();
tess
.
End
();
#endif
}
...
...
@@ -111,72 +126,73 @@ public:
vector
<
string
>*
component_texts
=
NULL
,
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
)
{
CV_Assert
(
(
image
.
type
()
==
CV_8UC1
)
||
(
image
.
type
()
==
CV_8UC1
)
);
#ifdef HAVE_TESSERACT
if
(
component_texts
!=
0
)
component_texts
->
clear
();
if
(
component_rects
!=
0
)
component_rects
->
clear
();
if
(
component_confidences
!=
0
)
component_confidences
->
clear
();
CV_Assert
(
(
image
.
type
()
==
CV_8UC1
)
||
(
image
.
type
()
==
CV_8UC1
)
);
tess
.
SetImage
((
uchar
*
)
image
.
data
,
image
.
size
().
width
,
image
.
size
().
height
,
image
.
channels
(),
image
.
step1
());
tess
.
Recognize
(
0
);
output
=
string
(
tess
.
GetUTF8Text
());
#ifdef HAVE_TESSERACT
if
(
(
component_rects
!=
NULL
)
||
(
component_texts
!=
NULL
)
||
(
component_confidences
!=
NULL
)
)
{
tesseract
::
ResultIterator
*
ri
=
tess
.
GetIterator
();
tesseract
::
PageIteratorLevel
level
=
tesseract
::
RIL_WORD
;
if
(
component_level
==
OCR_LEVEL_TEXTLINE
)
level
=
tesseract
::
RIL_TEXTLINE
;
if
(
ri
!=
0
)
{
do
{
const
char
*
word
=
ri
->
GetUTF8Text
(
level
);
if
(
word
==
NULL
)
continue
;
float
conf
=
ri
->
Confidence
(
level
);
int
x1
,
y1
,
x2
,
y2
;
ri
->
BoundingBox
(
level
,
&
x1
,
&
y1
,
&
x2
,
&
y2
);
if
(
component_texts
!=
0
)
component_texts
->
push_back
(
string
(
word
));
if
(
component_rects
!=
0
)
component_rects
->
push_back
(
Rect
(
x1
,
y1
,
x2
-
x1
,
y2
-
y1
));
if
(
component_confidences
!=
0
)
component_confidences
->
push_back
(
conf
);
delete
[]
word
;
}
while
(
ri
->
Next
(
level
));
if
(
component_texts
!=
0
)
component_texts
->
clear
();
if
(
component_rects
!=
0
)
component_rects
->
clear
();
if
(
component_confidences
!=
0
)
component_confidences
->
clear
();
tess
.
SetImage
((
uchar
*
)
image
.
data
,
image
.
size
().
width
,
image
.
size
().
height
,
image
.
channels
(),
image
.
step1
());
tess
.
Recognize
(
0
);
output
=
string
(
tess
.
GetUTF8Text
());
if
(
(
component_rects
!=
NULL
)
||
(
component_texts
!=
NULL
)
||
(
component_confidences
!=
NULL
)
)
{
tesseract
::
ResultIterator
*
ri
=
tess
.
GetIterator
();
tesseract
::
PageIteratorLevel
level
=
tesseract
::
RIL_WORD
;
if
(
component_level
==
OCR_LEVEL_TEXTLINE
)
level
=
tesseract
::
RIL_TEXTLINE
;
if
(
ri
!=
0
)
{
do
{
const
char
*
word
=
ri
->
GetUTF8Text
(
level
);
if
(
word
==
NULL
)
continue
;
float
conf
=
ri
->
Confidence
(
level
);
int
x1
,
y1
,
x2
,
y2
;
ri
->
BoundingBox
(
level
,
&
x1
,
&
y1
,
&
x2
,
&
y2
);
if
(
component_texts
!=
0
)
component_texts
->
push_back
(
string
(
word
));
if
(
component_rects
!=
0
)
component_rects
->
push_back
(
Rect
(
x1
,
y1
,
x2
-
x1
,
y2
-
y1
));
if
(
component_confidences
!=
0
)
component_confidences
->
push_back
(
conf
);
delete
[]
word
;
}
while
(
ri
->
Next
(
level
));
}
delete
ri
;
}
delete
ri
;
}
tess
.
Clear
();
tess
.
Clear
();
#else
cout
<<
"OCRTesseract("
<<
component_level
<<
image
.
type
()
<<
"): Tesseract not found."
<<
endl
;
output
.
clear
();
if
(
component_rects
)
component_rects
->
clear
();
if
(
component_texts
)
component_texts
->
clear
();
if
(
component_confidences
)
component_confidences
->
clear
();
cout
<<
"OCRTesseract("
<<
component_level
<<
image
.
type
()
<<
"): Tesseract not found."
<<
endl
;
output
.
clear
();
if
(
component_rects
)
component_rects
->
clear
();
if
(
component_texts
)
component_texts
->
clear
();
if
(
component_confidences
)
component_confidences
->
clear
();
#endif
}
};
Ptr
<
OCRTesseract
>
OCRTesseract
::
create
(
const
char
*
datapath
,
const
char
*
language
,
const
char
*
char_whitelist
,
int
oem
,
int
psmode
)
Ptr
<
OCRTesseract
>
OCRTesseract
::
create
(
const
char
*
datapath
,
const
char
*
language
,
const
char
*
char_whitelist
,
int
oem
,
int
psmode
)
{
return
makePtr
<
OCRTesseractImpl
>
(
datapath
,
language
,
char_whitelist
,
oem
,
psmode
);
return
makePtr
<
OCRTesseractImpl
>
(
datapath
,
language
,
char_whitelist
,
oem
,
psmode
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment