opencv_contrib / Commits

Commit e2f9c666 authored Aug 27, 2015 by Vadim Pisarevsky
Merge pull request #350 from lluisgomez:ocrbeamsearch_refactor
Parents: 3431d8ae, c3042c3f

Showing 5 changed files with 198 additions and 140 deletions:

  modules/text/include/opencv2/text/ocr.hpp            +4   -1
  modules/text/samples/cropped_word_recognition.cpp    +6   -3
  modules/text/samples/scenetext_word03.jpg            +0   -0
  modules/text/samples/scenetext_word04.jpg            +0   -0
  modules/text/src/ocr_beamsearch_decoder.cpp          +188 -136
modules/text/include/opencv2/text/ocr.hpp
@@ -338,6 +338,9 @@ public:
         including 0 as start-sequence location.
          */
         virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
+
+        int getWindowSize() {return 0;}
+        int getStepSize() {return 0;}
     };

 public:
@@ -396,7 +399,7 @@ public:
                                      InputArray emission_probabilities_table, // Table with observation emission probabilities
                                                                               // cols == rows == vocabulari.size()
                                      decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
-                                     int beam_size = 50);  // Size of the beam in Beam Search algorithm
+                                     int beam_size = 500); // Size of the beam in Beam Search algorithm

 protected:
modules/text/samples/cropped_word_recognition.cpp
@@ -39,12 +39,13 @@ int main(int argc, char* argv[])
     string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; // must have the same order as the clasifier output classes
     vector<string> lexicon;  // a list of words expected to be found on the input image
     lexicon.push_back(string("abb"));
+    lexicon.push_back(string("patata"));
+    lexicon.push_back(string("riser"));
     lexicon.push_back(string("CHINA"));
     lexicon.push_back(string("HERE"));
     lexicon.push_back(string("President"));
     lexicon.push_back(string("smash"));
     lexicon.push_back(string("KUALA"));
     lexicon.push_back(string("Produkt"));
     lexicon.push_back(string("NINTENDO"));

     // Create tailored language model a small given lexicon
@@ -54,16 +55,18 @@ int main(int argc, char* argv[])
     // An alternative would be to load the default generic language model
     // (created from ispell 42869 english words list)
     /*Mat transition_p;
-    string filename = "OCRHMM_transitions_table.xml"; // TODO use same order for voc
+    string filename = "OCRHMM_transitions_table.xml";
     FileStorage fs(filename, FileStorage::READ);
     fs["transition_probabilities"] >> transition_p;
     fs.release();*/

     Mat emission_p = Mat::eye(62,62,CV_64FC1);

+    // Notice we set here a beam size of 50. This is much faster than using the default value (500).
+    // 50 works well with our tiny lexicon example, but may not with larger dictionaries.
     Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
                 loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
-                vocabulary, transition_p, emission_p);
+                vocabulary, transition_p, emission_p, OCR_DECODER_VITERBI, 50);

     double t_r = (double)getTickCount();
     string output;
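For comparison, a call that omits the last two arguments would pick up the new defaults from the ocr.hpp hunk above (OCR_DECODER_VITERBI and the raised beam_size default of 500). A sketch in the context of this sample's main(); the ocr_default name is illustrative only:

    // Hypothetical variant relying on the new defaults: noticeably slower here,
    // but a safer starting point for lexicons much larger than these ten words.
    Ptr<OCRBeamSearchDecoder> ocr_default = OCRBeamSearchDecoder::create(
                loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
                vocabulary, transition_p, emission_p);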
modules/text/samples/scenetext_word03.jpg
new file (0 → 100644), 17.1 KB

modules/text/samples/scenetext_word04.jpg
new file (0 → 100644), 46.4 KB
modules/text/src/ocr_beamsearch_decoder.cpp
@@ -72,13 +72,12 @@ void OCRBeamSearchDecoder::run(Mat& image, string& output_text, vector<Rect>* co
     if (component_confidences != NULL)
         component_confidences->clear();
 }

 void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>* component_rects,
                                vector<string>* component_texts, vector<float>* component_confidences,
                                int component_level)
 {
-    CV_Assert( mask.type() == CV_8UC1 );
     CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
+    CV_Assert( mask.type() == CV_8UC1 );
     CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );
     output_text.clear();
     if (component_rects != NULL)
@@ -102,11 +101,18 @@ void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< v
     oversegmentation.clear();
 }

-bool beam_sort_function ( pair<double,vector<int> > i, pair<double,vector<int> > j );
-bool beam_sort_function ( pair<double,vector<int> > i, pair<double,vector<int> > j )
-{
-    return (i.first > j.first);
-}
+struct beamSearch_node {
+    double score;
+    vector<int> segmentation;
+    bool expanded;
+    // TODO calculating score of its childs would be much faster if we store the last column
+    //      of their "root" path.
+};
+
+bool beam_sort_function ( beamSearch_node a, beamSearch_node b );
+bool beam_sort_function ( beamSearch_node a, beamSearch_node b )
+{
+    return (a.score > b.score);
+}
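Since the beam is now a plain vector of beamSearch_node, keeping it ordered falls out of std::sort with the comparator above. A standalone sketch of just that mechanism (not commit code; the scores are made up):

    #include <algorithm>
    #include <vector>
    using namespace std;

    struct beamSearch_node {
        double score;
        vector<int> segmentation;
        bool expanded;
    };

    static bool beam_sort_function(beamSearch_node a, beamSearch_node b)
    {
        return (a.score > b.score); // descending: beam[0] is always the best hypothesis
    }

    int main()
    {
        vector<beamSearch_node> beam(3);
        beam[0].score = -2.0; beam[1].score = -0.5; beam[2].score = -9.1;
        sort(beam.begin(), beam.end(), beam_sort_function);
        return (beam[0].score == -0.5) ? 0 : 1; // best-scoring node comes first
    }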
@@ -122,17 +128,43 @@ public:
               int _beam_size)
     {
         classifier = _classifier;
-        transition_p = transition_probabilities_table.getMat();
+        step_size = classifier->getStepSize();
+        win_size  = classifier->getWindowSize();
         emission_p = emission_probabilities_table.getMat();
         vocabulary = _vocabulary;
         mode = _mode;
         beam_size = _beam_size;
+
+        transition_probabilities_table.getMat().copyTo(transition_p);
+        for (int i=0; i<transition_p.rows; i++)
+        {
+            for (int j=0; j<transition_p.cols; j++)
+            {
+                if (transition_p.at<double>(i,j) == 0)
+                    transition_p.at<double>(i,j) = -DBL_MAX;
+                else
+                    transition_p.at<double>(i,j) = log(transition_p.at<double>(i,j));
+            }
+        }
     }

     ~OCRBeamSearchDecoderImpl()
     {
     }

+    void run( Mat& src,
+              Mat& mask,
+              string& out_sequence,
+              vector<Rect>* component_rects,
+              vector<string>* component_texts,
+              vector<float>* component_confidences,
+              int component_level)
+    {
+        CV_Assert( mask.type() == CV_8UC1 );
+        //nothing to do with a mask here
+        run( src, out_sequence, component_rects, component_texts, component_confidences, component_level);
+    }
+
     void run( Mat& src,
               string& out_sequence,
               vector<Rect>* component_rects,
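Moving the log conversion of transition_p into the constructor means the table is transformed once per decoder instead of on every run() call, and copyTo keeps the caller's matrix untouched where the old getMat() assignment aliased it. A minimal standalone sketch of the same conversion (assuming a CV_64FC1 square table, as the header comments state):

    #include <cfloat>
    #include <cmath>
    #include <opencv2/core.hpp>

    // One-time conversion to log space: impossible transitions (p == 0) become
    // -DBL_MAX so they can never win a max comparison during Viterbi scoring.
    static void to_log_space(cv::Mat& transition_p)
    {
        for (int i = 0; i < transition_p.rows; i++)
            for (int j = 0; j < transition_p.cols; j++)
            {
                double& t = transition_p.at<double>(i, j);
                t = (t == 0) ? -DBL_MAX : std::log(t);
            }
    }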
@@ -152,20 +184,62 @@ public:
         if (component_confidences != NULL)
             component_confidences->clear();

-        // TODO We must split a line into words or specify we only work with words
         if (src.type() == CV_8UC3)
         {
             cvtColor(src,src,COLOR_RGB2GRAY);
         }

-        vector< vector<double> > recognition_probabilities;
-        vector<int> oversegmentation;
-
+        // TODO if input is a text line (not a word) we may need to split into words here!
         // do sliding window classification along a croped word image
         classifier->eval(src, recognition_probabilities, oversegmentation);

-        /*Now we go here with the beam search algorithm to optimize the recognition score*/
+        // if the number of oversegmentation points found is less than 2 we can not do nothing!!
+        if (oversegmentation.size() < 2) return;
+
+        //NMS of recognitions
+        double last_best_p   = 0;
+        int    last_best_idx = -1;
+        for (size_t i=0; i<recognition_probabilities.size(); )
+        {
+            double best_p   = 0;
+            int    best_idx = -1;
+            for (size_t j=0; j<recognition_probabilities[i].size(); j++)
+            {
+                if (recognition_probabilities[i][j] > best_p)
+                {
+                    best_p   = recognition_probabilities[i][j];
+                    best_idx = (int)j;
+                }
+            }
+            if ((i > 0) && (best_idx == last_best_idx)
+                && (oversegmentation[i]*step_size < oversegmentation[i-1]*step_size + win_size))
+            {
+                if (last_best_p > best_p)
+                {
+                    //remove i'th elements and do not increment i
+                    recognition_probabilities.erase(recognition_probabilities.begin()+i);
+                    oversegmentation.erase(oversegmentation.begin()+i);
+                    continue;
+                }
+                else
+                {
+                    //remove (i-1)'th elements and do not increment i
+                    recognition_probabilities.erase(recognition_probabilities.begin()+i-1);
+                    oversegmentation.erase(oversegmentation.begin()+i-1);
+                    last_best_idx = best_idx;
+                    last_best_p   = best_p;
+                    continue;
+                }
+            }
+            last_best_idx = best_idx;
+            last_best_p   = best_p;
+            i++;
+        }
+
+        /*Now we go with the beam search algorithm to optimize the recognition score*/
         //convert probabilities to log probabilities
         for (size_t i=0; i<recognition_probabilities.size(); i++)
@@ -178,170 +252,160 @@ public:
                 recognition_probabilities[i][j] = log(recognition_probabilities[i][j]);
             }
         }

-        for (int i=0; i<transition_p.rows; i++)
-        {
-            for (int j=0; j<transition_p.cols; j++)
-            {
-                if (transition_p.at<double>(i,j) == 0)
-                    transition_p.at<double>(i,j) = -DBL_MAX;
-                else
-                    transition_p.at<double>(i,j) = log(transition_p.at<double>(i,j));
-            }
-        }
-
-        set<unsigned long long int> visited_nodes; //TODO make it member of class
-
-        vector<int> start_segmentation;
-        start_segmentation.push_back(oversegmentation[0]);
-        start_segmentation.push_back(oversegmentation[oversegmentation.size()-1]);
-
-        vector< pair<double,vector<int> > > beam;
-        beam.push_back( pair<double,vector<int> > (score_segmentation(start_segmentation, recognition_probabilities, out_sequence), start_segmentation) );
-
-        vector< vector<int> > childs = generate_childs( start_segmentation, oversegmentation, visited_nodes );
-        if (!childs.empty())
-            update_beam( beam, childs, recognition_probabilities );
-        //cout << "beam size " << beam.size() << " best score " << beam[0].first<< endl;
-
-        int generated_chids = (int)childs.size();
+        // initialize the beam with all possible character's pairs
+        int generated_chids = 0;
+        for (size_t i=0; i<recognition_probabilities.size()-1; i++)
+        {
+            for (size_t j=i+1; j<recognition_probabilities.size(); j++)
+            {
+                beamSearch_node node;
+                node.segmentation.push_back((int)i);
+                node.segmentation.push_back((int)j);
+                node.score = score_segmentation(node.segmentation, out_sequence);
+
+                vector< vector<int> > childs = generate_childs( node.segmentation );
+                node.expanded = true;
+
+                beam.push_back( node );
+
+                if (!childs.empty())
+                    update_beam( childs );
+
+                generated_chids += (int)childs.size();
+            }
+        }

         while (generated_chids != 0)
         {
             generated_chids = 0;
-            vector< pair<double,vector<int> > > old_beam = beam;
-            for (size_t i=0; i<old_beam.size(); i++)
+
+            for (size_t i=0; i<beam.size(); i++)
             {
-                childs = generate_childs( old_beam[i].second, oversegmentation, visited_nodes );
+                vector< vector<int> > childs;
+                if (!beam[i].expanded)
+                {
+                    childs = generate_childs( beam[i].segmentation );
+                    beam[i].expanded = true;
+                }
+
                 if (!childs.empty())
-                    update_beam( beam, childs, recognition_probabilities );
+                    update_beam( childs );
+
                 generated_chids += (int)childs.size();
             }
             //cout << "beam size " << beam.size() << " best score " << beam[0].first << endl;
         }

-        // FINISHED ! Get the best prediction found into out_sequence
-        score_segmentation(beam[0].second, recognition_probabilities, out_sequence);
+        // Done! Get the best prediction found into out_sequence
+        double lp = score_segmentation( beam[0].segmentation, out_sequence );

-        // TODO fill other output parameters
+        // fill other (dummy) output parameters
         component_rects->push_back(Rect(0,0,src.cols,src.rows));
         component_texts->push_back(out_sequence);
+        component_confidences->push_back((float)exp(lp));

         return;
     }

-    void run( Mat& src,
-              Mat& mask,
-              string& out_sequence,
-              vector<Rect>* component_rects,
-              vector<string>* component_texts,
-              vector<float>* component_confidences,
-              int component_level)
-    {
-        CV_Assert( mask.type() == CV_8UC1 );
-        // Nothing to do with a mask here. We do slidding window anyway.
-        run( src, out_sequence, component_rects, component_texts, component_confidences, component_level);
-    }
-
 private:
+    int win_size;
+    int step_size;

     ////////////////////////////////////////////////////////////

+    vector< beamSearch_node > beam;
+    vector< vector<double> > recognition_probabilities;
+    vector<int> oversegmentation;
+
-    vector< vector<int> > generate_childs( vector<int> &segmentation, vector<int> &oversegmentation, set<unsigned long long int> &visited_nodes )
+    // TODO the way we expand nodes makes the recognition score heuristic not monotonic
+    //      it should start from left node 0 and grow always to the right.
+    vector< vector<int> > generate_childs( vector<int> &segmentation )
     {
         /*cout << " generate childs for [";
         for (size_t i = 0 ; i < segmentation.size(); i++)
             cout << segmentation[i] << ",";
         cout << "] ";*/

         vector< vector<int> > childs;
-        for (size_t i=0; i<oversegmentation.size(); i++)
+        for (size_t i = segmentation[segmentation.size()-1]+1; i<oversegmentation.size(); i++)
         {
-            int seg_point = oversegmentation[i];
+            int seg_point = (int)i;
             if (find(segmentation.begin(), segmentation.end(), seg_point) == segmentation.end())
             {
                 //cout << seg_point << " " ;
                 vector<int> child = segmentation;
                 child.push_back(seg_point);
-                sort(child.begin(), child.end());
-                unsigned long long int key = 0;
-                for (size_t j=0; j<child.size(); j++)
-                {
-                    key += (unsigned long long int)pow(2, oversegmentation.size() - (oversegmentation.end() - find(oversegmentation.begin(), oversegmentation.end(), child[j])));
-                }
-                //if (!visited_nodes[key])
-                if (visited_nodes.find(key) == visited_nodes.end())
-                {
-                    childs.push_back(child);
-                    //visited_nodes[key] = true;
-                    visited_nodes.insert(key);
-                }
+                childs.push_back(child);
             }
         }
         //cout << endl;
         return childs;
     }

     ////////////////////////////////////////////////////////////

-    //TODO shall the beam itself be a member of the class?
-    void update_beam( vector< pair<double,vector<int> > > &beam, vector< vector<int> > &childs, vector< vector<double> > &recognition_probabilities )
+    void update_beam( vector< vector<int> > &childs )
     {
         string out_sequence;
         double min_score = -DBL_MAX; //min score value to be part of the beam
-        if ((int)beam.size() == beam_size)
-            min_score = beam[beam.size()-1].first; //last element has the lowest score
+        if ((int)beam.size() >= beam_size)
+            min_score = beam[beam_size-1].score; //last element has the lowest score

         for (size_t i=0; i<childs.size(); i++)
         {
-            double score = score_segmentation(childs[i], recognition_probabilities, out_sequence);
+            double score = score_segmentation(childs[i], out_sequence);
             if (score > min_score)
             {
-                beam.push_back(pair<double,vector<int> >(score,childs[i]));
+                beamSearch_node node;
+                node.score = score;
+                node.segmentation = childs[i];
+                node.expanded = false;
+                beam.push_back(node);
                 sort(beam.begin(), beam.end(), beam_sort_function);
                 if ((int)beam.size() > beam_size)
                 {
-                    beam.pop_back();
-                    min_score = beam[beam.size()-1].first;
+                    beam.erase(beam.begin()+beam_size, beam.end());
+                    min_score = beam[beam.size()-1].score;
                 }
             }
         }
     }

     ////////////////////////////////////////////////////////////

-    // TODO Add heuristics to the score function (see PhotoOCR paper)
-    // e.g.: in some cases we discard a segmentation because it includes a very large character
-    //       in other cases we do it because the overlapping between two chars is too large
-    //       etc.
-    double score_segmentation( vector<int> &segmentation, vector< vector<double> > &observations, string& outstring )
+    double score_segmentation( vector<int> &segmentation, string& outstring )
     {
-        //TODO This must be extracted from dictionary
+        // Score Heuristics:
+        // No need to use Viterbi to know a given segmentation is bad
+        // e.g.: in some cases we discard a segmentation because it includes a very large character
+        //       in other cases we do it because the overlapping between two chars is too large
+        // TODO Add more heuristics (e.g. penalize large inter-character variance)
+        Mat interdist((int)segmentation.size()-1, 1, CV_32F, 1);
+        for (size_t i=0; i<segmentation.size()-1; i++)
+        {
+            interdist.at<float>((int)i,0) = (float)oversegmentation[segmentation[(int)i+1]]*step_size
+                                          - (float)oversegmentation[segmentation[(int)i]]*step_size;
+            if ((float)interdist.at<float>((int)i,0)/win_size > 2.25) // TODO explain how did you set this thrs
+            {
+                return -DBL_MAX;
+            }
+            if ((float)interdist.at<float>((int)i,0)/win_size < 0.15) // TODO explain how did you set this thrs
+            {
+                return -DBL_MAX;
+            }
+        }
+        Scalar m, std;
+        meanStdDev(interdist, m, std);
+        //double interdist_std = std[0];
+
+        //TODO Extracting start probs from lexicon (if we have it) may boost accuracy!
         vector<double> start_p(vocabulary.size());
         for (int i=0; i<(int)vocabulary.size(); i++)
             start_p[i] = log(1.0/vocabulary.size());

-        Mat V = Mat::ones((int)segmentation.size()-1,(int)vocabulary.size(),CV_64FC1);
+        Mat V = Mat::ones((int)segmentation.size(),(int)vocabulary.size(),CV_64FC1);
         V = V * -DBL_MAX;
         vector<string> path(vocabulary.size());

         // Initialize base cases (t == 0)
         for (int i=0; i<(int)vocabulary.size(); i++)
         {
-            V.at<double>(0,i) = start_p[i] + observations[segmentation[1]-1][i];
+            V.at<double>(0,i) = start_p[i] + recognition_probabilities[segmentation[0]][i];
             path[i] = vocabulary.at(i);
         }

         // Run Viterbi for t > 0
-        for (int t=1; t<(int)segmentation.size()-1; t++)
+        for (int t=1; t<(int)segmentation.size(); t++)
         {
             vector<string> newpath(vocabulary.size());
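The key structural change in this hunk: children now extend a segmentation only with indices strictly to the right of its last point, so each candidate segmentation is generated exactly once and the old visited_nodes set (with its pow(2, ...) hashing) becomes unnecessary. A standalone sketch of that rule (simplified; the real method reads the members of the Impl class):

    #include <vector>
    using std::vector;

    // Children of [.., last] are [.., last, last+1], [.., last, last+2], ...
    // Monotonic growth to the right means no duplicates, hence no visited set.
    vector< vector<int> > generate_childs_sketch(const vector<int>& segmentation,
                                                 size_t num_seg_points)
    {
        vector< vector<int> > childs;
        for (size_t i = segmentation.back() + 1; i < num_seg_points; i++)
        {
            vector<int> child = segmentation; // copy the parent path
            child.push_back((int)i);          // extend one segmentation point right
            childs.push_back(child);
        }
        return childs;
    }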
@@ -352,7 +416,7 @@ private:
             int best_idx = 0;
             for (int j=0; j<(int)vocabulary.size(); j++)
             {
-                double prob = V.at<double>(t-1,j) + transition_p.at<double>(j,i) + observations[segmentation[t+1]-1][i];
+                double prob = V.at<double>(t-1,j) + transition_p.at<double>(j,i) + recognition_probabilities[segmentation[t]][i];
                 if (prob > max_prob)
                 {
                     max_prob = prob;
@@ -372,7 +436,7 @@ private:
         int best_idx = 0;
         for (int i=0; i<(int)vocabulary.size(); i++)
         {
-            double prob = V.at<double>((int)segmentation.size()-2, i);
+            double prob = V.at<double>((int)segmentation.size()-1, i);
             if (prob > max_prob)
             {
                 max_prob = prob;
@@ -380,9 +444,8 @@ private:
             }
         }
         //cout << " score " << max_prob / (segmentation.size()-1) << " " << path[best_idx] << endl;
         outstring = path[best_idx];
-
-        return max_prob / (segmentation.size()-1);
+        return (max_prob / (segmentation.size()-1));
     }
 };
@@ -408,21 +471,24 @@ public:
     void eval( InputArray src, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation );

+    int getWindowSize() {return window_size;}
+    int getStepSize() {return step_size;}
+    void setStepSize(int _step_size) {step_size = _step_size;}
+
 protected:
     void normalizeAndZCA(Mat& patches);
     double eval_feature(Mat& feature, double* prob_estimates);

 private:
-    //TODO implement getters/setters for some of these members (if apply)
     int nr_class;    // number of classes
+    int window_size; // window size
+    int step_size;   // sliding window step
     int nr_feature;  // number of features
     Mat feature_min; // scale range
     Mat feature_max;
     Mat weights;     // Logistic Regression weights
     Mat kernels;     // CNN kernels
     Mat M, P;        // ZCA Whitening parameters
-    int step_size;   // sliding window step
-    int window_size; // window size
     int quad_size;
     int patch_size;
     int num_quads;   // extract 25 quads (12x12) from each image
@@ -449,26 +515,15 @@ OCRBeamSearchClassifierCNN::OCRBeamSearchClassifierCNN (const string& filename)
     else
         CV_Error(Error::StsBadArg, "Default classifier data file not found!");

     // check all matrix dimensions match correctly and no one is empty
     CV_Assert( (M.cols > 0) && (M.rows > 0) );
     CV_Assert( (P.cols > 0) && (P.rows > 0) );
     CV_Assert( (kernels.cols > 0) && (kernels.rows > 0) );
     CV_Assert( (weights.cols > 0) && (weights.rows > 0) );
     CV_Assert( (feature_min.cols > 0) && (feature_min.rows > 0) );
     CV_Assert( (feature_max.cols > 0) && (feature_max.rows > 0) );

-    nr_feature = weights.rows;
-    nr_class   = weights.cols;
-
-    // algorithm internal parameters
-    window_size = 32;
-    quad_size   = 12;
-    num_quads   = 25;
-    num_tiles   = 25;
-    alpha       = 0.5;
-    step_size   = 4;
+    nr_feature = weights.rows;
+    nr_class   = weights.cols;
+    patch_size = (int)sqrt(kernels.cols);
+    // algorithm internal parameters
+    window_size = 4*patch_size;
+    step_size   = 4; // TODO showld this be a parameter for the user?
+    quad_size   = 12;
+    num_quads   = 25;
+    num_tiles   = 25;
+    alpha       = 0.5; // used in non-linear activation function z = max(0, |D*a| - alpha)
 }

 void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation )
@@ -493,7 +548,6 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >
     resize(src,src,Size(window_size*src.cols/src.rows,window_size));

     int seg_points = 0;
-    oversegmentation.push_back(seg_points);

     Mat quad;
     Mat tmp;
@@ -584,19 +638,17 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >
         double *p = new double[nr_class];
         double predict_label = eval_feature(feature,p);
         //cout << " Prediction: " << vocabulary[predict_label] << " with probability " << p[0] << endl;
-        if (predict_label < 0)
-            CV_Error(Error::StsInternal, "OCRBeamSearchClassifierCNN::eval Error: unexpected prediction in eval_feature()");
+        if ( (predict_label < 0) || (predict_label > nr_class) )
+            CV_Error(Error::StsOutOfRange, "OCRBeamSearchClassifierCNN::eval Error: unexpected prediction in eval_feature()");

-        seg_points++;
-        oversegmentation.push_back(seg_points);
-        vector<double> recognition_p(p, p+nr_class*sizeof(double));
+        vector<double> recognition_p(p, p+nr_class);
         recognition_probabilities.push_back(recognition_p);
+        oversegmentation.push_back(seg_points);
+        seg_points++;
     }
 }

 // normalize for contrast and apply ZCA whitening to a set of image patches
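The last hunk also fixes a buffer overrun: pointer arithmetic on a double* already advances in whole elements, so the old p + nr_class*sizeof(double) pointed 8x past the end of the probability array. A standalone illustration (not commit code):

    #include <vector>

    int main()
    {
        const int nr_class = 62;
        double* p = new double[nr_class];
        for (int i = 0; i < nr_class; i++) p[i] = 1.0 / nr_class;

        std::vector<double> ok(p, p + nr_class); // 62 elements, in bounds
        // std::vector<double> bad(p, p + nr_class*sizeof(double)); // would copy
        // 62*8 = 496 doubles, reading far past the allocation: heap overrun.

        delete [] p;
        return ((int)ok.size() == nr_class) ? 0 : 1;
    }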