Unverified Commit 3efa7831 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky Committed by GitHub

Merge pull request #16488 from vpisarev:filestorage_longlines

trying to fix handling file storages with extremely long lines

* trying to fix handling of file storages with extremely long lines: https://github.com/opencv/opencv/issues/11061

* * fixed errorneous pointer access in JSON parser.
* it's now crash-test time! temporarily set the initial parser buffer size to just 40 bytes. let's run all the test and check if the buffer is always correctly resized and handled

* fixed pointer use in JSON parser; added the proper test to catch this case

* fixed the test to make it more challenging. generate test json with
*
**
***
etc. shape
parent aa2777ed
...@@ -606,12 +606,11 @@ public: ...@@ -606,12 +606,11 @@ public:
int last_occurrence = -1; int last_occurrence = -1;
xml_buf_size = MIN(xml_buf_size, int(file_size)); xml_buf_size = MIN(xml_buf_size, int(file_size));
fseek( file, -xml_buf_size, SEEK_END ); fseek( file, -xml_buf_size, SEEK_END );
std::vector<char> xml_buf_(xml_buf_size+2);
// find the last occurrence of </opencv_storage> // find the last occurrence of </opencv_storage>
for(;;) for(;;)
{ {
int line_offset = (int)ftell( file ); int line_offset = (int)ftell( file );
const char* ptr0 = this->gets(&xml_buf_[0], xml_buf_size ); const char* ptr0 = this->gets(xml_buf_size);
const char* ptr = NULL; const char* ptr = NULL;
if( !ptr0 ) if( !ptr0 )
break; break;
...@@ -693,8 +692,8 @@ public: ...@@ -693,8 +692,8 @@ public:
} }
else else
{ {
const size_t buf_size0 = 1 << 20; const size_t buf_size0 = 40;
size_t buf_size = buf_size0; buffer.resize(buf_size0);
if( mem_mode ) if( mem_mode )
{ {
strbuf = (char*)filename_or_buf; strbuf = (char*)filename_or_buf;
...@@ -704,8 +703,7 @@ public: ...@@ -704,8 +703,7 @@ public:
const char* yaml_signature = "%YAML"; const char* yaml_signature = "%YAML";
const char* json_signature = "{"; const char* json_signature = "{";
const char* xml_signature = "<?xml"; const char* xml_signature = "<?xml";
char buf[16]; char* buf = this->gets(16);
this->gets( buf, sizeof(buf)-2 );
char* bufPtr = cv_skip_BOM(buf); char* bufPtr = cv_skip_BOM(buf);
size_t bufOffset = bufPtr - buf; size_t bufOffset = bufPtr - buf;
...@@ -720,23 +718,8 @@ public: ...@@ -720,23 +718,8 @@ public:
else else
CV_Error(CV_BADARG_ERR, "Unsupported file storage format"); CV_Error(CV_BADARG_ERR, "Unsupported file storage format");
if( !isGZ )
{
if( !mem_mode )
{
fseek( file, 0, SEEK_END );
buf_size = ftell( file );
}
else
buf_size = strbufsize;
buf_size = MIN( buf_size, buf_size0 );
buf_size = MAX( buf_size, (size_t)(CV_FS_MAX_LEN*6 + 1024) );
}
rewind(); rewind();
strbufpos = bufOffset; strbufpos = bufOffset;
buffer.reserve(buf_size + 256);
buffer.resize(buf_size);
bufofs = 0; bufofs = 0;
try try
...@@ -809,56 +792,70 @@ public: ...@@ -809,56 +792,70 @@ public:
CV_Error( CV_StsError, "The storage is not opened" ); CV_Error( CV_StsError, "The storage is not opened" );
} }
char* gets( char* str, int maxCount ) char* getsFromFile( char* buf, int count )
{
if( file )
return fgets( buf, count, file );
#if USE_ZLIB
if( gzfile )
return gzgets( gzfile, buf, count );
#endif
CV_Error(CV_StsError, "The storage is not opened");
}
char* gets( size_t maxCount )
{ {
if( strbuf ) if( strbuf )
{ {
size_t i = strbufpos, len = strbufsize; size_t i = strbufpos, len = strbufsize;
int j = 0;
const char* instr = strbuf; const char* instr = strbuf;
while( i < len && j < maxCount-1 ) for( ; i < len; i++ )
{ {
char c = instr[i++]; char c = instr[i];
if( c == '\0' ) if( c == '\0' || c == '\n' )
break; {
str[j++] = c; if( c == '\n' )
if( c == '\n' ) i++;
break; break;
}
} }
str[j++] = '\0'; size_t count = i - strbufpos;
if( maxCount == 0 || maxCount > count )
maxCount = count;
buffer.resize(std::max(buffer.size(), maxCount + 8));
memcpy(&buffer[0], instr + strbufpos, maxCount);
buffer[maxCount] = '\0';
strbufpos = i; strbufpos = i;
if (maxCount > 256 && !(flags & cv::FileStorage::BASE64)) return maxCount > 0 ? &buffer[0] : 0;
CV_Assert(j < maxCount - 1 && "OpenCV persistence doesn't support very long lines");
return j > 1 ? str : 0;
} }
if( file )
{ const size_t MAX_BLOCK_SIZE = INT_MAX/2; // hopefully, that will be enough
char* ptr = fgets( str, maxCount, file ); if( maxCount == 0 )
if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64)) maxCount = MAX_BLOCK_SIZE;
{ else
size_t sz = strnlen(ptr, maxCount); CV_Assert(maxCount < MAX_BLOCK_SIZE);
CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines"); size_t ofs = 0;
}
return ptr; for(;;)
}
#if USE_ZLIB
if( gzfile )
{ {
char* ptr = gzgets( gzfile, str, maxCount ); int count = (int)std::min(buffer.size() - ofs - 16, maxCount);
if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64)) char* ptr = getsFromFile( &buffer[ofs], count+1 );
{ if( !ptr )
size_t sz = strnlen(ptr, maxCount); break;
CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines"); int delta = (int)strlen(ptr);
} ofs += delta;
return ptr; maxCount -= delta;
if( ptr[delta-1] == '\n' || maxCount == 0 )
break;
if( delta == count )
buffer.resize((size_t)(buffer.size()*1.5));
} }
#endif return ofs > 0 ? &buffer[0] : 0;
CV_Error(CV_StsError, "The storage is not opened");
} }
char* gets() char* gets()
{ {
char* ptr = this->gets(bufferStart(), (int)(bufferEnd() - bufferStart())); char* ptr = this->gets(0);
if( !ptr ) if( !ptr )
{ {
ptr = bufferStart(); // FIXIT Why do we need this hack? What is about other parsers JSON/YAML? ptr = bufferStart(); // FIXIT Why do we need this hack? What is about other parsers JSON/YAML?
...@@ -868,10 +865,12 @@ public: ...@@ -868,10 +865,12 @@ public:
} }
else else
{ {
FileStorage_API* fs = this; size_t l = strlen(ptr);
int l = (int)strlen(ptr);
if( l > 0 && ptr[l-1] != '\n' && ptr[l-1] != '\r' && !eof() ) if( l > 0 && ptr[l-1] != '\n' && ptr[l-1] != '\r' && !eof() )
CV_PARSE_ERROR_CPP("Too long string or a last string w/o newline"); {
ptr[l] = '\n';
ptr[l+1] = '\0';
}
} }
lineno++; lineno++;
return ptr; return ptr;
......
...@@ -411,7 +411,10 @@ public: ...@@ -411,7 +411,10 @@ public:
if( *ptr != '"' ) if( *ptr != '"' )
CV_PARSE_ERROR_CPP( "Key must end with \'\"\'" ); CV_PARSE_ERROR_CPP( "Key must end with \'\"\'" );
const char * end = ptr; if( ptr == beg )
CV_PARSE_ERROR_CPP( "Key is empty" );
value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(ptr - beg)), FileNode::NONE);
ptr++; ptr++;
ptr = skipSpaces( ptr ); ptr = skipSpaces( ptr );
if( !ptr || !*ptr ) if( !ptr || !*ptr )
...@@ -420,11 +423,6 @@ public: ...@@ -420,11 +423,6 @@ public:
if( *ptr != ':' ) if( *ptr != ':' )
CV_PARSE_ERROR_CPP( "Missing \':\' between key and value" ); CV_PARSE_ERROR_CPP( "Missing \':\' between key and value" );
/* [beg, end) */
if( end <= beg )
CV_PARSE_ERROR_CPP( "Key is empty" );
value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(end - beg)), FileNode::NONE);
return ++ptr; return ++ptr;
} }
...@@ -768,8 +766,6 @@ public: ...@@ -768,8 +766,6 @@ public:
CV_PARSE_ERROR_CPP( "left-brace of top level is missing" ); CV_PARSE_ERROR_CPP( "left-brace of top level is missing" );
} }
if( !ptr || !*ptr )
CV_PARSE_ERROR_CPP( "Unexpected End-Of-File" );
return true; return true;
} }
......
...@@ -1669,4 +1669,46 @@ TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents) ...@@ -1669,4 +1669,46 @@ TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents)
ASSERT_EQ(0, std::remove(filename.c_str())); ASSERT_EQ(0, std::remove(filename.c_str()));
} }
TEST(Core_InputOutput, FileStorage_JSON_VeryLongLines)
{
for( int iter = 0; iter < 2; iter++ )
{
std::string temp_path = cv::tempfile("temp.json");
{
std::ofstream ofs(temp_path);
ofs << "{ ";
int prev_len = 0, start = 0;
for (int i = 0; i < 52500; i++)
{
std::string str = cv::format("\"KEY%d\"", i);
ofs << str;
if(iter == 1 && i - start > prev_len)
{
// build a stairway with increasing text row width
ofs << "\n";
prev_len = i - start;
start = i;
}
str = cv::format(": \"VALUE%d\", ", i);
ofs << str;
}
ofs << "}";
}
{
cv::FileStorage fs(temp_path, cv::FileStorage::READ);
char key[16], val0[16];
std::string val;
for(int i = 0; i < 52500; i += 100)
{
sprintf(key, "KEY%d", i);
sprintf(val0, "VALUE%d", i);
fs[key] >> val;
ASSERT_EQ(val, val0);
}
}
remove(temp_path.c_str());
}
}
}} // namespace }} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment