Merge pull request #16488 from vpisarev:filestorage_longlines

trying to fix handling file storages with extremely long lines

* trying to fix handling of file storages with extremely long lines: https://github.com/opencv/opencv/issues/11061

* * fixed errorneous pointer access in JSON parser.
* it's now crash-test time! temporarily set the initial parser buffer size to just 40 bytes. let's run all the test and check if the buffer is always correctly resized and handled

* fixed pointer use in JSON parser; added the proper test to catch this case

* fixed the test to make it more challenging. generate test json with
*
**
***
etc. shape
pull/16574/head^2
Vadim Pisarevsky 5 years ago committed by GitHub
parent aa2777ed61
commit 3efa78311a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 115
      modules/core/src/persistence.cpp
  2. 12
      modules/core/src/persistence_json.cpp
  3. 42
      modules/core/test/test_io.cpp

@ -606,12 +606,11 @@ public:
int last_occurrence = -1;
xml_buf_size = MIN(xml_buf_size, int(file_size));
fseek( file, -xml_buf_size, SEEK_END );
std::vector<char> xml_buf_(xml_buf_size+2);
// find the last occurrence of </opencv_storage>
for(;;)
{
int line_offset = (int)ftell( file );
const char* ptr0 = this->gets(&xml_buf_[0], xml_buf_size );
const char* ptr0 = this->gets(xml_buf_size);
const char* ptr = NULL;
if( !ptr0 )
break;
@ -693,8 +692,8 @@ public:
}
else
{
const size_t buf_size0 = 1 << 20;
size_t buf_size = buf_size0;
const size_t buf_size0 = 40;
buffer.resize(buf_size0);
if( mem_mode )
{
strbuf = (char*)filename_or_buf;
@ -704,8 +703,7 @@ public:
const char* yaml_signature = "%YAML";
const char* json_signature = "{";
const char* xml_signature = "<?xml";
char buf[16];
this->gets( buf, sizeof(buf)-2 );
char* buf = this->gets(16);
char* bufPtr = cv_skip_BOM(buf);
size_t bufOffset = bufPtr - buf;
@ -720,23 +718,8 @@ public:
else
CV_Error(CV_BADARG_ERR, "Unsupported file storage format");
if( !isGZ )
{
if( !mem_mode )
{
fseek( file, 0, SEEK_END );
buf_size = ftell( file );
}
else
buf_size = strbufsize;
buf_size = MIN( buf_size, buf_size0 );
buf_size = MAX( buf_size, (size_t)(CV_FS_MAX_LEN*6 + 1024) );
}
rewind();
strbufpos = bufOffset;
buffer.reserve(buf_size + 256);
buffer.resize(buf_size);
bufofs = 0;
try
@ -809,56 +792,70 @@ public:
CV_Error( CV_StsError, "The storage is not opened" );
}
char* gets( char* str, int maxCount )
char* getsFromFile( char* buf, int count )
{
if( file )
return fgets( buf, count, file );
#if USE_ZLIB
if( gzfile )
return gzgets( gzfile, buf, count );
#endif
CV_Error(CV_StsError, "The storage is not opened");
}
char* gets( size_t maxCount )
{
if( strbuf )
{
size_t i = strbufpos, len = strbufsize;
int j = 0;
const char* instr = strbuf;
while( i < len && j < maxCount-1 )
for( ; i < len; i++ )
{
char c = instr[i++];
if( c == '\0' )
break;
str[j++] = c;
if( c == '\n' )
char c = instr[i];
if( c == '\0' || c == '\n' )
{
if( c == '\n' )
i++;
break;
}
}
str[j++] = '\0';
size_t count = i - strbufpos;
if( maxCount == 0 || maxCount > count )
maxCount = count;
buffer.resize(std::max(buffer.size(), maxCount + 8));
memcpy(&buffer[0], instr + strbufpos, maxCount);
buffer[maxCount] = '\0';
strbufpos = i;
if (maxCount > 256 && !(flags & cv::FileStorage::BASE64))
CV_Assert(j < maxCount - 1 && "OpenCV persistence doesn't support very long lines");
return j > 1 ? str : 0;
return maxCount > 0 ? &buffer[0] : 0;
}
if( file )
{
char* ptr = fgets( str, maxCount, file );
if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64))
{
size_t sz = strnlen(ptr, maxCount);
CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines");
}
return ptr;
}
#if USE_ZLIB
if( gzfile )
const size_t MAX_BLOCK_SIZE = INT_MAX/2; // hopefully, that will be enough
if( maxCount == 0 )
maxCount = MAX_BLOCK_SIZE;
else
CV_Assert(maxCount < MAX_BLOCK_SIZE);
size_t ofs = 0;
for(;;)
{
char* ptr = gzgets( gzfile, str, maxCount );
if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64))
{
size_t sz = strnlen(ptr, maxCount);
CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines");
}
return ptr;
int count = (int)std::min(buffer.size() - ofs - 16, maxCount);
char* ptr = getsFromFile( &buffer[ofs], count+1 );
if( !ptr )
break;
int delta = (int)strlen(ptr);
ofs += delta;
maxCount -= delta;
if( ptr[delta-1] == '\n' || maxCount == 0 )
break;
if( delta == count )
buffer.resize((size_t)(buffer.size()*1.5));
}
#endif
CV_Error(CV_StsError, "The storage is not opened");
return ofs > 0 ? &buffer[0] : 0;
}
char* gets()
{
char* ptr = this->gets(bufferStart(), (int)(bufferEnd() - bufferStart()));
char* ptr = this->gets(0);
if( !ptr )
{
ptr = bufferStart(); // FIXIT Why do we need this hack? What is about other parsers JSON/YAML?
@ -868,10 +865,12 @@ public:
}
else
{
FileStorage_API* fs = this;
int l = (int)strlen(ptr);
size_t l = strlen(ptr);
if( l > 0 && ptr[l-1] != '\n' && ptr[l-1] != '\r' && !eof() )
CV_PARSE_ERROR_CPP("Too long string or a last string w/o newline");
{
ptr[l] = '\n';
ptr[l+1] = '\0';
}
}
lineno++;
return ptr;

@ -411,7 +411,10 @@ public:
if( *ptr != '"' )
CV_PARSE_ERROR_CPP( "Key must end with \'\"\'" );
const char * end = ptr;
if( ptr == beg )
CV_PARSE_ERROR_CPP( "Key is empty" );
value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(ptr - beg)), FileNode::NONE);
ptr++;
ptr = skipSpaces( ptr );
if( !ptr || !*ptr )
@ -420,11 +423,6 @@ public:
if( *ptr != ':' )
CV_PARSE_ERROR_CPP( "Missing \':\' between key and value" );
/* [beg, end) */
if( end <= beg )
CV_PARSE_ERROR_CPP( "Key is empty" );
value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(end - beg)), FileNode::NONE);
return ++ptr;
}
@ -768,8 +766,6 @@ public:
CV_PARSE_ERROR_CPP( "left-brace of top level is missing" );
}
if( !ptr || !*ptr )
CV_PARSE_ERROR_CPP( "Unexpected End-Of-File" );
return true;
}

@ -1669,4 +1669,46 @@ TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents)
ASSERT_EQ(0, std::remove(filename.c_str()));
}
TEST(Core_InputOutput, FileStorage_JSON_VeryLongLines)
{
for( int iter = 0; iter < 2; iter++ )
{
std::string temp_path = cv::tempfile("temp.json");
{
std::ofstream ofs(temp_path);
ofs << "{ ";
int prev_len = 0, start = 0;
for (int i = 0; i < 52500; i++)
{
std::string str = cv::format("\"KEY%d\"", i);
ofs << str;
if(iter == 1 && i - start > prev_len)
{
// build a stairway with increasing text row width
ofs << "\n";
prev_len = i - start;
start = i;
}
str = cv::format(": \"VALUE%d\", ", i);
ofs << str;
}
ofs << "}";
}
{
cv::FileStorage fs(temp_path, cv::FileStorage::READ);
char key[16], val0[16];
std::string val;
for(int i = 0; i < 52500; i += 100)
{
sprintf(key, "KEY%d", i);
sprintf(val0, "VALUE%d", i);
fs[key] >> val;
ASSERT_EQ(val, val0);
}
}
remove(temp_path.c_str());
}
}
}} // namespace

Loading…
Cancel
Save