Merge pull request #395 from LeonidBeynenson:fix_ml_large_data_bug__2.4

pull/410/merge
Andrey Kamaev 12 years ago committed by OpenCV Buildbot
commit e2536f1c35
  1. 71
      apps/traincascade/boost.cpp
  2. 8
      modules/ml/include/opencv2/ml/ml.hpp
  3. 14
      modules/ml/src/boost.cpp
  4. 58
      modules/ml/src/ertrees.cpp
  5. 99
      modules/ml/src/tree.cpp

@ -360,7 +360,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
if (is_buf_16u)
{
unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
vi*sample_count + data_root->offset);
for( int i = 0; i < num_valid; i++ )
{
@ -373,7 +373,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
}
else
{
int* idst_idx = buf->data.i + root->buf_idx*buf->cols +
int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset;
for( int i = 0; i < num_valid; i++ )
{
@ -390,14 +390,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
(workVarCount-1)*sample_count + root->offset);
for( int i = 0; i < count; i++ )
udst[i] = (unsigned short)src_lbls[sidx[i]];
}
else
{
int* idst = buf->data.i + root->buf_idx*buf->cols +
int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
(workVarCount-1)*sample_count + root->offset;
for( int i = 0; i < count; i++ )
idst[i] = src_lbls[sidx[i]];
@ -407,14 +407,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset);
for( int i = 0; i < count; i++ )
sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
}
else
{
int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols +
int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset;
for( int i = 0; i < count; i++ )
sample_idx_dst[i] = sample_idx_src[sidx[i]];
@ -489,6 +489,10 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
int* idst = 0;
unsigned short* udst = 0;
uint64 effective_buf_size = 0;
int effective_buf_height = 0, effective_buf_width = 0;
clear();
shared = true;
have_labels = true;
@ -548,13 +552,28 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
var_type->data.i[var_count] = cat_var_count;
var_type->data.i[var_count+1] = cat_var_count+1;
work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
buf_size = (work_var_count + 1) * sample_count/*sample_indices*/;
buf_count = 2;
buf_size = -1; // the member buf_size is obsolete
effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
effective_buf_width = sample_count;
effective_buf_height = work_var_count+1;
if (effective_buf_width >= effective_buf_height)
effective_buf_height *= buf_count;
else
effective_buf_width *= buf_count;
if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
{
CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
}
if ( is_buf_16u )
buf = cvCreateMat( buf_count, buf_size, CV_16UC1 );
buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
else
buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );
buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );
cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
@ -609,7 +628,7 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
priors_mult = cvCloneMat( priors );
counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
direction = cvCreateMat( 1, sample_count, CV_8UC1 );
split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );
split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
}
void CvCascadeBoostTrainData::free_train_data()
@ -652,10 +671,10 @@ void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* o
if ( vi < numPrecalcIdx )
{
if( !is_buf_16u )
*sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
*sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset;
else
{
const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset );
for( int i = 0; i < nodeSampleCount; i++ )
sortedIndicesBuf[i] = shortIndices[i];
@ -1027,6 +1046,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
int newBufIdx = data->get_child_buf_idx( node );
int workVarCount = data->get_work_var_count();
CvMat* buf = data->buf;
size_t length_buf_row = data->get_length_subbuf();
cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
int* tempBuf = (int*)(uchar*)inn_buf;
bool splitInputData;
@ -1070,7 +1090,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
ushort *ldst, *rdst;
ldst = (ushort*)(buf->data.s + left->buf_idx*buf->cols +
ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
vi*scount + left->offset);
rdst = (ushort*)(ldst + nl);
@ -1096,9 +1116,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
else
{
int *ldst, *rdst;
ldst = buf->data.i + left->buf_idx*buf->cols +
ldst = buf->data.i + left->buf_idx*length_buf_row +
vi*scount + left->offset;
rdst = buf->data.i + right->buf_idx*buf->cols +
rdst = buf->data.i + right->buf_idx*length_buf_row +
vi*scount + right->offset;
// split sorted
@ -1131,9 +1151,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
(workVarCount-1)*scount + left->offset);
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
(workVarCount-1)*scount + right->offset);
for( int i = 0; i < n; i++ )
@ -1154,9 +1174,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
}
else
{
int *ldst = buf->data.i + left->buf_idx*buf->cols +
int *ldst = buf->data.i + left->buf_idx*length_buf_row +
(workVarCount-1)*scount + left->offset;
int *rdst = buf->data.i + right->buf_idx*buf->cols +
int *rdst = buf->data.i + right->buf_idx*length_buf_row +
(workVarCount-1)*scount + right->offset;
for( int i = 0; i < n; i++ )
@ -1184,9 +1204,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
workVarCount*scount + left->offset);
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
workVarCount*scount + right->offset);
for (int i = 0; i < n; i++)
{
@ -1205,9 +1225,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
}
else
{
int* ldst = buf->data.i + left->buf_idx*buf->cols +
int* ldst = buf->data.i + left->buf_idx*length_buf_row +
workVarCount*scount + left->offset;
int* rdst = buf->data.i + right->buf_idx*buf->cols +
int* rdst = buf->data.i + right->buf_idx*length_buf_row +
workVarCount*scount + right->offset;
for (int i = 0; i < n; i++)
{
@ -1352,6 +1372,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
}
CvMat* buf = data->buf;
size_t length_buf_row = data->get_length_subbuf();
if( !tree ) // before training the first tree, initialize weights and other parameters
{
int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
@ -1375,7 +1396,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
if (data->is_buf_16u)
{
unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count);
for( int i = 0; i < n; i++ )
{
@ -1393,7 +1414,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
}
else
{
int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count;
for( int i = 0; i < n; i++ )

@ -796,7 +796,7 @@ struct CV_EXPORTS CvDTreeTrainData
const CvMat* responses;
CvMat* responses_copy; // used in Boosting
int buf_count, buf_size;
int buf_count, buf_size; // buf_size is obsolete, please do not use it, use expression ((int64)buf->rows * (int64)buf->cols / buf_count) instead
bool shared;
int is_buf_16u;
@ -806,6 +806,12 @@ struct CV_EXPORTS CvDTreeTrainData
CvMat* counts;
CvMat* buf;
// Length, in elements, of one sub-buffer: (work_var_count + 1) * sample_count.
// Both factors are widened to size_t before the multiplication, so the
// product itself is evaluated in size_t rather than int.
inline size_t get_length_subbuf() const
{
    const size_t rows_per_subbuf = static_cast<size_t>(work_var_count + 1);
    const size_t elems_per_row = static_cast<size_t>(sample_count);
    return rows_per_subbuf * elems_per_row;
}
CvMat* direction;
CvMat* split_buf;

@ -1130,13 +1130,13 @@ CvBoost::update_weights( CvBoostTree* tree )
int *sample_idx_buf;
const int* sample_idx = 0;
cv::AutoBuffer<uchar> inn_buf;
size_t _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? data->sample_count*sizeof(int) : 0;
size_t _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? (size_t)(data->sample_count)*sizeof(int) : 0;
if( !tree )
_buf_size += n*sizeof(int);
else
{
if( have_subsample )
_buf_size += data->buf->cols*(sizeof(float)+sizeof(uchar));
_buf_size += data->get_length_subbuf()*(sizeof(float)+sizeof(uchar));
}
inn_buf.allocate(_buf_size);
uchar* cur_buf_pos = (uchar*)inn_buf;
@ -1151,6 +1151,7 @@ CvBoost::update_weights( CvBoostTree* tree )
sample_idx = data->get_sample_indices( data->data_root, sample_idx_buf );
}
CvMat* dtree_data_buf = data->buf;
size_t length_buf_row = data->get_length_subbuf();
if( !tree ) // before training the first tree, initialize weights and other parameters
{
int* class_labels_buf = (int*)cur_buf_pos;
@ -1189,7 +1190,7 @@ CvBoost::update_weights( CvBoostTree* tree )
if (data->is_buf_16u)
{
unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*dtree_data_buf->cols +
unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count);
for( i = 0; i < n; i++ )
{
@ -1207,7 +1208,7 @@ CvBoost::update_weights( CvBoostTree* tree )
}
else
{
int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*dtree_data_buf->cols +
int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count;
for( i = 0; i < n; i++ )
@ -1254,9 +1255,10 @@ CvBoost::update_weights( CvBoostTree* tree )
if( have_subsample )
{
float* values = (float*)cur_buf_pos;
cur_buf_pos = (uchar*)(values + data->buf->cols);
cur_buf_pos = (uchar*)(values + data->get_length_subbuf());
uchar* missing = cur_buf_pos;
cur_buf_pos = missing + data->buf->step;
cur_buf_pos = missing + data->get_length_subbuf() * (size_t)CV_ELEM_SIZE(data->buf->type);
CvMat _sample, _mask;
// invert the subsample mask

@ -80,6 +80,9 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
char err[100];
const int *sidx = 0, *vidx = 0;
uint64 effective_buf_size = 0;
int effective_buf_height = 0, effective_buf_width = 0;
if ( _params.use_surrogates )
CV_ERROR(CV_StsBadArg, "CvERTrees do not support surrogate splits");
@ -179,18 +182,34 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
have_labels = cv_n > 0 || (ord_var_count == 1 && cat_var_count == 0) || _add_labels;
work_var_count = cat_var_count + (is_classifier ? 1 : 0) + (have_labels ? 1 : 0);
buf_size = (work_var_count + 1)*sample_count;
shared = _shared;
buf_count = shared ? 2 : 1;
buf_size = -1; // the member buf_size is obsolete
effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
effective_buf_width = sample_count;
effective_buf_height = work_var_count+1;
if (effective_buf_width >= effective_buf_height)
effective_buf_height *= buf_count;
else
effective_buf_width *= buf_count;
if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
{
CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
}
if ( is_buf_16u )
{
CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_16UC1 ));
CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 ));
CV_CALL( pair16u32s_ptr = (CvPair16u32s*)cvAlloc( sample_count*sizeof(pair16u32s_ptr[0]) ));
}
else
{
CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_32SC1 ));
CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 ));
CV_CALL( int_ptr = (int**)cvAlloc( sample_count*sizeof(int_ptr[0]) ));
}
@ -293,13 +312,13 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
for( i = 0; i < sample_count; i++ )
{
int val = INT_MAX, si = sidx ? sidx[i] : i;
if( !mask || !mask[si*m_step] )
if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
val = idata[si*step];
val = idata[(size_t)si*step];
else
{
float t = fdata[si*step];
float t = fdata[(size_t)si*step];
val = cvRound(t);
if( val != t )
{
@ -405,12 +424,12 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
{
float val = ord_nan;
int si = sidx ? sidx[i] : i;
if( !mask || !mask[si*m_step] )
if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
val = (float)idata[si*step];
val = (float)idata[(size_t)si*step];
else
val = fdata[si*step];
val = fdata[(size_t)si*step];
if( fabs(val) >= ord_nan )
{
@ -578,9 +597,9 @@ const int* CvERTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat
int ci = get_var_type( vi);
const int* cat_values = 0;
if( !is_buf_16u )
cat_values = buf->data.i + n->buf_idx*buf->cols + ci*sample_count + n->offset;
cat_values = buf->data.i + n->buf_idx*get_length_subbuf() + ci*sample_count + n->offset;
else {
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
ci*sample_count + n->offset);
for( int i = 0; i < n->sample_count; i++ )
cat_values_buf[i] = short_values[i];
@ -1333,6 +1352,7 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
CvDTreeNode *left = 0, *right = 0;
int new_buf_idx = data->get_child_buf_idx( node );
CvMat* buf = data->buf;
size_t length_buf_row = data->get_length_subbuf();
cv::AutoBuffer<int> temp_buf(n);
complete_node_dir(node);
@ -1385,9 +1405,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
ci*scount + left->offset);
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
ci*scount + right->offset);
for( i = 0; i < n; i++ )
@ -1415,9 +1435,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
}
else
{
int *ldst = buf->data.i + left->buf_idx*buf->cols +
int *ldst = buf->data.i + left->buf_idx*length_buf_row +
ci*scount + left->offset;
int *rdst = buf->data.i + right->buf_idx*buf->cols +
int *rdst = buf->data.i + right->buf_idx*length_buf_row +
ci*scount + right->offset;
for( i = 0; i < n; i++ )
@ -1460,9 +1480,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
pos*scount + left->offset);
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
pos*scount + right->offset);
for (i = 0; i < n; i++)
@ -1483,9 +1503,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
}
else
{
int* ldst = buf->data.i + left->buf_idx*buf->cols +
int* ldst = buf->data.i + left->buf_idx*length_buf_row +
pos*scount + left->offset;
int* rdst = buf->data.i + right->buf_idx*buf->cols +
int* rdst = buf->data.i + right->buf_idx*length_buf_row +
pos*scount + right->offset;
for (i = 0; i < n; i++)
{

@ -50,7 +50,8 @@ static const int block_size_delta = 1 << 10;
CvDTreeTrainData::CvDTreeTrainData()
{
var_idx = var_type = cat_count = cat_ofs = cat_map =
priors = priors_mult = counts = buf = direction = split_buf = responses_copy = 0;
priors = priors_mult = counts = direction = split_buf = responses_copy = 0;
buf = 0;
tree_storage = temp_storage = 0;
clear();
@ -64,7 +65,8 @@ CvDTreeTrainData::CvDTreeTrainData( const CvMat* _train_data, int _tflag,
bool _shared, bool _add_labels )
{
var_idx = var_type = cat_count = cat_ofs = cat_map =
priors = priors_mult = counts = buf = direction = split_buf = responses_copy = 0;
priors = priors_mult = counts = direction = split_buf = responses_copy = 0;
buf = 0;
tree_storage = temp_storage = 0;
@ -157,6 +159,9 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
char err[100];
const int *sidx = 0, *vidx = 0;
uint64 effective_buf_size = 0;
int effective_buf_height = 0, effective_buf_width = 0;
if( _update_data && data_root )
{
data = new CvDTreeTrainData( _train_data, _tflag, _responses, _var_idx,
@ -285,18 +290,35 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
work_var_count = var_count + (is_classifier ? 1 : 0) // for responses class_labels
+ (have_labels ? 1 : 0); // for cv_labels
buf_size = (work_var_count + 1 /*for sample_indices*/) * sample_count;
shared = _shared;
buf_count = shared ? 2 : 1;
buf_size = -1; // the member buf_size is obsolete
effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
effective_buf_width = sample_count;
effective_buf_height = work_var_count+1;
if (effective_buf_width >= effective_buf_height)
effective_buf_height *= buf_count;
else
effective_buf_width *= buf_count;
if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
{
CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
}
if ( is_buf_16u )
{
CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_16UC1 ));
CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 ));
CV_CALL( pair16u32s_ptr = (CvPair16u32s*)cvAlloc( sample_count*sizeof(pair16u32s_ptr[0]) ));
}
else
{
CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_32SC1 ));
CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 ));
CV_CALL( int_ptr = (int**)cvAlloc( sample_count*sizeof(int_ptr[0]) ));
}
@ -356,7 +378,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
{
int ci;
const uchar* mask = 0;
int m_step = 0, step;
int64 m_step = 0, step;
const int* idata = 0;
const float* fdata = 0;
int num_valid = 0;
@ -399,13 +421,13 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
for( i = 0; i < sample_count; i++ )
{
int val = INT_MAX, si = sidx ? sidx[i] : i;
if( !mask || !mask[si*m_step] )
if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
val = idata[si*step];
val = idata[(size_t)si*step];
else
{
float t = fdata[si*step];
float t = fdata[(size_t)si*step];
val = cvRound(t);
if( fabs(t - val) > FLT_EPSILON )
{
@ -515,12 +537,12 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
{
float val = ord_nan;
int si = sidx ? sidx[i] : i;
if( !mask || !mask[si*m_step] )
if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
val = (float)idata[si*step];
val = (float)idata[(size_t)si*step];
else
val = fdata[si*step];
val = fdata[(size_t)si*step];
if( fabs(val) >= ord_nan )
{
@ -532,7 +554,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
}
if (is_buf_16u)
udst[i] = (unsigned short)i;
udst[i] = (unsigned short)i; // TODO: memory corruption may be here
else
idst[i] = i;
_fdst[i] = val;
@ -751,7 +773,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
if (is_buf_16u)
{
unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset);
for( i = 0; i < count; i++ )
{
@ -762,7 +784,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
}
else
{
int* idst = buf->data.i + root->buf_idx*buf->cols +
int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset;
for( i = 0; i < count; i++ )
{
@ -788,7 +810,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
if (is_buf_16u)
{
unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
vi*sample_count + data_root->offset);
for( i = 0; i < num_valid; i++ )
{
@ -812,7 +834,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
}
else
{
int* idst_idx = buf->data.i + root->buf_idx*buf->cols +
int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset;
for( i = 0; i < num_valid; i++ )
{
@ -840,14 +862,14 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset);
for (i = 0; i < count; i++)
sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
}
else
{
int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols +
int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset;
for (i = 0; i < count; i++)
sample_idx_dst[i] = sample_idx_src[sidx[i]];
@ -1158,10 +1180,10 @@ void CvDTreeTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord_valu
const int* sample_indices = get_sample_indices(n, sample_indices_buf);
if( !is_buf_16u )
*sorted_indices = buf->data.i + n->buf_idx*buf->cols +
*sorted_indices = buf->data.i + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset;
else {
const unsigned short* short_indices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
const unsigned short* short_indices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset );
for( int i = 0; i < node_sample_count; i++ )
sorted_indices_buf[i] = short_indices[i];
@ -1232,10 +1254,10 @@ const int* CvDTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat_
{
const int* cat_values = 0;
if( !is_buf_16u )
cat_values = buf->data.i + n->buf_idx*buf->cols +
cat_values = buf->data.i + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset;
else {
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset);
for( int i = 0; i < n->sample_count; i++ )
cat_values_buf[i] = short_values[i];
@ -3004,6 +3026,7 @@ void CvDTree::split_node_data( CvDTreeNode* node )
int new_buf_idx = data->get_child_buf_idx( node );
int work_var_count = data->get_work_var_count();
CvMat* buf = data->buf;
size_t length_buf_row = data->get_length_subbuf();
cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int) + sizeof(float)));
int* temp_buf = (int*)(uchar*)inn_buf;
@ -3049,7 +3072,7 @@ void CvDTree::split_node_data( CvDTreeNode* node )
{
unsigned short *ldst, *rdst, *ldst0, *rdst0;
//unsigned short tl, tr;
ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
vi*scount + left->offset);
rdst0 = rdst = (unsigned short*)(ldst + nl);
@ -3095,9 +3118,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
else
{
int *ldst0, *ldst, *rdst0, *rdst;
ldst0 = ldst = buf->data.i + left->buf_idx*buf->cols +
ldst0 = ldst = buf->data.i + left->buf_idx*length_buf_row +
vi*scount + left->offset;
rdst0 = rdst = buf->data.i + right->buf_idx*buf->cols +
rdst0 = rdst = buf->data.i + right->buf_idx*length_buf_row +
vi*scount + right->offset;
// split sorted
@ -3158,9 +3181,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
vi*scount + left->offset);
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
vi*scount + right->offset);
for( i = 0; i < n; i++ )
@ -3188,9 +3211,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
}
else
{
int *ldst = buf->data.i + left->buf_idx*buf->cols +
int *ldst = buf->data.i + left->buf_idx*length_buf_row +
vi*scount + left->offset;
int *rdst = buf->data.i + right->buf_idx*buf->cols +
int *rdst = buf->data.i + right->buf_idx*length_buf_row +
vi*scount + right->offset;
for( i = 0; i < n; i++ )
@ -3230,9 +3253,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
int pos = data->get_work_var_count();
if (data->is_buf_16u)
{
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
pos*scount + left->offset);
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
pos*scount + right->offset);
for (i = 0; i < n; i++)
{
@ -3252,9 +3275,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
}
else
{
int* ldst = buf->data.i + left->buf_idx*buf->cols +
int* ldst = buf->data.i + left->buf_idx*length_buf_row +
pos*scount + left->offset;
int* rdst = buf->data.i + right->buf_idx*buf->cols +
int* rdst = buf->data.i + right->buf_idx*length_buf_row +
pos*scount + right->offset;
for (i = 0; i < n; i++)
{
@ -3310,7 +3333,7 @@ float CvDTree::calc_error( CvMLData* _data, int type, vector<float> *resp )
float r = (float)predict( &sample, missing ? &miss : 0 )->value;
if( pred_resp )
pred_resp[i] = r;
int d = fabs((double)r - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
int d = fabs((double)r - response->data.fl[(size_t)si*r_step]) <= FLT_EPSILON ? 0 : 1;
err += d;
}
err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX;
@ -3327,7 +3350,7 @@ float CvDTree::calc_error( CvMLData* _data, int type, vector<float> *resp )
float r = (float)predict( &sample, missing ? &miss : 0 )->value;
if( pred_resp )
pred_resp[i] = r;
float d = r - response->data.fl[si*r_step];
float d = r - response->data.fl[(size_t)si*r_step];
err += d*d;
}
err = sample_count ? err / (float)sample_count : -FLT_MAX;
@ -3633,8 +3656,8 @@ CvDTreeNode* CvDTree::predict( const CvMat* _sample,
int vi = split->var_idx;
int ci = vtype[vi];
i = vidx ? vidx[vi] : vi;
float val = sample[i*step];
if( m && m[i*mstep] )
float val = sample[(size_t)i*step];
if( m && m[(size_t)i*mstep] )
continue;
if( ci < 0 ) // ordered
dir = val <= split->ord.c ? -1 : 1;

Loading…
Cancel
Save