diff --git a/modules/text/src/ocr_beamsearch_decoder.cpp b/modules/text/src/ocr_beamsearch_decoder.cpp index 41be8560a..4e7942eec 100644 --- a/modules/text/src/ocr_beamsearch_decoder.cpp +++ b/modules/text/src/ocr_beamsearch_decoder.cpp @@ -566,7 +566,7 @@ OCRBeamSearchClassifierCNN::OCRBeamSearchClassifierCNN (const string& filename) nr_feature = weights.rows; nr_class = weights.cols; - patch_size = (int)sqrt((float)kernels.cols); + patch_size = (int)round(sqrt((float)kernels.cols)); window_size = 4*patch_size; step_size = 4; quad_size = 12; @@ -602,8 +602,12 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector > Mat tmp; Mat img; + int sz = src.cols - window_size; + int sz_window_quad = window_size - quad_size; + int sz_half_quad = (int)(quad_size/2-1); + int sz_quad_patch = quad_size - patch_size; // begin sliding window loop foreach detection window - for (int x_c=0; x_c<=src.cols-window_size; x_c=x_c+step_size) + for (int x_c = 0; x_c <= sz; x_c += step_size) { img = src(Rect(Point(x_c,0),Size(window_size,window_size))); @@ -613,21 +617,21 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector > int quad_id = 1; - for (int q_x=0; q_x<=window_size-quad_size; q_x=q_x+(quad_size/2-1)) + + for (int q_x = 0; q_x <= sz_window_quad; q_x += sz_half_quad) { - for (int q_y=0; q_y<=window_size-quad_size; q_y=q_y+(quad_size/2-1)) + for (int q_y = 0; q_y <= sz_window_quad; q_y += sz_half_quad) { Rect quad_rect = Rect(q_x,q_y,quad_size,quad_size); quad = img(quad_rect); //start sliding window (8x8) in each tile and store the patch as row in data_pool - for (int w_x=0; w_x<=quad_size-patch_size; w_x++) + for (int w_x = 0; w_x <= sz_quad_patch; w_x++) { - for (int w_y=0; w_y<=quad_size-patch_size; w_y++) + for (int w_y = 0; w_y <= sz_quad_patch; w_y++) { - quad(Rect(w_x,w_y,patch_size,patch_size)).copyTo(tmp); + quad(Rect(w_x,w_y,patch_size,patch_size)).convertTo(tmp, CV_64F); tmp = tmp.reshape(0,1); - tmp.convertTo(tmp, CV_64F); normalizeAndZCA(tmp); vector patch; tmp.copyTo(patch); @@ -657,6 +661,7 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector > } } + //do dot product of each normalized and whitened patch //each pool is averaged and this yields a representation of 9xD Mat feature = Mat::zeros(9,kernels.rows,CV_64FC1); diff --git a/modules/text/src/ocr_hmm_decoder.cpp b/modules/text/src/ocr_hmm_decoder.cpp index e4367493b..a5fba688f 100644 --- a/modules/text/src/ocr_hmm_decoder.cpp +++ b/modules/text/src/ocr_hmm_decoder.cpp @@ -982,7 +982,7 @@ OCRHMMClassifierCNN::OCRHMMClassifierCNN (const string& filename) nr_feature = weights.rows; nr_class = weights.cols; - patch_size = (int)sqrt((float)kernels.cols); + patch_size = (int)round(sqrt((float)kernels.cols)); // algorithm internal parameters window_size = 32; num_quads = 25; @@ -1017,21 +1017,23 @@ void OCRHMMClassifierCNN::eval( InputArray _src, vector& out_class, vector< int quad_id = 1; - for (int q_x=0; q_x<=window_size-quad_size; q_x=q_x+(int)(quad_size/2-1)) + int sz_window_quad = window_size - quad_size; + int sz_half_quad = (int)(quad_size/2-1); + int sz_quad_patch = quad_size - patch_size; + for (int q_x=0; q_x <= sz_window_quad; q_x += sz_half_quad) { - for (int q_y=0; q_y<=window_size-quad_size; q_y=q_y+(int)(quad_size/2-1)) + for (int q_y=0; q_y <= sz_window_quad; q_y += sz_half_quad) { Rect quad_rect = Rect(q_x,q_y,quad_size,quad_size); quad = img(quad_rect); //start sliding window (8x8) in each tile and store the patch as row in data_pool - for (int w_x=0; w_x<=quad_size-patch_size; w_x++) + for (int w_x = 0; w_x <= sz_quad_patch; w_x++) { - for (int w_y=0; w_y<=quad_size-patch_size; w_y++) + for (int w_y = 0; w_y <= sz_quad_patch; w_y++) { - quad(Rect(w_x,w_y,patch_size,patch_size)).copyTo(tmp); + quad(Rect(w_x,w_y,patch_size,patch_size)).convertTo(tmp, CV_64F); tmp = tmp.reshape(0,1); - tmp.convertTo(tmp, CV_64F); normalizeAndZCA(tmp); vector patch; tmp.copyTo(patch);