@@ -40,12 +40,10 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
 This is a variation of
 "Stereo Processing by Semiglobal Matching and Mutual Information"
 by Heiko Hirschmuller.
 We match blocks rather than individual pixels, thus the algorithm is called
 SGBM (Semi-Global Block Matching).
 */
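/*
 "Blocks" here means that the per-pixel matching costs are summed over a
 SADWindowSize x SADWindowSize window (the hsumBuf/C aggregation below)
 before the semi-global cost aggregation is applied.
 */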
@@ -62,8 +60,6 @@ namespace cv
typedef short DispType;
enum { NR = 16, NR2 = NR/2 };

struct StereoBinarySGBMParams
{
    StereoBinarySGBMParams()
@@ -96,7 +92,6 @@ namespace cv
        speckleRange = _speckleRange;
        mode = _mode;
    }

    int minDisparity;
    int numDisparities;
    int SADWindowSize;
@@ -109,7 +104,6 @@ namespace cv
    int disp12MaxDiff;
    int mode;
};

/*
 For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
 and for each disparity minD <= d < maxD the function
@@ -117,7 +111,6 @@ namespace cv
 row1[x] and row2[x-d]. The subpixel algorithm from
 "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
 is used, hence the suffix BT.
 the temporary buffer should contain width2*2 elements
 */
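/*
 For reference, a sketch of the Birchfield-Tomasi dissimilarity this function
 evaluates (it matches the c0/c1 computation in the loops below): with u = I1(x),
 v = I2(x-d), [u0, u1] the range spanned by u and its half-pixel neighbours,
 and [v0, v1] likewise for v,

     d_BT(x, d) = min( max(0, u - v1, v0 - u),
                       max(0, v - u1, u0 - v) )

 so the cost is zero whenever the linearly interpolated intensity profiles of
 the two pixels overlap, which makes the measure insensitive to image sampling.
 */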
static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
@@ -131,25 +124,20 @@ namespace cv
    int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
    const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
    PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;

    tab += tabOfs;

    for( c = 0; c < cn*2; c++ )
    {
        prow1[width*c] = prow1[width*c + width-1] =
        prow2[width*c] = prow2[width*c + width-1] = tab[0];
    }

    int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
    int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
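    // note: n1/s1 (and n2/s2) are offsets to the previous/next image row (valid
    // as element offsets here because the input is 8-bit), clamped to 0 at the
    // top/bottom borders, so row1[x + n1] is the pixel above x. prow2 is filled
    // mirrored (index width-1-x), which later lets the cost loop address the
    // right-image pixel at disparity d as prow2[width-1-x+d], a plain positive
    // offset that is SIMD-friendly.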
    if( cn == 1 )
    {
        for( x = 1; x < width-1; x++ )
        {
            prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
            prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];

            prow1[x+width] = row1[x];
            prow2[width-1-x+width] = row2[x];
        }
@@ -161,21 +149,17 @@ namespace cv
            prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
            prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
            prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];

            prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
            prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
            prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];

            prow1[x+width*3] = row1[x*3];
            prow1[x+width*4] = row1[x*3+1];
            prow1[x+width*5] = row1[x*3+2];

            prow2[width-1-x+width*3] = row2[x*3];
            prow2[width-1-x+width*4] = row2[x*3+1];
            prow2[width-1-x+width*5] = row2[x*3+2];
        }
    }

    memset( cost, 0, width1*D*sizeof(cost[0]) );

    buffer -= minX2;
@@ -184,12 +168,10 @@ namespace cv
#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif

#if 1
    for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
    {
        int diff_scale = c < cn ? 0 : 2;

        // precompute
        //   v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
        //   v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
@@ -203,7 +185,6 @@ namespace cv
            buffer[x] = (PixType)v0;
            buffer[x + width2] = (PixType)v1;
        }

        for( x = minX1; x < maxX1; x++ )
        {
            int u = prow1[x];
@@ -211,14 +192,12 @@ namespace cv
            int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
            int u0 = std::min(ul, ur); u0 = std::min(u0, u);
            int u1 = std::max(ul, ur); u1 = std::max(u1, u);

#if CV_SSE2
            if( useSIMD )
            {
                __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
                __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
                __m128i ds = _mm_cvtsi32_si128(diff_scale);

                for( int d = minD; d < maxD; d += 16 )
                {
                    __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
@@ -227,10 +206,8 @@ namespace cv
                    __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
                    __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
                    __m128i diff = _mm_min_epu8(c0, c1);

                    c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
                    c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));

                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff, z), ds)));
                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff, z), ds)));
                }
@@ -245,7 +222,6 @@ namespace cv
                    int v1 = buffer[width-x-1 + d + width2];
                    int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
                    int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);

                    cost[x*D + d] = (CostType)(cost[x*D + d] + (std::min(c0, c1) >> diff_scale));
                }
            }
@@ -286,25 +262,20 @@ namespace cv
        }
#endif
    }
/*
 computes disparity for "roi" in img1 w.r.t. img2 and writes it to disp1buf.
 that is, disp1buf(x, y) = d means that img1(x + roi.x, y + roi.y) ~ img2(x + roi.x - d, y + roi.y).
 minD <= d < maxD.
 disp2full is the reverse disparity map, that is:
 disp2full(x + roi.x, y + roi.y) = d means that img2(x + roi.x, y + roi.y) ~ img1(x + roi.x + d, y + roi.y)

 note that disp1buf will have the same size as the roi and
 disp2full will have the same size as img1 (or img2).
 On exit disp2buf is not the final disparity, it is an intermediate result that becomes
 final after all the tiles are processed.

 the disparity in disp1buf is written with sub-pixel accuracy
 (4 fractional bits, see StereoSGBM::DISP_SCALE),
 using quadratic interpolation, while the disparity in disp2buf
 is written as is, without interpolation.

 disp2cost also has the same size as img1 (or img2).
 It contains the minimum current cost, used to find the best (minimum-cost) disparity.
 */
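/*
 A numeric illustration of the fixed-point format (a sketch; dispf is our name):
 with DISP_SHIFT = 4 the scale is DISP_SCALE = 16, so a stored value of 85 in
 disp1 encodes the subpixel disparity 85/16 = 5.3125. A caller can convert the
 CV_16S result to real-valued disparities with

     Mat dispf;
     disp.convertTo(dispf, CV_32F, 1.0/StereoMatcher::DISP_SCALE);
 */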
@@ -332,7 +303,6 @@ namespace cv
    const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
    const int DISP_SCALE = (1 << DISP_SHIFT);
    const CostType MAX_COST = SHRT_MAX;

    int minD = params.minDisparity, maxD = minD + params.numDisparities;
    Size SADWindowSize;
    SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
@@ -349,28 +319,22 @@ namespace cv
    int npasses = fullDP ? 2 : 1;
    const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
    PixType clipTab[TAB_SIZE];

    for( k = 0; k < TAB_SIZE; k++ )
        clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
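    // clipTab maps a raw x-derivative value v (biased by TAB_OFS so the index is
    // never negative) to clip(v, -ftzero, ftzero) + ftzero, i.e. into the range
    // [0, 2*ftzero]. For example, assuming ftzero = 63, a raw difference of +200
    // is clipped to 63 and stored as 126.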
    if( minX1 >= maxX1 )
    {
        disp1 = Scalar::all(INVALID_DISP_SCALED);
        return;
    }

    CV_Assert( D % 16 == 0 );

    // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
    // if you change NR, please, modify the loop as well.
    int D2 = D + 16, NRD2 = NR2*D2;

    // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
    // for 8-way dynamic programming we need the current row and
    // the previous row, i.e. 2 rows in total
    const int NLR = 2;
    const int LrBorder = NLR - 1;

    // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
    // we keep pixel difference cost (C) and the summary cost over NR directions (S).
    // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
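    // a sketch of the rotating-buffer scheme used below: Lr[0]/minLr[0] hold the
    // row currently being computed, Lr[1]/minLr[1] the previous row; the pairs
    // are swapped after each image row (see the std::swap calls at the bottom of
    // the y loop), so the working set stays at NLR = 2 rows of partial costs
    // regardless of the image height.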
@@ -383,29 +347,23 @@ namespace cv
        CSBufSize*2*sizeof(CostType) + // C, S
        width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
        width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2

    if( buffer.empty() || !buffer.isContinuous() ||
        buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
        buffer.create(1, (int)totalBufSize, CV_8U);

    // summary cost over different (nDirs) directions
    CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
    CostType* Sbuf = Cbuf + CSBufSize;
    CostType* hsumBuf = Sbuf + CSBufSize;
    CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;

    CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
    DispType* disp2ptr = (DispType*)(disp2cost + width);
    PixType* tempBuf = (PixType*)(disp2ptr + width);

    // add P2 to every C(x,y). it saves a few operations in the inner loops
    for( k = 0; k < width1*D; k++ )
        Cbuf[k] = (CostType)P2;
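    // why pre-adding P2 is valid (a sketch): the DP inner loop below computes
    //     L0 = Cp[d] + min(Lr_p0[d], Lr_p0[d-1] + P1, Lr_p0[d+1] + P1, delta0) - delta0
    // with delta0 = min_k Lr_p0[k] + P2. Substituting Cp[d] = C(p,d) + P2 gives
    // exactly formula 13, C(p,d) + min(...) - min_k L_r(p-r, k), without having
    // to keep both min_k L_r and min_k L_r + P2 around in the loop.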
    for( int pass = 1; pass <= npasses; pass++ )
    {
        int x1, y1, x2, y2, dx, dy;

        if( pass == 1 )
        {
            y1 = 0; y2 = height; dy = 1;
@@ -416,9 +374,7 @@ namespace cv
            y1 = height-1; y2 = -1; dy = -1;
            x1 = width1-1; x2 = -1; dx = -1;
        }
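        // pass 1 sweeps the image top-left to bottom-right, aggregating the
        // directions that point back towards already-visited pixels; with
        // fullDP (npasses == 2) the second pass repeats the sweep in reverse
        // and adds the remaining directions into the same S buffer, giving
        // the full 8-direction aggregation.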
        CostType *Lr[NLR] = {0}, *minLr[NLR] = {0};

        for( k = 0; k < NLR; k++ )
        {
            // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
@@ -431,18 +387,15 @@ namespace cv
            minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
            memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
        }

        for( int y = y1; y != y2; y += dy )
        {
            int x, d;
            DispType* disp1ptr = disp1.ptr<DispType>(y);
            CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
            CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);

            if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
            {
                int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;

                for( k = dy1; k <= dy2; k++ )
                {
                    CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
@@ -450,7 +403,6 @@ namespace cv
                    if( k < height )
                    {
                        calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );

                        memset( hsumAdd, 0, D*sizeof(CostType) );
                        for( x = 0; x <= SW2*D; x += D )
                        {
@@ -503,13 +455,11 @@ namespace cv
                            {
                                const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
                                const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);

                                for( d = 0; d < D; d++ )
                                    hsumAdd[x+d] = (CostType)(hsumAdd[x-D+d] + pixAdd[d] - pixSub[d]);
                            }
                        }
                    }

                    if( y == 0 )
                    {
                        int scale = k == 0 ? SH2 + 1 : 1;
@@ -517,18 +467,15 @@ namespace cv
                            C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
                    }
                }

                // also, clear the S buffer
                for( k = 0; k < width1*D; k++ )
                    S[k] = 0;
            }

            // clear the left and the right borders
            memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
            memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
            memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
            memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );

            /*
             [formula 13 in the paper]
             compute L_r(p, d) = C(p, d) +
@@ -550,87 +497,65 @@ namespace cv
            for( x = x1; x != x2; x += dx )
            {
                int xm = x*NR2, xd = xm*D2;

                int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
                int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;

                CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
                CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
                CostType* Lr_p2 = Lr[1] + xd + D2*2;
                CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;

                Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
                Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;

                CostType* Lr_p = Lr[0] + xd;
                const CostType* Cp = C + x*D;
                CostType* Sp = S + x*D;

#if CV_SSE2
                if( useSIMD )
                {
                    __m128i _P1 = _mm_set1_epi16((short)P1);

                    __m128i _delta0 = _mm_set1_epi16((short)delta0);
                    __m128i _delta1 = _mm_set1_epi16((short)delta1);
                    __m128i _delta2 = _mm_set1_epi16((short)delta2);
                    __m128i _delta3 = _mm_set1_epi16((short)delta3);
                    __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);

                    for( d = 0; d < D; d += 8 )
                    {
                        __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
                        __m128i L0, L1, L2, L3;

                        L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
                        L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
                        L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
                        L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));

                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));

                        L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
                        L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));

                        L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
                        L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));

                        L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
                        L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));

                        L0 = _mm_min_epi16(L0, _delta0);
                        L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);

                        L1 = _mm_min_epi16(L1, _delta1);
                        L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);

                        L2 = _mm_min_epi16(L2, _delta2);
                        L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);

                        L3 = _mm_min_epi16(L3, _delta3);
                        L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);

                        _mm_store_si128((__m128i*)(Lr_p + d), L0);
                        _mm_store_si128((__m128i*)(Lr_p + d + D2), L1);
                        _mm_store_si128((__m128i*)(Lr_p + d + D2*2), L2);
                        _mm_store_si128((__m128i*)(Lr_p + d + D2*3), L3);

                        __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
                        __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
                        t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
                        _minL0 = _mm_min_epi16(_minL0, t0);
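                        // the unpack/min ladder above interleaves the four direction
                        // vectors so that, after the final reduction below the loop,
                        // the low four lanes of _minL0 hold min over d of L0..L3,
                        // i.e. the four values stored into minLr[0][xm..xm+3].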
                        __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));

                        L0 = _mm_adds_epi16(L0, L1);
                        L2 = _mm_adds_epi16(L2, L3);
                        Sval = _mm_adds_epi16(Sval, L0);
                        Sval = _mm_adds_epi16(Sval, L2);

                        _mm_store_si128((__m128i*)(Sp + d), Sval);
                    }

                    _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
                    _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
                }
@@ -703,22 +628,18 @@ namespace cv
                    __m128i _minL0 = _mm_set1_epi16((short)minL0);
                    __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
                    __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);

                    for( d = 0; d < D; d += 8 )
                    {
                        __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;

                        L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));

                        L0 = _mm_min_epi16(L0, _delta0);
                        L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);

                        _mm_store_si128((__m128i*)(Lr_p + d), L0);
                        _minL0 = _mm_min_epi16(_minL0, L0);
                        L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
                        _mm_store_si128((__m128i*)(Sp + d), L0);

                        __m128i mask = _mm_cmpgt_epi16(_minS, L0);
                        _minS = _mm_min_epi16(_minS, L0);
                        _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp, _d8), mask));
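                        // branchless select: b ^ ((b ^ d8) & mask) equals (mask ? d8 : b),
                        // so _bestDisp picks up the lane's disparity index wherever a new
                        // minimum of S appears (_d8 presumably advances by _8 per
                        // iteration in the code elided by this diff).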
@@ -727,22 +648,17 @@ namespace cv
                    short CV_DECL_ALIGNED(16) bestDispBuf[8];
                    _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);

                    _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
                    _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
                    _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));

                    __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
                    qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
                    qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));

                    minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
                    minS = (CostType)_mm_cvtsi128_si32(qS);

                    qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
                    qS = _mm_cmpeq_epi16(_minS, qS);
                    int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;

                    bestDisp = bestDispBuf[LSBTab[idx]];
                }
                else
@@ -751,10 +667,8 @@ namespace cv
                    for( d = 0; d < D; d++ )
                    {
                        int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;

                        Lr_p[d] = (CostType)L0;
                        minL0 = std::min(minL0, L0);

                        int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
                        if( Sval < minS )
                        {
@@ -777,7 +691,6 @@ namespace cv
                        }
                    }
                }

                for( d = 0; d < D; d++ )
                {
                    if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
@@ -792,7 +705,6 @@ namespace cv
                        disp2cost[_x2] = (CostType)minS;
                        disp2ptr[_x2] = (DispType)(d + minD);
                    }

                if( 0 < d && d < D-1 )
                {
                    // do subpixel quadratic interpolation:
@@ -805,7 +717,6 @@ namespace cv
                    d *= DISP_SCALE;
                    disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
                }
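                // a sketch of the quadratic (parabola-vertex) subpixel step applied
                // above, with S(d) the aggregated cost at the winning disparity and
                // its neighbours:
                //     offset = (S(d-1) - S(d+1)) / (2*(S(d-1) - 2*S(d) + S(d+1)))
                // the offset is scaled by DISP_SCALE and added to d, which is why
                // the 0 < d && d < D-1 guard above is required.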
                for( x = minX1; x < maxX1; x++ )
                {
                    // we round the computed disparity both towards -inf and +inf and check
@@ -822,14 +733,12 @@ namespace cv
                        disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
                }
            }

            // now shift the cyclic buffers
            std::swap( Lr[0], Lr[1] );
            std::swap( minLr[0], minLr[1] );
        }
    }
}

class StereoBinarySGBMImpl : public StereoBinarySGBM
{
public:
@@ -837,7 +746,6 @@ namespace cv
    {
        params = StereoBinarySGBMParams();
    }

    StereoBinarySGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
                          int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
                          int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
@@ -848,16 +756,13 @@ namespace cv
                          _uniquenessRatio, _speckleWindowSize, _speckleRange,
                          _mode );
    }

    void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
    {
        Mat left = leftarr.getMat(), right = rightarr.getMat();
        CV_Assert( left.size() == right.size() && left.type() == right.type() &&
                   left.depth() == CV_8U );

        disparr.create( left.size(), CV_16S );
        Mat disp = disparr.getMat();

        computeDisparityBinarySGBM( left, right, disp, params, buffer );
        medianBlur( disp, disp, 3 );
@@ -865,7 +770,6 @@ namespace cv
        filterSpeckles( disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,
                        StereoMatcher::DISP_SCALE*params.speckleRange, buffer );
    }
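    /*
     A minimal usage sketch (hedged: the variable names are ours, and we assume
     the stereo module's usual create() factory whose leading parameters are
     (minDisparity, numDisparities, blockSize)):

         Ptr<StereoBinarySGBM> sgbm = StereoBinarySGBM::create(0, 64, 9);
         Mat disp;
         sgbm->compute(left, right, disp); // CV_16S, scaled by DISP_SCALE = 16

     left and right must be same-size, same-type 8-bit rectified images, as the
     CV_Assert in compute() above enforces.
     */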
    int getMinDisparity() const { return params.minDisparity; }
    void setMinDisparity( int minDisparity ) { params.minDisparity = minDisparity; }