Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/11938/head
Alexander Alekhin 6 years ago
commit fa66c6b797
Changed files:
  1. modules/core/include/opencv2/core/mat.hpp (6 changed lines)
  2. modules/dnn/src/opencl/conv_layer_spatial.cl (169 changed lines)
  3. modules/dnn/test/test_backends.cpp (18 changed lines)
  4. modules/dnn/test/test_caffe_importer.cpp (10 changed lines)
  5. modules/dnn/test/test_googlenet.cpp (2 changed lines)
  6. modules/dnn/test/test_halide_layers.cpp (48 changed lines)
  7. modules/dnn/test/test_precomp.hpp (61 changed lines)
  8. modules/dnn/test/test_tf_importer.cpp (2 changed lines)
  9. modules/dnn/test/test_torch_importer.cpp (4 changed lines)

modules/core/include/opencv2/core/mat.hpp
@@ -1318,7 +1318,7 @@ public:
    /** @brief Returns a zero array of the specified size and type.
    The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant
-   array as a function parameter, part of a matrix expression, or as a matrix initializer. :
+   array as a function parameter, part of a matrix expression, or as a matrix initializer:
    @code
        Mat A;
        A = Mat::zeros(3, 3, CV_32F);
@@ -1354,6 +1354,8 @@ public:
    The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it
    just remembers the scale factor (3 in this case) and use it when actually invoking the matrix
    initializer.
+   @note In case of multi-channels type, only the first channel will be initialized with 1's, the
+   others will be set to 0's.
    @param rows Number of rows.
    @param cols Number of columns.
    @param type Created matrix type.
@@ -1381,6 +1383,8 @@ public:
        // make a 4x4 diagonal matrix with 0.1's on the diagonal.
        Mat A = Mat::eye(4, 4, CV_32F)*0.1;
    @endcode
+   @note In case of multi-channels type, identity matrix will be initialized only for the first channel,
+   the others will be set to 0's
    @param rows Number of rows.
    @param cols Number of columns.
    @param type Created matrix type.

modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -467,7 +467,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    int saved_y = curr_y;
#endif
    const __global Dtype *src0_read = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y - INPUT_PAD_H) * ROW_PITCH    // y offset
        + (curr_x - INPUT_PAD_W);               // x offset
@@ -502,15 +502,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
+#if KERNEL_WIDTH == 3
+           Dtype_t blockA00 = vload3(0, src0_read);
+           Dtype* pblockA00 = (Dtype*)(&blockA00);
+#else
            Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
            Dtype* pblockA00 = (Dtype*)(&blockA00);
+#endif
#else
            Dtype_t blockA00;
            Dtype* pblockA00 = (Dtype*)(&blockA00);
            int pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y >= INPUT_PAD_H &&
+                   curr_y < input_height + INPUT_PAD_H &&
+                   curr_x + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA00[pos] = src0_read[pos * DILATION_X];
                else
                    pblockA00[pos] = 0;
@@ -564,17 +572,18 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
        //while( ++patch_row < 1 ); //debug
        while( ++patch_row < KERNEL_HEIGHT );
-       src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
+       // reset to start of next slice of patch
+       src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
    }
    //while ( ++patch_depth < 1 ); //debug
    while ( ++patch_depth < INPUT_DEPTH );
    // Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
    // (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
    int out_offset = global_z * out_pitch_z                                           // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                          // channel offset
        + ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT;                // x offset
    __global Dtype *out = dst + out_offset;
#if APPLY_BIAS
@@ -621,7 +630,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    int saved_y = curr_y;
#endif
    const __global Dtype *src0_read = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y - INPUT_PAD_H) * ROW_PITCH    // y offset
        + (curr_x - INPUT_PAD_W);               // x offset
@@ -653,7 +662,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            int pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y >= INPUT_PAD_H &&
+                   curr_y < input_height + INPUT_PAD_H &&
+                   curr_x + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA00[pos] = src0_read[pos * DILATION_X];
                else
                    pblockA00[pos] = 0;
@@ -730,17 +742,18 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
        //while( ++patch_row < 1 ); //debug
        while( ++patch_row < KERNEL_HEIGHT );
-       src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+       // reset to start of next slice of patch
+       src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
    }
    //while ( ++patch_depth < 1 ); //debug
    while ( ++patch_depth < INPUT_DEPTH );
    // Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
    // (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
    int out_offset = global_z * out_pitch_z                                           // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                          // channel offset
        + ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT;                // x offset
    __global Dtype *out = dst + out_offset;
#if APPLY_BIAS
    Dtype bias[4];
@@ -849,11 +862,11 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    int saved_y1 = curr_y1;
#endif
    const __global Dtype *src0_read0 = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y0 - INPUT_PAD_H) * ROW_PITCH   // y offset
        + curr_x0 - INPUT_PAD_W;                // x offset
    const __global Dtype *src0_read1 = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y1 - INPUT_PAD_H) * ROW_PITCH   // y offset
        + curr_x1 - INPUT_PAD_W;                // x offset
@@ -883,17 +896,38 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            // ...
            const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
-           Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[ 0 ]; src0_read0 += ROW_PITCH;
-           Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[ 0 ]; src0_read1 += ROW_PITCH;
+#if KERNEL_WIDTH == 3
+           Dtype_t blockA00 = vload3(0, src0_read0); src0_read0 += ROW_PITCH;
+           Dtype_t blockA01 = vload3(0, src0_read1); src0_read1 += ROW_PITCH;
            Dtype* pblockA00 = (Dtype*)(&blockA00);
            Dtype* pblockA01 = (Dtype*)(&blockA01);
+#else
+           Dtype_t blockA00 = { (Dtype)0.f };
+           Dtype_t blockA01 = { (Dtype)0.f };
+           Dtype* pblockA00 = (Dtype*)(&blockA00);
+           Dtype* pblockA01 = (Dtype*)(&blockA01);
+           int pos = 0;
+           LOOP(KERNEL_WIDTH, pos,
+           {
+               if (curr_x0 + pos < input_width)
+                   pblockA00[pos] = src0_read0[pos];
+               if (curr_x1 + pos < input_width)
+                   pblockA01[pos] = src0_read1[pos];
+           })
+           src0_read0 += ROW_PITCH;
+           src0_read1 += ROW_PITCH;
+#endif
#else
            Dtype_t blockA00;
            Dtype* pblockA00 = (Dtype*)(&blockA00);
            int pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y0 >= INPUT_PAD_H &&
+                   curr_y0 < input_height + INPUT_PAD_H &&
+                   curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA00[pos] = src0_read0[pos * DILATION_X];
                else
                    pblockA00[pos] = 0;
@@ -904,7 +938,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y1 >= INPUT_PAD_H &&
+                   curr_y1 < input_height + INPUT_PAD_H &&
+                   curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA01[pos] = src0_read1[pos * DILATION_X];
                else
                    pblockA01[pos] = 0;
@@ -972,7 +1009,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
        curr_y0 = saved_y0;
        curr_y1 = saved_y1;
#endif
-       src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+       // reset to start of next slice of patch
+       src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
        src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
    }
    //while ( ++patch_depth < 1 ); //debug
@@ -980,14 +1018,14 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    // Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
    // (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
    int out0_offset = global_z * out_pitch_z                                               // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                               // channel offset
        + ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT;                 // x offset
    int out1_offset = global_z * out_pitch_z                                               // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                               // channel offset
        + ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT;                 // x offset
#if APPLY_BIAS
    Dtype bias[4];
@@ -1049,11 +1087,11 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    int saved_y1 = curr_y1;
#endif
    const __global Dtype *src0_read0 = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y0 - INPUT_PAD_H) * ROW_PITCH   // y offset
        + curr_x0 - INPUT_PAD_W;                // x offset
    const __global Dtype *src0_read1 = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y1 - INPUT_PAD_H) * ROW_PITCH   // y offset
        + curr_x1 - INPUT_PAD_W;                // x offset
@@ -1084,7 +1122,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            int pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y0 >= INPUT_PAD_H &&
+                   curr_y0 < input_height + INPUT_PAD_H &&
+                   curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA00[pos] = src0_read0[pos * DILATION_X];
                else
                    pblockA00[pos] = 0;
@@ -1095,7 +1136,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y1 >= INPUT_PAD_H &&
+                   curr_y1 < input_height + INPUT_PAD_H &&
+                   curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA01[pos] = src0_read1[pos * DILATION_X];
                else
                    pblockA01[pos] = 0;
@@ -1185,7 +1229,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
        curr_y0 = saved_y0;
        curr_y1 = saved_y1;
#endif
-       src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+       // reset to start of next slice of patch
+       src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
        src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
    }
    //while ( ++patch_depth < 1 ); //debug
@@ -1193,14 +1238,14 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    // Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
    // (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
    int out0_offset = global_z * out_pitch_z                                               // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                               // channel offset
        + ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT;                 // x offset
    int out1_offset = global_z * out_pitch_z                                               // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                               // channel offset
        + ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT;                 // x offset
    __global Dtype *out1 = dst + out1_offset;
#if APPLY_BIAS
@@ -1352,9 +1397,9 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    int saved_y = curr_y;
#endif
    const __global Dtype *src0_read = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y - INPUT_PAD_H) * ROW_PITCH    // y offset
        + curr_x - INPUT_PAD_W;                 // x offset
    const __global Dtype *src0_read_orig = src0_read;
    // Src1 (filter) is directly used as btile.
@@ -1409,15 +1454,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
+#if KERNEL_WIDTH == 3
+           Dtype_t blockA00 = vload3(0, src0_read);
+           Dtype* pblockA00 = (Dtype*)(&blockA00);
+#else
            Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
            Dtype* pblockA00 = (Dtype*)(&blockA00);
+#endif
#else
            Dtype_t blockA00;
            Dtype* pblockA00 = (Dtype*)(&blockA00);
            int pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y >= INPUT_PAD_H &&
+                   curr_y < input_height + INPUT_PAD_H &&
+                   curr_x + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA00[pos] = src0_read[pos * DILATION_X];
                else
                    pblockA00[pos] = 0;
@@ -1463,17 +1516,18 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
        //while( ++patch_row < 1 ); //debug
        while( ++patch_row < KERNEL_HEIGHT );
-       src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+       // reset to start of next slice of patch
+       src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
    }
    //while ( ++patch_depth < 1 ); //debug
    while ( ++patch_depth < INPUT_DEPTH );
    // Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
    // (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
    int out_offset = global_z * out_pitch_z                                           // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                          // channel offset
        + ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT;                // x offset
    __global Dtype *out = dst + out_offset;
#if APPLY_BIAS
@@ -1556,11 +1610,11 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    int saved_y1 = curr_y1;
#endif
    const __global Dtype *src0_read0 = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y0 - INPUT_PAD_H) * ROW_PITCH   // y offset
        + curr_x0 - INPUT_PAD_W;                // x offset
    const __global Dtype *src0_read1 = src0
        + aligned_input_size * global_z         // batch offset
        + (curr_y1 - INPUT_PAD_H) * ROW_PITCH   // y offset
        + curr_x1 - INPUT_PAD_W;                // x offset
@@ -1600,7 +1654,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            int pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y0 >= INPUT_PAD_H &&
+                   curr_y0 < input_height + INPUT_PAD_H &&
+                   curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA00[pos] = src0_read0[pos * DILATION_X];
                else
                    pblockA00[pos] = 0;
@@ -1611,7 +1668,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
            pos = 0;
            LOOP(KERNEL_WIDTH, pos,
            {
-               if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
+               if (curr_y1 >= INPUT_PAD_H &&
+                   curr_y1 < input_height + INPUT_PAD_H &&
+                   curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
+                   curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
                    pblockA01[pos] = src0_read1[pos * DILATION_X];
                else
                    pblockA01[pos] = 0;
@@ -1667,7 +1727,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
        curr_y0 = saved_y0;
        curr_y1 = saved_y1;
#endif
-       src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
+       // reset to start of next slice of patch
+       src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
        src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
    }
    //while ( ++patch_depth < 1 ); //debug
@@ -1675,14 +1736,14 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
    // Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
    // (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
    int out0_offset = global_z * out_pitch_z                                               // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                               // channel offset
        + ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT;                 // x offset
    int out1_offset = global_z * out_pitch_z                                               // batch offset
        + ( group_x * TILE_N ) * out_pitch_y                                               // channel offset
        + ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X  // y offset
        + ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT;                 // x offset
#if APPLY_BIAS
    Dtype bias[2];

modules/dnn/test/test_backends.cpp
@@ -278,19 +278,19 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
    processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
}
-const tuple<DNNBackend, DNNTarget> testCases[] = {
+const tuple<Backend, Target> testCases[] = {
#ifdef HAVE_HALIDE
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
+   tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
+   tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
#endif
#ifdef HAVE_INF_ENGINE
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
+   tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
+   tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
+   tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
+   tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
#endif
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
-   tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
+   tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
+   tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
};
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases));

modules/dnn/test/test_caffe_importer.cpp
@@ -82,7 +82,7 @@ TEST(Test_Caffe, read_googlenet)
    ASSERT_FALSE(net.empty());
}
-typedef testing::TestWithParam<tuple<bool, DNNTarget> > Reproducibility_AlexNet;
+typedef testing::TestWithParam<tuple<bool, Target> > Reproducibility_AlexNet;
TEST_P(Reproducibility_AlexNet, Accuracy)
{
    bool readFromMemory = get<0>(GetParam());
@@ -179,7 +179,7 @@ TEST(Reproducibility_SSD, Accuracy)
    normAssertDetections(ref, out);
}
-typedef testing::TestWithParam<DNNTarget> Reproducibility_MobileNet_SSD;
+typedef testing::TestWithParam<Target> Reproducibility_MobileNet_SSD;
TEST_P(Reproducibility_MobileNet_SSD, Accuracy)
{
    const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false);
@@ -234,7 +234,7 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy)
INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD,
    Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));
-typedef testing::TestWithParam<DNNTarget> Reproducibility_ResNet50;
+typedef testing::TestWithParam<Target> Reproducibility_ResNet50;
TEST_P(Reproducibility_ResNet50, Accuracy)
{
    Net net = readNetFromCaffe(findDataFile("dnn/ResNet-50-deploy.prototxt", false),
@@ -270,7 +270,7 @@ TEST_P(Reproducibility_ResNet50, Accuracy)
INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_ResNet50,
    Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));
-typedef testing::TestWithParam<DNNTarget> Reproducibility_SqueezeNet_v1_1;
+typedef testing::TestWithParam<Target> Reproducibility_SqueezeNet_v1_1;
TEST_P(Reproducibility_SqueezeNet_v1_1, Accuracy)
{
    Net net = readNetFromCaffe(findDataFile("dnn/squeezenet_v1.1.prototxt", false),
@@ -413,7 +413,7 @@ TEST(Test_Caffe, multiple_inputs)
    normAssert(out, first_image + second_image);
}
-typedef testing::TestWithParam<tuple<std::string, DNNTarget> > opencv_face_detector;
+typedef testing::TestWithParam<tuple<std::string, Target> > opencv_face_detector;
TEST_P(opencv_face_detector, Accuracy)
{
    std::string proto = findDataFile("dnn/opencv_face_detector.prototxt", false);

modules/dnn/test/test_googlenet.cpp
@@ -52,7 +52,7 @@ static std::string _tf(TString filename)
    return (getOpenCVExtraDir() + "/dnn/") + filename;
}
-typedef testing::TestWithParam<DNNTarget> Reproducibility_GoogLeNet;
+typedef testing::TestWithParam<Target> Reproducibility_GoogLeNet;
TEST_P(Reproducibility_GoogLeNet, Batching)
{
    Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false),

modules/dnn/test/test_halide_layers.cpp
@@ -41,21 +41,21 @@ static void test(LayerParams& params, Mat& input, int backendId, int targetId)
    test(input, net, backendId, targetId);
}
-static testing::internal::ParamGenerator<tuple<DNNBackend, DNNTarget> > dnnBackendsAndTargetsWithHalide()
+static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
{
-   static const tuple<DNNBackend, DNNTarget> testCases[] = {
+   static const tuple<Backend, Target> testCases[] = {
#ifdef HAVE_HALIDE
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
+       tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
+       tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
#endif
#ifdef HAVE_INF_ENGINE
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
#endif
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
+       tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
+       tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
    };
    return testing::ValuesIn(testCases);
}
@@ -89,7 +89,7 @@ TEST_P(Test_Halide_layers, Padding)
////////////////////////////////////////////////////////////////////////////////
// Convolution
////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool, tuple<DNNBackend, DNNTarget> > > Convolution;
+typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool, tuple<Backend, Target> > > Convolution;
TEST_P(Convolution, Accuracy)
{
    int inChannels = get<0>(GetParam())[0];
@@ -154,7 +154,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
////////////////////////////////////////////////////////////////////////////////
// Deconvolution
////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Vec4i, bool, tuple<DNNBackend, DNNTarget> > > Deconvolution;
+typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Vec4i, bool, tuple<Backend, Target> > > Deconvolution;
TEST_P(Deconvolution, Accuracy)
{
    int inChannels = get<0>(GetParam())[0];
@@ -220,7 +220,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
////////////////////////////////////////////////////////////////////////////////
// LRN
////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<Vec3i, int, Vec3f, bool, std::string, tuple<DNNBackend, DNNTarget> > > LRN;
+typedef TestWithParam<tuple<Vec3i, int, Vec3f, bool, std::string, tuple<Backend, Target> > > LRN;
TEST_P(LRN, Accuracy)
{
    int inChannels = get<0>(GetParam())[0];
@@ -265,7 +265,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, LRN, Combine(
////////////////////////////////////////////////////////////////////////////////
// Average pooling
////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int, Size, Size, Size, tuple<DNNBackend, DNNTarget> > > AvePooling;
+typedef TestWithParam<tuple<int, Size, Size, Size, tuple<Backend, Target> > > AvePooling;
TEST_P(AvePooling, Accuracy)
{
    int inChannels = get<0>(GetParam());
@@ -305,7 +305,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, AvePooling, Combine(
////////////////////////////////////////////////////////////////////////////////
// Maximum pooling
////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int, Size, Size, Size, Size, tuple<DNNBackend, DNNTarget> > > MaxPooling;
+typedef TestWithParam<tuple<int, Size, Size, Size, Size, tuple<Backend, Target> > > MaxPooling;
TEST_P(MaxPooling, Accuracy)
{
    int inChannels = get<0>(GetParam());
@@ -344,7 +344,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine(
////////////////////////////////////////////////////////////////////////////////
// Fully-connected
////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int, Size, int, bool, tuple<DNNBackend, DNNTarget> > > FullyConnected;
+typedef TestWithParam<tuple<int, Size, int, bool, tuple<Backend, Target> > > FullyConnected;
TEST_P(FullyConnected, Accuracy)
{
    int inChannels = get<0>(GetParam());
@@ -387,7 +387,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine(
////////////////////////////////////////////////////////////////////////////////
// SoftMax
////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int, tuple<DNNBackend, DNNTarget> > > SoftMax;
+typedef TestWithParam<tuple<int, tuple<Backend, Target> > > SoftMax;
TEST_P(SoftMax, Accuracy)
{
    int inChannels = get<0>(GetParam());
@@ -476,7 +476,7 @@ void testInPlaceActivation(LayerParams& lp, int backendId, int targetId)
    test(input, net, backendId, targetId);
}
-typedef TestWithParam<tuple<bool, bool, float, tuple<DNNBackend, DNNTarget> > > BatchNorm;
+typedef TestWithParam<tuple<bool, bool, float, tuple<Backend, Target> > > BatchNorm;
TEST_P(BatchNorm, Accuracy)
{
    bool hasWeights = get<0>(GetParam());
@@ -511,7 +511,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, BatchNorm, Combine(
    dnnBackendsAndTargetsWithHalide()
));
-typedef TestWithParam<tuple<float, tuple<DNNBackend, DNNTarget> > > ReLU;
+typedef TestWithParam<tuple<float, tuple<Backend, Target> > > ReLU;
TEST_P(ReLU, Accuracy)
{
    float negativeSlope = get<0>(GetParam());
@@ -530,7 +530,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Combine(
    dnnBackendsAndTargetsWithHalide()
));
-typedef TestWithParam<tuple<std::string, tuple<DNNBackend, DNNTarget> > > NoParamActivation;
+typedef TestWithParam<tuple<std::string, tuple<Backend, Target> > > NoParamActivation;
TEST_P(NoParamActivation, Accuracy)
{
    int backendId = get<0>(get<1>(GetParam()));
@@ -546,7 +546,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, NoParamActivation, Combine(
    dnnBackendsAndTargetsWithHalide()
));
-typedef TestWithParam<tuple<Vec3f, tuple<DNNBackend, DNNTarget> > > Power;
+typedef TestWithParam<tuple<Vec3f, tuple<Backend, Target> > > Power;
TEST_P(Power, Accuracy)
{
    float power = get<0>(GetParam())[0];
@@ -582,7 +582,7 @@ TEST_P(Test_Halide_layers, ChannelsPReLU)
    testInPlaceActivation(lp, backend, target);
}
-typedef TestWithParam<tuple<bool, tuple<DNNBackend, DNNTarget> > > Scale;
+typedef TestWithParam<tuple<bool, tuple<Backend, Target> > > Scale;
TEST_P(Scale, Accuracy)
{
    bool hasBias = get<0>(GetParam());
@@ -616,7 +616,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Scale, Combine(
// `--- conv ----^ ^ ^
// `---- ... ------' '
// `-----------------'
-typedef TestWithParam<tuple<Vec3i, Vec3i, tuple<DNNBackend, DNNTarget> > > Concat;
+typedef TestWithParam<tuple<Vec3i, Vec3i, tuple<Backend, Target> > > Concat;
TEST_P(Concat, Accuracy)
{
    Vec3i inSize = get<0>(GetParam());
@@ -682,7 +682,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Concat, Combine(
// `--- conv ----^ ^ ^
// `---- ... ------' '
// `-----------------'
-typedef TestWithParam<tuple<Vec3i, std::string, int, bool, tuple<DNNBackend, DNNTarget> > > Eltwise;
+typedef TestWithParam<tuple<Vec3i, std::string, int, bool, tuple<Backend, Target> > > Eltwise;
TEST_P(Eltwise, Accuracy)
{
    Vec3i inSize = get<0>(GetParam());

modules/dnn/test/test_precomp.hpp
@@ -49,15 +49,41 @@
#include "opencv2/dnn.hpp"
#include "test_common.hpp"
-namespace opencv_test { namespace {
-using namespace cv::dnn;
-CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV)
-CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD)
-static testing::internal::ParamGenerator<DNNTarget> availableDnnTargets()
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+static inline void PrintTo(const cv::dnn::Backend& v, std::ostream* os)
+{
+    switch (v) {
+    case DNN_BACKEND_DEFAULT: *os << "DNN_BACKEND_DEFAULT"; return;
+    case DNN_BACKEND_HALIDE: *os << "DNN_BACKEND_HALIDE"; return;
+    case DNN_BACKEND_INFERENCE_ENGINE: *os << "DNN_BACKEND_INFERENCE_ENGINE"; return;
+    case DNN_BACKEND_OPENCV: *os << "DNN_BACKEND_OPENCV"; return;
+    } // don't use "default:" to emit compiler warnings
+    *os << "DNN_BACKEND_UNKNOWN(" << v << ")";
+}
+static inline void PrintTo(const cv::dnn::Target& v, std::ostream* os)
+{
+    switch (v) {
+    case DNN_TARGET_CPU: *os << "DNN_TARGET_CPU"; return;
+    case DNN_TARGET_OPENCL: *os << "DNN_TARGET_OPENCL"; return;
+    case DNN_TARGET_OPENCL_FP16: *os << "DNN_TARGET_OPENCL_FP16"; return;
+    case DNN_TARGET_MYRIAD: *os << "DNN_TARGET_MYRIAD"; return;
+    } // don't use "default:" to emit compiler warnings
+    *os << "DNN_TARGET_UNKNOWN(" << v << ")";
+}
+CV__DNN_EXPERIMENTAL_NS_END
+}} // namespace
+namespace opencv_test {
+using namespace cv::dnn;
+static testing::internal::ParamGenerator<Target> availableDnnTargets()
{
-   static std::vector<DNNTarget> targets;
+   static std::vector<Target> targets;
    if (targets.empty())
    {
        targets.push_back(DNN_TARGET_CPU);
@@ -69,23 +95,23 @@ static testing::internal::ParamGenerator<DNNTarget> availableDnnTargets()
    return testing::ValuesIn(targets);
}
-static testing::internal::ParamGenerator<tuple<DNNBackend, DNNTarget> > dnnBackendsAndTargets()
+static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargets()
{
-   static const tuple<DNNBackend, DNNTarget> testCases[] = {
+   static const tuple<Backend, Target> testCases[] = {
#ifdef HAVE_INF_ENGINE
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
+       tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
#endif
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
-       tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
+       tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
+       tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
+       tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
    };
    return testing::ValuesIn(testCases);
}
-class DNNTestLayer : public TestWithParam <tuple<DNNBackend, DNNTarget> >
+class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
{
public:
    dnn::Backend backend;
@@ -156,6 +182,5 @@ protected:
    }
};
-}}
+} // namespace
#endif
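The PrintTo() overloads added above feed GoogleTest's value printer, so parameterized DNN tests report readable backend/target names instead of bare enum integers. A minimal sketch of how a test file consumes these helpers (ExampleNet is a hypothetical fixture name; the pattern follows the typedef/INSTANTIATE_TEST_CASE_P usage in the test files below):

    typedef testing::TestWithParam<tuple<Backend, Target> > ExampleNet;
    TEST_P(ExampleNet, Smoke)
    {
        Backend backend = get<0>(GetParam());
        Target target = get<1>(GetParam());
        Net net;
        net.setPreferableBackend(backend);
        net.setPreferableTarget(target);
        // On failure, GoogleTest prints GetParam() through the PrintTo() overloads,
        // e.g. (DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) rather than bare integers.
    }
    INSTANTIATE_TEST_CASE_P(/**/, ExampleNet, dnnBackendsAndTargets());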

modules/dnn/test/test_tf_importer.cpp
@@ -243,7 +243,7 @@ TEST_P(Test_TensorFlow_layers, l2_normalize_3d)
    runTensorFlowNet("l2_normalize_3d");
}
-typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;
+typedef testing::TestWithParam<Target> Test_TensorFlow_nets;
TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
{

modules/dnn/test/test_torch_importer.cpp
@@ -100,7 +100,7 @@ static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String out
    }
}
-typedef testing::TestWithParam<DNNTarget> Test_Torch_layers;
+typedef testing::TestWithParam<Target> Test_Torch_layers;
TEST_P(Test_Torch_layers, run_convolution)
{
@@ -208,7 +208,7 @@ TEST_P(Test_Torch_layers, net_non_spatial)
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, availableDnnTargets());
-typedef testing::TestWithParam<DNNTarget> Test_Torch_nets;
+typedef testing::TestWithParam<Target> Test_Torch_nets;
TEST_P(Test_Torch_nets, OpenFace_accuracy)
{
