Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/13433/head
Alexander Alekhin 6 years ago
commit 0c16d8f6c3
  1. 2
      cmake/OpenCVDetectCUDA.cmake
  2. 2
      cmake/OpenCVUtils.cmake
  3. 6
      modules/core/include/opencv2/core/hal/intrin_avx.hpp
  4. 7
      modules/core/include/opencv2/core/hal/intrin_neon.hpp
  5. 7
      modules/core/include/opencv2/core/hal/intrin_sse.hpp
  6. 5
      modules/core/include/opencv2/core/hal/intrin_vsx.hpp
  7. 126
      modules/dnn/src/onnx/onnx_importer.cpp
  8. 6
      modules/dnn/src/op_inf_engine.cpp
  9. 3
      modules/dnn/src/op_inf_engine.hpp
  10. 1
      modules/dnn/test/test_caffe_importer.cpp
  11. 2
      modules/dnn/test/test_darknet_importer.cpp
  12. 2
      modules/dnn/test/test_halide_layers.cpp
  13. 94
      modules/dnn/test/test_layers.cpp
  14. 4
      modules/dnn/test/test_onnx_importer.cpp
  15. 4
      modules/dnn/test/test_torch_importer.cpp
  16. 25
      modules/imgproc/perf/perf_histogram.cpp
  17. 207
      modules/imgproc/src/histogram.cpp
  18. 4
      modules/videoio/src/cap_msmf.cpp
  19. 10
      platforms/ios/build_framework.py
  20. 3
      platforms/osx/build_framework.py

@ -115,7 +115,7 @@ if(CUDA_FOUND)
string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
set(__cuda_arch_bin "5.3 6.2 7.0 7.5")
set(__cuda_arch_bin "5.3 6.2 7.2")
else()
set(__cuda_arch_bin "${_nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")

@ -508,7 +508,7 @@ macro(ocv_warnings_disable)
foreach(var ${_flag_vars})
foreach(warning ${_gxx_warnings})
if(NOT warning MATCHES "^-Wno-")
string(REGEX REPLACE "${warning}(=[^ ]*)?" "" ${var} "${${var}}")
string(REGEX REPLACE "(^|[ ]+)${warning}(=[^ ]*)?([ ]+|$)" " " ${var} "${${var}}")
string(REPLACE "-W" "-Wno-" warning "${warning}")
endif()
ocv_check_flag_support(${var} "${warning}" _varname "")

@ -1125,6 +1125,12 @@ inline float v_reduce_sum(const v_float32x8& a)
return _mm_cvtss_f32(s1);
}
/** @brief Horizontal sum: adds up all four double-precision lanes of @p a.
 *
 *  @param a 256-bit vector of doubles.
 *  @return  the scalar sum of its lanes.
 */
inline double v_reduce_sum(const v_float64x4& a)
{
    // Adjacent-pair sums are formed within each 128-bit half first ...
    const __m256d pairSums = _mm256_hadd_pd(a.val, a.val);
    // ... then the two halves are combined and the lowest lane is read out.
    const __m128d lowHalf = _v256_extract_low(pairSums);
    const __m128d highHalf = _v256_extract_high(pairSums);
    const __m128d total = _mm_add_pd(lowHalf, highHalf);
    return _mm_cvtsd_f64(total);
}
inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
const v_float32x8& c, const v_float32x8& d)
{

@ -984,6 +984,13 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
#if CV_SIMD128_64F
/** @brief Horizontal sum: adds the two double-precision lanes of @p a.
 *
 *  @param a 128-bit vector of doubles.
 *  @return  lane 0 + lane 1.
 */
inline double v_reduce_sum(const v_float64x2& a)
{
    const double lane0 = vgetq_lane_f64(a.val, 0);
    const double lane1 = vgetq_lane_f64(a.val, 1);
    return lane0 + lane1;
}
#endif
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
const v_float32x4& c, const v_float32x4& d)
{

@ -1456,6 +1456,13 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32)
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32)
inline double v_reduce_sum(const v_float64x2& a)
{
double CV_DECL_ALIGNED(32) idx[2];
v_store_aligned(idx, a);
return idx[0] + idx[1];
}
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
const v_float32x4& c, const v_float32x4& d)
{

@ -716,6 +716,11 @@ OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
// Horizontal sum of the double-precision lanes of `a`; returns a scalar.
inline double v_reduce_sum(const v_float64x2& a)
{
// vec_sld(a, a, 8) shifts the concatenation of `a` with itself by 8 bytes
// (presumably yielding `a` with its two 64-bit lanes swapped -- see the AltiVec
// vec_sld reference). Adding that to `a` and extracting element 0 gives the sum.
return vec_extract(vec_add(a.val, vec_sld(a.val, a.val, 8)), 0);
}
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
inline scalartype v_reduce_##suffix(const _Tpvec& a) \
{ \

@ -6,6 +6,7 @@
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_PROTOBUF
@ -134,9 +135,38 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
else
CV_Error(Error::StsUnsupportedFormat, "Unsupported data type: " +
opencv_onnx::TensorProto_DataType_Name(datatype));
if (tensor_proto.dims_size() == 0)
blob.dims = 1; // To force 1-dimensional cv::Mat for scalars.
return blob;
}
// Executes a single layer immediately on the given blobs.
//
// layer   - layer instance to run.
// inputs  - input blobs; all of them must share the same depth, otherwise
//           Error::StsNotImplemented is raised.
// outputs - resized to the number of output shapes reported by
//           Layer::getMemoryShapes and (re)allocated with the inputs' depth
//           (CV_32F when there are no inputs), then filled by Layer::forward.
void runLayer(Ptr<Layer> layer, const std::vector<Mat>& inputs,
              std::vector<Mat>& outputs)
{
    const size_t numInputs = inputs.size();
    std::vector<MatShape> inputShapes(numInputs);
    int depth = CV_32F;  // stays CV_32F only if `inputs` is empty
    for (size_t idx = 0; idx < numInputs; ++idx)
    {
        const Mat& blob = inputs[idx];
        inputShapes[idx] = shape(blob);
        // Every blob after the first has to match the depth seen so far.
        if (idx != 0 && blob.depth() != depth)
            CV_Error(Error::StsNotImplemented, "Mixed input data types.");
        depth = blob.depth();
    }

    // Ask the layer which output and internal (scratch) buffers it needs.
    std::vector<MatShape> outputShapes, scratchShapes;
    layer->getMemoryShapes(inputShapes, 0, outputShapes, scratchShapes);

    const size_t numOutputs = outputShapes.size();
    outputs.resize(numOutputs);
    for (size_t idx = 0; idx < numOutputs; ++idx)
        outputs[idx].create(outputShapes[idx], depth);

    const size_t numScratch = scratchShapes.size();
    std::vector<Mat> scratch(numScratch);
    for (size_t idx = 0; idx < numScratch; ++idx)
        scratch[idx].create(scratchShapes[idx], depth);

    layer->finalize(inputs, outputs);
    layer->forward(inputs, outputs, scratch);
}
std::map<std::string, Mat> ONNXImporter::getGraphTensors(
const opencv_onnx::GraphProto& graph_proto)
{
@ -292,6 +322,26 @@ void ONNXImporter::populateNet(Net dstNet)
CV_Assert(model_proto.has_graph());
opencv_onnx::GraphProto graph_proto = model_proto.graph();
std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
// List of internal blobs shapes.
std::map<std::string, MatShape> outShapes;
// Add all the input shapes. This includes both the constant blobs and the network's input shapes.
for (int i = 0; i < graph_proto.input_size(); ++i)
{
opencv_onnx::ValueInfoProto valueInfoProto = graph_proto.input(i);
CV_Assert(valueInfoProto.has_type());
opencv_onnx::TypeProto typeProto = valueInfoProto.type();
CV_Assert(typeProto.has_tensor_type());
opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
CV_Assert(tensor.has_shape());
opencv_onnx::TensorShapeProto tensorShape = tensor.shape();
MatShape inpShape(tensorShape.dim_size());
for (int j = 0; j < inpShape.size(); ++j)
{
inpShape[j] = tensorShape.dim(j).dim_value();
}
outShapes[valueInfoProto.name()] = inpShape;
}
std::string framework_name;
if (model_proto.has_producer_name()) {
@ -301,6 +351,7 @@ void ONNXImporter::populateNet(Net dstNet)
// create map with network inputs (without const blobs)
std::map<std::string, LayerInfo> layer_id;
std::map<std::string, LayerInfo>::iterator layerId;
std::map<std::string, MatShape>::iterator shapeIt;
// fill map: push layer name, layer id and output id
std::vector<String> netInputs;
for (int j = 0; j < graph_proto.input_size(); j++)
@ -317,9 +368,9 @@ void ONNXImporter::populateNet(Net dstNet)
LayerParams layerParams;
opencv_onnx::NodeProto node_proto;
for(int i = 0; i < layersSize; i++)
for(int li = 0; li < layersSize; li++)
{
node_proto = graph_proto.node(i);
node_proto = graph_proto.node(li);
layerParams = getLayerParams(node_proto);
CV_Assert(node_proto.output_size() >= 1);
layerParams.name = node_proto.output(0);
@ -598,6 +649,65 @@ void ONNXImporter::populateNet(Net dstNet)
{
layerParams.type = "Padding";
}
else if (layer_type == "Shape")
{
CV_Assert(node_proto.input_size() == 1);
shapeIt = outShapes.find(node_proto.input(0));
CV_Assert(shapeIt != outShapes.end());
MatShape inpShape = shapeIt->second;
Mat shapeMat(inpShape.size(), 1, CV_32S);
for (int j = 0; j < inpShape.size(); ++j)
shapeMat.at<int>(j) = inpShape[j];
shapeMat.dims = 1;
constBlobs.insert(std::make_pair(layerParams.name, shapeMat));
continue;
}
else if (layer_type == "Gather")
{
CV_Assert(node_proto.input_size() == 2);
CV_Assert(layerParams.has("axis"));
Mat input = getBlob(node_proto, constBlobs, 0);
Mat indexMat = getBlob(node_proto, constBlobs, 1);
CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
int index = indexMat.at<int>(0);
int axis = layerParams.get<int>("axis");
std::vector<cv::Range> ranges(input.dims, Range::all());
ranges[axis] = Range(index, index + 1);
Mat out = input(ranges);
constBlobs.insert(std::make_pair(layerParams.name, out));
continue;
}
else if (layer_type == "Concat")
{
bool hasVariableInps = false;
for (int i = 0; i < node_proto.input_size(); ++i)
{
if (layer_id.find(node_proto.input(i)) != layer_id.end())
{
hasVariableInps = true;
break;
}
}
if (!hasVariableInps)
{
std::vector<Mat> inputs(node_proto.input_size()), concatenated;
for (size_t i = 0; i < inputs.size(); ++i)
{
inputs[i] = getBlob(node_proto, constBlobs, i);
}
Ptr<Layer> concat = ConcatLayer::create(layerParams);
runLayer(concat, inputs, concatenated);
CV_Assert(concatenated.size() == 1);
constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
continue;
}
}
else
{
for (int j = 0; j < node_proto.input_size(); j++) {
@ -609,12 +719,24 @@ void ONNXImporter::populateNet(Net dstNet)
int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
layer_id.insert(std::make_pair(layerParams.name, LayerInfo(id, 0)));
std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
for (int j = 0; j < node_proto.input_size(); j++) {
layerId = layer_id.find(node_proto.input(j));
if (layerId != layer_id.end()) {
dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
// Collect input shapes.
shapeIt = outShapes.find(node_proto.input(j));
CV_Assert(shapeIt != outShapes.end());
layerInpShapes.push_back(shapeIt->second);
}
}
// Compute shape of output blob for this layer.
Ptr<Layer> layer = dstNet.getLayer(id);
layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
CV_Assert(!layerOutShapes.empty());
outShapes[layerParams.name] = layerOutShapes[0];
}
}

@ -152,6 +152,7 @@ InfEngineBackendNet::InfEngineBackendNet()
{
targetDevice = InferenceEngine::TargetDevice::eCPU;
precision = InferenceEngine::Precision::FP32;
hasNetOwner = false;
}
InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
@ -162,6 +163,7 @@ InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
outputs = net.getOutputsInfo();
layers.resize(net.layerCount()); // A hack to execute InfEngineBackendNet::layerCount correctly.
netOwner = net;
hasNetOwner = true;
}
void InfEngineBackendNet::Release() CV_NOEXCEPT
@ -178,12 +180,12 @@ void InfEngineBackendNet::setPrecision(InferenceEngine::Precision p) CV_NOEXCEPT
InferenceEngine::Precision InfEngineBackendNet::getPrecision() CV_NOEXCEPT
{
return precision;
return hasNetOwner ? netOwner.getPrecision() : precision;
}
InferenceEngine::Precision InfEngineBackendNet::getPrecision() const CV_NOEXCEPT
{
return precision;
return hasNetOwner ? netOwner.getPrecision() : precision;
}
// Assume that outputs of network is unconnected blobs.

@ -136,6 +136,9 @@ private:
InferenceEngine::InferRequest infRequest;
// In case of models from Model Optimizer we need to manage their lifetime.
InferenceEngine::CNNNetwork netOwner;
// There is no way to check if netOwner is initialized or not so we use
// a separate flag to determine if the model has been loaded from IR.
bool hasNetOwner;
std::string name;

@ -471,6 +471,7 @@ TEST(Test_Caffe, shared_weights)
net.setInput(blob_1, "input_1");
net.setInput(blob_2, "input_2");
net.setPreferableBackend(DNN_BACKEND_OPENCV);
Mat sum = net.forward();

@ -306,7 +306,7 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc)
// batch size 1
testDarknetModel(config_file, weights_file, ref.rowRange(0, 2), scoreDiff, iouDiff);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_MYRIAD)
#endif
// batch size 2

@ -166,7 +166,7 @@ TEST_P(Deconvolution, Accuracy)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
dilation.width == 2 && dilation.height == 2)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
hasBias && group != 1)
throw SkipTestException("Test is disabled for OpenVINO 2018R4");

@ -137,7 +137,7 @@ TEST_P(Test_Caffe_layers, Convolution)
TEST_P(Test_Caffe_layers, DeConvolution)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_CPU)
throw SkipTestException("Test is disabled for OpenVINO 2018R4");
#endif
@ -918,8 +918,11 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_DWconv_Prelu, Combine(Values(3, 6), Val
// Using Intel's Model Optimizer generate .xml and .bin files:
// ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \
// -p FP32 -i -b ${batch_size} -o /path/to/output/folder
TEST(Layer_Test_Convolution_DLDT, Accuracy)
typedef testing::TestWithParam<Target> Layer_Test_Convolution_DLDT;
TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
{
Target targetId = GetParam();
Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
@ -930,17 +933,29 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
Mat outDefault = netDefault.forward();
net.setInput(inp);
Mat out = net.forward();
net.setPreferableTarget(targetId);
if (targetId != DNN_TARGET_MYRIAD)
{
Mat out = net.forward();
normAssert(outDefault, out);
normAssert(outDefault, out);
std::vector<int> outLayers = net.getUnconnectedOutLayers();
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
std::vector<int> outLayers = net.getUnconnectedOutLayers();
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
}
else
{
// An assertion is expected because the model is in FP32 format but
// Myriad plugin supports only FP16 models.
ASSERT_ANY_THROW(net.forward());
}
}
TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
{
Target targetId = GetParam();
Mat inp = blobFromNPY(_tf("blob.npy"));
Mat inputs[] = {Mat(inp.dims, inp.size, CV_8U), Mat()};
@ -951,12 +966,25 @@ TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
for (int i = 0; i < 2; ++i)
{
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
net.setPreferableTarget(targetId);
net.setInput(inputs[i]);
outs[i] = net.forward();
ASSERT_EQ(outs[i].type(), CV_32F);
if (targetId != DNN_TARGET_MYRIAD)
{
outs[i] = net.forward();
ASSERT_EQ(outs[i].type(), CV_32F);
}
else
{
// An assertion is expected because the model is in FP32 format but
// Myriad plugin supports only FP16 models.
ASSERT_ANY_THROW(net.forward());
}
}
normAssert(outs[0], outs[1]);
if (targetId != DNN_TARGET_MYRIAD)
normAssert(outs[0], outs[1]);
}
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Convolution_DLDT,
testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)));
// 1. Create a .prototxt file with the following network:
// layer {
@ -980,14 +1008,17 @@ TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
// net.save('/path/to/caffemodel')
//
// 3. Convert using ModelOptimizer.
typedef testing::TestWithParam<tuple<int, int> > Test_DLDT_two_inputs;
typedef testing::TestWithParam<tuple<int, int, Target> > Test_DLDT_two_inputs;
TEST_P(Test_DLDT_two_inputs, as_IR)
{
int firstInpType = get<0>(GetParam());
int secondInpType = get<1>(GetParam());
// TODO: It looks like a bug in Inference Engine.
Target targetId = get<2>(GetParam());
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018040000
if (secondInpType == CV_8U)
throw SkipTestException("");
throw SkipTestException("Test is enabled starts from OpenVINO 2018R4");
#endif
Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
int inpSize[] = {1, 2, 3};
@ -998,11 +1029,21 @@ TEST_P(Test_DLDT_two_inputs, as_IR)
net.setInput(firstInp, "data");
net.setInput(secondInp, "second_input");
Mat out = net.forward();
net.setPreferableTarget(targetId);
if (targetId != DNN_TARGET_MYRIAD)
{
Mat out = net.forward();
Mat ref;
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
normAssert(out, ref);
Mat ref;
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
normAssert(out, ref);
}
else
{
// An assertion is expected because the model is in FP32 format but
// Myriad plugin supports only FP16 models.
ASSERT_ANY_THROW(net.forward());
}
}
TEST_P(Test_DLDT_two_inputs, as_backend)
@ -1010,6 +1051,8 @@ TEST_P(Test_DLDT_two_inputs, as_backend)
static const float kScale = 0.5f;
static const float kScaleInv = 1.0f / kScale;
Target targetId = get<2>(GetParam());
Net net;
LayerParams lp;
lp.type = "Eltwise";
@ -1018,9 +1061,9 @@ TEST_P(Test_DLDT_two_inputs, as_backend)
int eltwiseId = net.addLayerToPrev(lp.name, lp.type, lp); // connect to a first input
net.connect(0, 1, eltwiseId, 1); // connect to a second input
int inpSize[] = {1, 2, 3};
Mat firstInp(3, &inpSize[0], get<0>(GetParam()));
Mat secondInp(3, &inpSize[0], get<1>(GetParam()));
int inpSize[] = {1, 2, 3, 4};
Mat firstInp(4, &inpSize[0], get<0>(GetParam()));
Mat secondInp(4, &inpSize[0], get<1>(GetParam()));
randu(firstInp, 0, 255);
randu(secondInp, 0, 255);
@ -1028,15 +1071,20 @@ TEST_P(Test_DLDT_two_inputs, as_backend)
net.setInput(firstInp, "data", kScale);
net.setInput(secondInp, "second_input", kScaleInv);
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(targetId);
Mat out = net.forward();
Mat ref;
addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
normAssert(out, ref);
// Output values are in range [0, 637.5].
double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
normAssert(out, ref, "", l1, lInf);
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs, Combine(
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F)
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE))
));
class UnsupportedLayer : public Layer

@ -162,6 +162,10 @@ TEST_P(Test_ONNX_layers, MultyInputs)
normAssert(ref, out, "", default_l1, default_lInf);
}
// Runs the "dynamic_reshape" ONNX test model through the shared testONNXModels helper.
// NOTE(review): presumably the model computes its reshape target at runtime from
// shape-related nodes -- confirm against the test data.
TEST_P(Test_ONNX_layers, DynamicReshape)
{
testONNXModels("dynamic_reshape");
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());

@ -136,7 +136,7 @@ TEST_P(Test_Torch_layers, run_reshape_change_batch_size)
TEST_P(Test_Torch_layers, run_reshape)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is disabled for OpenVINO 2018R4");
#endif
@ -172,7 +172,7 @@ TEST_P(Test_Torch_layers, run_depth_concat)
TEST_P(Test_Torch_layers, run_deconv)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is disabled for OpenVINO 2018R4");
#endif

@ -116,6 +116,31 @@ PERF_TEST_P(MatSize, equalizeHist,
}
#undef MatSize
typedef TestBaseWithParam< tuple<int, int> > Dim_Cmpmethod;
// Performance test for cv::compareHist, parameterized by histogram
// dimensionality (1 or 3) and by comparison method. Both histograms are
// CV_32FC1 and filled with random values by randu(…, 0, 256); only timing
// is measured, no output is checked.
PERF_TEST_P(Dim_Cmpmethod, compareHist,
            testing::Combine(testing::Values(1, 3),
                             testing::Values(HISTCMP_CORREL, HISTCMP_CHISQR, HISTCMP_INTERSECT, HISTCMP_BHATTACHARYYA, HISTCMP_CHISQR_ALT, HISTCMP_KL_DIV))
            )
{
    const int ndims = get<0>(GetParam());
    const int cmpMethod = get<1>(GetParam());
    // Only the first `ndims` entries are used as the histogram extents.
    const int binCounts[] = { 2048, 128, 64 };

    Mat histA(ndims, binCounts, CV_32FC1);
    Mat histB(ndims, binCounts, CV_32FC1);
    randu(histA, 0, 256);
    randu(histB, 0, 256);

    // The inputs are registered as 256-row, single-channel views.
    declare.in(histA.reshape(1, 256), histB.reshape(1, 256));

    TEST_CYCLE()
    {
        compareHist(histA, histB, cmpMethod);
    }

    SANITY_CHECK_NOTHING();
}
typedef tuple<Size, double> Sz_ClipLimit_t;
typedef TestBaseWithParam<Sz_ClipLimit_t> Sz_ClipLimit;

@ -41,6 +41,7 @@
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/openvx/ovx_defs.hpp"
@ -1938,10 +1939,6 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
CV_Assert( it.planes[0].isContinuous() && it.planes[1].isContinuous() );
#if CV_SSE2
bool haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
const float* h1 = it.planes[0].ptr<float>();
@ -1961,50 +1958,63 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
}
else if( method == CV_COMP_CORREL )
{
#if CV_SSE2
if (haveSIMD)
#if CV_SIMD_64F
v_float64 v_s1 = vx_setzero_f64();
v_float64 v_s2 = vx_setzero_f64();
v_float64 v_s11 = vx_setzero_f64();
v_float64 v_s12 = vx_setzero_f64();
v_float64 v_s22 = vx_setzero_f64();
for ( ; j <= len - v_float32::nlanes; j += v_float32::nlanes)
{
__m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1;
__m128d v_s11 = v_s1, v_s22 = v_s1, v_s12 = v_s1;
for ( ; j <= len - 4; j += 4)
{
__m128 v_a = _mm_loadu_ps(h1 + j);
__m128 v_b = _mm_loadu_ps(h2 + j);
// 0-1
__m128d v_ad = _mm_cvtps_pd(v_a);
__m128d v_bd = _mm_cvtps_pd(v_b);
v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd));
v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad));
v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd));
v_s1 = _mm_add_pd(v_s1, v_ad);
v_s2 = _mm_add_pd(v_s2, v_bd);
// 2-3
v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8)));
v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8)));
v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd));
v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad));
v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd));
v_s1 = _mm_add_pd(v_s1, v_ad);
v_s2 = _mm_add_pd(v_s2, v_bd);
}
double CV_DECL_ALIGNED(16) ar[10];
_mm_store_pd(ar, v_s12);
_mm_store_pd(ar + 2, v_s11);
_mm_store_pd(ar + 4, v_s22);
_mm_store_pd(ar + 6, v_s1);
_mm_store_pd(ar + 8, v_s2);
s12 += ar[0] + ar[1];
s11 += ar[2] + ar[3];
s22 += ar[4] + ar[5];
s1 += ar[6] + ar[7];
s2 += ar[8] + ar[9];
v_float32 v_a = vx_load(h1 + j);
v_float32 v_b = vx_load(h2 + j);
// 0-1
v_float64 v_ad = v_cvt_f64(v_a);
v_float64 v_bd = v_cvt_f64(v_b);
v_s12 = v_muladd(v_ad, v_bd, v_s12);
v_s11 = v_muladd(v_ad, v_ad, v_s11);
v_s22 = v_muladd(v_bd, v_bd, v_s22);
v_s1 += v_ad;
v_s2 += v_bd;
// 2-3
v_ad = v_cvt_f64_high(v_a);
v_bd = v_cvt_f64_high(v_b);
v_s12 = v_muladd(v_ad, v_bd, v_s12);
v_s11 = v_muladd(v_ad, v_ad, v_s11);
v_s22 = v_muladd(v_bd, v_bd, v_s22);
v_s1 += v_ad;
v_s2 += v_bd;
}
#endif
s12 += v_reduce_sum(v_s12);
s11 += v_reduce_sum(v_s11);
s22 += v_reduce_sum(v_s22);
s1 += v_reduce_sum(v_s1);
s2 += v_reduce_sum(v_s2);
#elif CV_SIMD && 0 //Disable vectorization for CV_COMP_CORREL if f64 is unsupported due to low precision
v_float32 v_s1 = vx_setzero_f32();
v_float32 v_s2 = vx_setzero_f32();
v_float32 v_s11 = vx_setzero_f32();
v_float32 v_s12 = vx_setzero_f32();
v_float32 v_s22 = vx_setzero_f32();
for (; j <= len - v_float32::nlanes; j += v_float32::nlanes)
{
v_float32 v_a = vx_load(h1 + j);
v_float32 v_b = vx_load(h2 + j);
v_s12 = v_muladd(v_a, v_b, v_s12);
v_s11 = v_muladd(v_a, v_a, v_s11);
v_s22 = v_muladd(v_b, v_b, v_s22);
v_s1 += v_a;
v_s2 += v_b;
}
s12 += v_reduce_sum(v_s12);
s11 += v_reduce_sum(v_s11);
s22 += v_reduce_sum(v_s22);
s1 += v_reduce_sum(v_s1);
s2 += v_reduce_sum(v_s2);
#endif
for( ; j < len; j++ )
{
double a = h1[j];
@ -2019,67 +2029,68 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
}
else if( method == CV_COMP_INTERSECT )
{
#if CV_NEON
float32x4_t v_result = vdupq_n_f32(0.0f);
for( ; j <= len - 4; j += 4 )
v_result = vaddq_f32(v_result, vminq_f32(vld1q_f32(h1 + j), vld1q_f32(h2 + j)));
float CV_DECL_ALIGNED(16) ar[4];
vst1q_f32(ar, v_result);
result += ar[0] + ar[1] + ar[2] + ar[3];
#elif CV_SSE2
if (haveSIMD)
#if CV_SIMD_64F
v_float64 v_result = vx_setzero_f64();
for ( ; j <= len - v_float32::nlanes; j += v_float32::nlanes)
{
__m128d v_result = _mm_setzero_pd();
for ( ; j <= len - 4; j += 4)
{
__m128 v_src = _mm_min_ps(_mm_loadu_ps(h1 + j),
_mm_loadu_ps(h2 + j));
v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src));
v_src = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8));
v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src));
}
double CV_DECL_ALIGNED(16) ar[2];
_mm_store_pd(ar, v_result);
result += ar[0] + ar[1];
v_float32 v_src = v_min(vx_load(h1 + j), vx_load(h2 + j));
v_result += v_cvt_f64(v_src) + v_cvt_f64_high(v_src);
}
result += v_reduce_sum(v_result);
#elif CV_SIMD
v_float32 v_result = vx_setzero_f32();
for (; j <= len - v_float32::nlanes; j += v_float32::nlanes)
{
v_float32 v_src = v_min(vx_load(h1 + j), vx_load(h2 + j));
v_result += v_src;
}
#endif
result += v_reduce_sum(v_result);
#endif
for( ; j < len; j++ )
result += std::min(h1[j], h2[j]);
}
else if( method == CV_COMP_BHATTACHARYYA )
{
#if CV_SSE2
if (haveSIMD)
#if CV_SIMD_64F
v_float64 v_s1 = vx_setzero_f64();
v_float64 v_s2 = vx_setzero_f64();
v_float64 v_result = vx_setzero_f64();
for ( ; j <= len - v_float32::nlanes; j += v_float32::nlanes)
{
__m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1, v_result = v_s1;
for ( ; j <= len - 4; j += 4)
{
__m128 v_a = _mm_loadu_ps(h1 + j);
__m128 v_b = _mm_loadu_ps(h2 + j);
__m128d v_ad = _mm_cvtps_pd(v_a);
__m128d v_bd = _mm_cvtps_pd(v_b);
v_s1 = _mm_add_pd(v_s1, v_ad);
v_s2 = _mm_add_pd(v_s2, v_bd);
v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd)));
v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8)));
v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8)));
v_s1 = _mm_add_pd(v_s1, v_ad);
v_s2 = _mm_add_pd(v_s2, v_bd);
v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd)));
}
double CV_DECL_ALIGNED(16) ar[6];
_mm_store_pd(ar, v_s1);
_mm_store_pd(ar + 2, v_s2);
_mm_store_pd(ar + 4, v_result);
s1 += ar[0] + ar[1];
s2 += ar[2] + ar[3];
result += ar[4] + ar[5];
v_float32 v_a = vx_load(h1 + j);
v_float32 v_b = vx_load(h2 + j);
v_float64 v_ad = v_cvt_f64(v_a);
v_float64 v_bd = v_cvt_f64(v_b);
v_s1 += v_ad;
v_s2 += v_bd;
v_result += v_sqrt(v_ad * v_bd);
v_ad = v_cvt_f64_high(v_a);
v_bd = v_cvt_f64_high(v_b);
v_s1 += v_ad;
v_s2 += v_bd;
v_result += v_sqrt(v_ad * v_bd);
}
#endif
s1 += v_reduce_sum(v_s1);
s2 += v_reduce_sum(v_s2);
result += v_reduce_sum(v_result);
#elif CV_SIMD && 0 //Disable vectorization for CV_COMP_BHATTACHARYYA if f64 is unsupported due to low precision
v_float32 v_s1 = vx_setzero_f32();
v_float32 v_s2 = vx_setzero_f32();
v_float32 v_result = vx_setzero_f32();
for (; j <= len - v_float32::nlanes; j += v_float32::nlanes)
{
v_float32 v_a = vx_load(h1 + j);
v_float32 v_b = vx_load(h2 + j);
v_s1 += v_a;
v_s2 += v_b;
v_result += v_sqrt(v_a * v_b);
}
s1 += v_reduce_sum(v_s1);
s2 += v_reduce_sum(v_s2);
result += v_reduce_sum(v_result);
#endif
for( ; j < len; j++ )
{
double a = h1[j];

@ -99,9 +99,7 @@ static void init_MFCreateDXGIDeviceManager()
pMFCreateDXGIDeviceManager_initialized = true;
}
#endif
#if (WINVER >= 0x0602) // Available since Win 8
#pragma comment(lib, "MinCore_Downlevel")
#endif
#pragma comment(lib, "Shlwapi.lib")
#endif
#include <mferror.h>

@ -49,7 +49,7 @@ def getXCodeMajor():
raise Exception("Failed to parse Xcode version")
class Builder:
def __init__(self, opencv, contrib, dynamic, bitcodedisabled, exclude, targets):
def __init__(self, opencv, contrib, dynamic, bitcodedisabled, exclude, enablenonfree, targets):
self.opencv = os.path.abspath(opencv)
self.contrib = None
if contrib:
@ -61,6 +61,7 @@ class Builder:
self.dynamic = dynamic
self.bitcodedisabled = bitcodedisabled
self.exclude = exclude
self.enablenonfree = enablenonfree
self.targets = targets
def getBD(self, parent, t):
@ -138,7 +139,9 @@ class Builder:
"-DBUILD_SHARED_LIBS=ON",
"-DCMAKE_MACOSX_BUNDLE=ON",
"-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO",
] if self.dynamic else [])
] if self.dynamic else []) + ([
"-DOPENCV_ENABLE_NONFREE=ON"
] if self.enablenonfree else [])
if len(self.exclude) > 0:
args += ["-DBUILD_opencv_world=OFF"] if not self.dynamic else []
@ -286,6 +289,7 @@ if __name__ == "__main__":
parser.add_argument('--iphoneos_deployment_target', default=os.environ.get('IPHONEOS_DEPLOYMENT_TARGET', IPHONEOS_DEPLOYMENT_TARGET), help='specify IPHONEOS_DEPLOYMENT_TARGET')
parser.add_argument('--iphoneos_archs', default='armv7,armv7s,arm64', help='select iPhoneOS target ARCHS')
parser.add_argument('--iphonesimulator_archs', default='i386,x86_64', help='select iPhoneSimulator target ARCHS')
parser.add_argument('--enable_nonfree', default=False, dest='enablenonfree', action='store_true', help='enable non-free modules (disabled by default)')
args = parser.parse_args()
os.environ['IPHONEOS_DEPLOYMENT_TARGET'] = args.iphoneos_deployment_target
@ -295,7 +299,7 @@ if __name__ == "__main__":
iphonesimulator_archs = args.iphonesimulator_archs.split(',')
print('Using iPhoneSimulator ARCHS=' + str(iphonesimulator_archs))
b = iOSBuilder(args.opencv, args.contrib, args.dynamic, args.bitcodedisabled, args.without,
b = iOSBuilder(args.opencv, args.contrib, args.dynamic, args.bitcodedisabled, args.without, args.enablenonfree,
[
(iphoneos_archs, "iPhoneOS"),
] if os.environ.get('BUILD_PRECOMMIT', None) else

@ -38,9 +38,10 @@ if __name__ == "__main__":
parser.add_argument('--opencv', metavar='DIR', default=folder, help='folder with opencv repository (default is "../.." relative to script location)')
parser.add_argument('--contrib', metavar='DIR', default=None, help='folder with opencv_contrib repository (default is "None" - build only main framework)')
parser.add_argument('--without', metavar='MODULE', default=[], action='append', help='OpenCV modules to exclude from the framework')
parser.add_argument('--enable_nonfree', default=False, dest='enablenonfree', action='store_true', help='enable non-free modules (disabled by default)')
args = parser.parse_args()
b = OSXBuilder(args.opencv, args.contrib, False, False, args.without,
b = OSXBuilder(args.opencv, args.contrib, False, False, args.without, args.enablenonfree,
[
(["x86_64"], "MacOSX")
])

Loading…
Cancel
Save