Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/13743/head
Alexander Alekhin 6 years ago
commit 665408e57f
Changed files (changed line count in parentheses):
  1. modules/dnn/src/dnn.cpp (18)
  2. modules/dnn/src/layers/blank_layer.cpp (21)
  3. modules/dnn/src/layers/convolution_layer.cpp (36)
  4. modules/dnn/src/layers/pooling_layer.cpp (8)
  5. modules/dnn/src/op_inf_engine.cpp (36)
  6. modules/dnn/src/op_inf_engine.hpp (4)
  7. modules/dnn/test/test_layers.cpp (90)
  8. modules/dnn/test/test_misc.cpp (34)
  9. modules/dnn/test/test_onnx_importer.cpp (2)
  10. modules/dnn/test/test_tf_importer.cpp (4)
  11. modules/imgproc/perf/perf_blur.cpp (23)
  12. modules/imgproc/src/blend.cpp (223)
  13. modules/imgproc/src/median_blur.cpp (92)
  14. modules/imgproc/src/pyramids.cpp (3)
  15. modules/imgproc/src/spatialgradient.cpp (258)
  16. modules/imgproc/test/test_filter.cpp (9)
  17. modules/js/src/core_bindings.cpp (9)
  18. modules/ml/src/svm.cpp (11)
  19. modules/ml/test/test_svmtrainauto.cpp (45)
  20. samples/dnn/tf_text_graph_common.py (4)

@@ -148,7 +148,13 @@ private:
#else
cv::dnn::Net net;
cv::dnn::LayerParams lp;
net.addLayerToPrev("testLayer", "Identity", lp);
lp.set("kernel_size", 1);
lp.set("num_output", 1);
lp.set("bias_term", false);
lp.type = "Convolution";
lp.name = "testLayer";
lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
net.addLayerToPrev(lp.name, lp.type, lp);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(target);
static int inpDims[] = {1, 2, 3, 4};
@@ -2676,7 +2682,7 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
for (auto& it : ieNet.getOutputsInfo())
{
Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
CV_Assert(ieLayer);
@@ -2871,8 +2877,7 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
std::vector<LayerPin> pins;
for (int i = 0; i < outBlobNames.size(); i++)
{
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
pins.insert(pins.end(), lp.begin(), lp.end());
pins.push_back(impl->getPinByAlias(outBlobNames[i]));
}
impl->setUpNet(pins);
@@ -2885,9 +2890,10 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
for (int i = 0; i < outBlobNames.size(); i++)
{
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
for (int i = 0; i < lp.size(); i++)
outputBlobs[i].resize(lp.size());
for (int j = 0; j < lp.size(); j++)
{
outputBlobs[i].push_back(impl->getBlob(lp[i]));
outputBlobs[i][j] = impl->getBlob(lp[j]);
}
}
}
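With this change each requested blob name yields every output pin of that layer instead of a single one. A minimal usage sketch of the affected overload, with an illustrative layer name (the forwardAndRetrieve test further down in this diff exercises the same path):
#include <opencv2/dnn.hpp>
#include <vector>
void fetchAllOutputs(cv::dnn::Net& net, const cv::Mat& input)
{
    net.setInput(input);
    std::vector<cv::String> names;
    names.push_back("testLayer");                  // one entry per requested layer
    std::vector<std::vector<cv::Mat> > blobs;
    net.forward(blobs, names);
    // blobs[0] now holds one Mat per output of "testLayer" (e.g. both tops of a
    // Caffe Slice layer), filled by index as in the fixed loop above.
}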

@@ -110,14 +110,25 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
CV_Assert(!input->dims.empty());
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::SplitLayer ieLayer(name);
ieLayer.setOutputPorts({InferenceEngine::Port()});
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
if (preferableTarget == DNN_TARGET_MYRIAD)
{
ieLayer.setType("Copy");
}
else
{
ieLayer.setType("Split");
ieLayer.getParameters()["axis"] = input->dims.size() - 1;
ieLayer.getParameters()["out_sizes"] = input->dims[0];
}
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
CV_Assert(!input->dims.empty());
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Split";

@@ -281,7 +281,7 @@ public:
const int outCn = blobs[0].size[0];
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing
Mat wm = blobs[0].reshape(1, outCn).clone();
Mat wm = blobs[0].reshape(1, outCn);
if( wm.step1() % VEC_ALIGN != 0 )
{
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
@@ -374,6 +374,10 @@ public:
if (!w.empty())
{
// Keep origin weights unchanged.
if (weightsMat.data == blobs[0].data)
weightsMat = weightsMat.clone();
Mat originWeights = blobs[0].reshape(1, outCn);
for (int i = 0; i < outCn; ++i)
{
@@ -551,13 +555,13 @@ public:
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ConvolutionLayer ieLayer(name);
ieLayer.setKernel({kernel.height, kernel.width});
ieLayer.setStrides({stride.height, stride.width});
ieLayer.setDilation({dilation.height, dilation.width});
ieLayer.setPaddingsBegin({pad.height, pad.width});
ieLayer.setPaddingsEnd({pad.height, pad.width});
ieLayer.setGroup(group);
ieLayer.setOutDepth(outCn);
ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width});
ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width});
ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width});
ieLayer.setGroup((size_t)group);
ieLayer.setOutDepth((size_t)outCn);
ieLayer.setWeights(ieWeights);
if (ieBiases)
@@ -1220,7 +1224,7 @@ public:
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
if (INF_ENGINE_RELEASE == 2018050000 && (adjustPad.height || adjustPad.width))
if (INF_ENGINE_RELEASE >= 2018050000 && (adjustPad.height || adjustPad.width))
return false;
const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
@@ -1783,13 +1787,13 @@ public:
InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);
ieLayer.setKernel({kernel.height, kernel.width});
ieLayer.setStrides({stride.height, stride.width});
ieLayer.setDilation({dilation.height, dilation.width});
ieLayer.setPaddingsBegin({pad.height, pad.width});
ieLayer.setPaddingsEnd({pad.height, pad.width});
ieLayer.setGroup(group);
ieLayer.setOutDepth(numOutput);
ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width});
ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width});
ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width});
ieLayer.setGroup((size_t)group);
ieLayer.setOutDepth((size_t)numOutput);
ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW));
if (hasBias())
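The repeated (size_t) casts above address braced-initializer narrowing: the 2018R5 Builder setters take std::vector<size_t>, while the kernel, stride and pad fields are int. A minimal sketch of the issue; setKernel below is a stand-in declaration for illustration, not the real Inference Engine signature:
#include <cstddef>
#include <vector>
static void setKernel(const std::vector<size_t>&) {}   // stand-in for the builder setter
static void example(int height, int width)
{
    // setKernel({height, width});                      // narrowing int -> size_t inside a
    //                                                  // braced init list: warning or error
    setKernel({(size_t)height, (size_t)width});         // explicit casts compile cleanly
}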

@@ -299,10 +299,10 @@ public:
if (type == MAX || type == AVE)
{
InferenceEngine::Builder::PoolingLayer ieLayer(name);
ieLayer.setKernel({kernel.height, kernel.width});
ieLayer.setStrides({stride.height, stride.width});
ieLayer.setPaddingsBegin({pad_t, pad_l});
ieLayer.setPaddingsEnd({pad_b, pad_r});
ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
ieLayer.setPaddingsBegin({(size_t)pad_t, (size_t)pad_l});
ieLayer.setPaddingsEnd({(size_t)pad_b, (size_t)pad_r});
ieLayer.setPoolingType(type == MAX ?
InferenceEngine::Builder::PoolingLayer::PoolingType::MAX :
InferenceEngine::Builder::PoolingLayer::PoolingType::AVG);

@@ -82,7 +82,7 @@ void InfEngineBackendNet::connect(const std::vector<Ptr<BackendWrapper> >& input
CV_Assert(it != layers.end());
const int layerId = it->second;
for (int i = 0; i < inpWrappers.size(); ++i)
for (size_t i = 0; i < inpWrappers.size(); ++i)
{
const auto& inp = inpWrappers[i];
const std::string& inpName = inp->dataPtr->name;
@@ -103,7 +103,7 @@ void InfEngineBackendNet::connect(const std::vector<Ptr<BackendWrapper> >& input
else
inpId = it->second;
netBuilder.connect(inpId, {layerId, i});
netBuilder.connect((size_t)inpId, {(size_t)layerId, i});
unconnectedLayersIds.erase(inpId);
}
CV_Assert(!outputs.empty());
@@ -119,7 +119,7 @@ void InfEngineBackendNet::init(int targetId)
for (int id : unconnectedLayersIds)
{
InferenceEngine::Builder::OutputLayer outLayer("myconv1");
netBuilder.addLayer({id}, outLayer);
netBuilder.addLayer({InferenceEngine::PortInfo(id)}, outLayer);
}
cnn = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(netBuilder.build()));
}
@@ -718,19 +718,33 @@ Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
return Mat(size, CV_32F, (void*)blob->buffer());
}
InfEngineBackendLayer::InfEngineBackendLayer(const InferenceEngine::DataPtr& output_)
{
output = output_;
}
bool InfEngineBackendLayer::getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const
{
std::vector<size_t> dims = output->dims;
std::vector<int> shape(dims.rbegin(), dims.rend());
outputs.assign(1, shape);
InferenceEngine::ICNNNetwork::InputShapes inShapes = t_net.getInputShapes();
InferenceEngine::ICNNNetwork::InputShapes::iterator itr;
bool equal_flag = true;
size_t i = 0;
for (itr = inShapes.begin(); itr != inShapes.end(); ++itr)
{
InferenceEngine::SizeVector currentInShape(inputs[i].begin(), inputs[i].end());
if (itr->second != currentInShape)
{
itr->second = currentInShape;
equal_flag = false;
}
i++;
}
if (!equal_flag)
{
InferenceEngine::CNNNetwork curr_t_net(t_net);
curr_t_net.reshape(inShapes);
}
std::vector<size_t> dims = t_net.getOutputsInfo()[name]->getDims();
outputs.push_back(MatShape(dims.begin(), dims.end()));
return false;
}

@@ -260,7 +260,7 @@ InferenceEngine::TBlob<int16_t>::Ptr convertFp16(const InferenceEngine::Blob::Pt
class InfEngineBackendLayer : public Layer
{
public:
InfEngineBackendLayer(const InferenceEngine::DataPtr& output);
InfEngineBackendLayer(const InferenceEngine::CNNNetwork &t_net_) : t_net(t_net_) {};
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
@@ -273,7 +273,7 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE;
private:
InferenceEngine::DataPtr output;
InferenceEngine::CNNNetwork t_net;
};
#endif // HAVE_INF_ENGINE

@@ -236,6 +236,10 @@ TEST_P(Test_Caffe_layers, Dropout)
TEST_P(Test_Caffe_layers, Concat)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE > 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#endif
testLayerUsingCaffeModels("layer_concat");
testLayerUsingCaffeModels("layer_concat_optim", true, false);
testLayerUsingCaffeModels("layer_concat_shared_input", true, false);
@@ -923,8 +927,9 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
{
Target targetId = GetParam();
std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin"));
Mat inp = blobFromNPY(_tf("blob.npy"));
@@ -935,22 +940,15 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
net.setInput(inp);
net.setPreferableTarget(targetId);
if (targetId != DNN_TARGET_MYRIAD)
{
Mat out = net.forward();
Mat out = net.forward();
normAssert(outDefault, out);
double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.4e-3 : 1e-5;
double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.8e-2 : 1e-4;
normAssert(outDefault, out, "", l1, lInf);
std::vector<int> outLayers = net.getUnconnectedOutLayers();
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
}
else
{
// An assertion is expected because the model is in FP32 format but
// Myriad plugin supports only FP16 models.
ASSERT_ANY_THROW(net.forward());
}
std::vector<int> outLayers = net.getUnconnectedOutLayers();
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output");
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Convolution");
}
TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
@@ -962,23 +960,16 @@ TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
randu(inputs[0], 0, 255);
inputs[0].convertTo(inputs[1], CV_32F);
std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
Mat outs[2];
for (int i = 0; i < 2; ++i)
{
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin"));
net.setPreferableTarget(targetId);
net.setInput(inputs[i]);
if (targetId != DNN_TARGET_MYRIAD)
{
outs[i] = net.forward();
ASSERT_EQ(outs[i].type(), CV_32F);
}
else
{
// An assertion is expected because the model is in FP32 format but
// Myriad plugin supports only FP16 models.
ASSERT_ANY_THROW(net.forward());
}
outs[i] = net.forward();
ASSERT_EQ(outs[i].type(), CV_32F);
}
if (targetId != DNN_TARGET_MYRIAD)
normAssert(outs[0], outs[1]);
@@ -1008,8 +999,8 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Convolution_DLDT,
// net.save('/path/to/caffemodel')
//
// 3. Convert using ModelOptimizer.
typedef testing::TestWithParam<tuple<int, int, Target> > Test_DLDT_two_inputs;
TEST_P(Test_DLDT_two_inputs, as_IR)
typedef testing::TestWithParam<tuple<int, int, Target, std::vector<int> > > Test_DLDT_two_inputs_3dim;
TEST_P(Test_DLDT_two_inputs_3dim, as_IR)
{
int firstInpType = get<0>(GetParam());
int secondInpType = get<1>(GetParam());
@@ -1020,32 +1011,39 @@ TEST_P(Test_DLDT_two_inputs, as_IR)
throw SkipTestException("Test is enabled starts from OpenVINO 2018R4");
#endif
Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
int inpSize[] = {1, 2, 3};
Mat firstInp(3, &inpSize[0], firstInpType);
Mat secondInp(3, &inpSize[0], secondInpType);
std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
Net net = readNet(_tf("net_two_inputs" + suffix + ".xml"), _tf("net_two_inputs.bin"));
std::vector<int> inpSize = get<3>(GetParam());
Mat firstInp(3, inpSize.data(), firstInpType);
Mat secondInp(3, inpSize.data(), secondInpType);
randu(firstInp, 0, 255);
randu(secondInp, 0, 255);
net.setInput(firstInp, "data");
net.setInput(secondInp, "second_input");
net.setPreferableTarget(targetId);
if (targetId != DNN_TARGET_MYRIAD)
{
Mat out = net.forward();
Mat ref;
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
normAssert(out, ref);
}
else
{
// An assertion is expected because the model is in FP32 format but
// Myriad plugin supports only FP16 models.
ASSERT_ANY_THROW(net.forward());
}
double l1 = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
(firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.06 : 0.0;
double lInf = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
(firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.23 : 0.0;
Mat out = net.forward();
Mat ref;
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
normAssert(out, ref, "", l1, lInf);
}
std::vector< std::vector<int> > list_sizes{ {1, 2, 3}, {3, 2, 1}, {5, 5, 5}, {13, 7, 11} };
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs_3dim, Combine(
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)),
testing::ValuesIn(list_sizes)
));
typedef testing::TestWithParam<tuple<int, int, Target> > Test_DLDT_two_inputs;
TEST_P(Test_DLDT_two_inputs, as_backend)
{
static const float kScale = 0.5f;

@@ -308,4 +308,38 @@ TEST_P(DeprecatedForward, CustomLayerWithFallback)
INSTANTIATE_TEST_CASE_P(/**/, DeprecatedForward, dnnBackendsAndTargets());
TEST(Net, forwardAndRetrieve)
{
std::string prototxt =
"input: \"data\"\n"
"layer {\n"
" name: \"testLayer\"\n"
" type: \"Slice\"\n"
" bottom: \"data\"\n"
" top: \"firstCopy\"\n"
" top: \"secondCopy\"\n"
" slice_param {\n"
" axis: 0\n"
" slice_point: 2\n"
" }\n"
"}";
Net net = readNetFromCaffe(&prototxt[0], prototxt.size());
net.setPreferableBackend(DNN_BACKEND_OPENCV);
Mat inp(4, 5, CV_32F);
randu(inp, -1, 1);
net.setInput(inp);
std::vector<String> outNames;
outNames.push_back("testLayer");
std::vector<std::vector<Mat> > outBlobs;
net.forward(outBlobs, outNames);
EXPECT_EQ(outBlobs.size(), 1);
EXPECT_EQ(outBlobs[0].size(), 2);
normAssert(outBlobs[0][0], inp.rowRange(0, 2), "first part");
normAssert(outBlobs[0][1], inp.rowRange(2, 4), "second part");
}
}} // namespace

@@ -395,7 +395,7 @@ TEST_P(Test_ONNX_nets, DenseNet121)
TEST_P(Test_ONNX_nets, Inception_v1)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
#endif

@@ -241,7 +241,7 @@ TEST_P(Test_TensorFlow_layers, unfused_flatten)
TEST_P(Test_TensorFlow_layers, leaky_relu)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
throw SkipTestException("");
#endif
@@ -388,7 +388,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("Unstable test case");
#endif

@@ -230,4 +230,27 @@ PERF_TEST_P(Size_MatType_BorderType, blur5x5,
SANITY_CHECK(dst, 1);
}
///////////// BlendLinear ////////////////////////
PERF_TEST_P(Size_MatType, BlendLinear,
testing::Combine(
testing::Values(szVGA, sz720p, sz1080p, sz2160p),
testing::Values(CV_8UC1, CV_32FC1, CV_8UC3, CV_32FC3, CV_8UC4, CV_32FC4)
)
)
{
const Size srcSize = get<0>(GetParam());
const int srcType = get<1>(GetParam());
Mat src1(srcSize, srcType), src2(srcSize, srcType), dst(srcSize, srcType);
Mat weights1(srcSize, CV_32FC1), weights2(srcSize, CV_32FC1);
declare.in(src1, src2, WARMUP_RNG).in(weights1, weights2, WARMUP_READ).out(dst);
randu(weights1, 0, 1);
randu(weights2, 0, 1);
TEST_CYCLE() blendLinear(src1, src2, weights1, weights2, dst);
SANITY_CHECK_NOTHING();
}
} // namespace

@@ -48,44 +48,44 @@
#include "opencv2/core/hal/intrin.hpp"
namespace cv {
#if CV_SIMD128
static inline v_float32x4 blend(const v_float32x4& v_src1, const v_float32x4& v_src2, const v_float32x4& v_w1, const v_float32x4& v_w2)
#if CV_SIMD
static inline v_float32 blend(const v_float32& v_src1, const v_float32& v_src2, const v_float32& v_w1, const v_float32& v_w2)
{
const v_float32x4 v_eps = v_setall_f32(1e-5f);
v_float32x4 v_denom = v_w1 + v_w2 + v_eps;
const v_float32 v_eps = vx_setall_f32(1e-5f);
v_float32 v_denom = v_w1 + v_w2 + v_eps;
return (v_src1 * v_w1 + v_src2 * v_w2) / v_denom;
}
static inline v_float32x4 blend(const v_float32x4& v_src1, const v_float32x4& v_src2, const float* w_ptr1, const float* w_ptr2, int offset)
static inline v_float32 blend(const v_float32& v_src1, const v_float32& v_src2, const float* w_ptr1, const float* w_ptr2, int offset)
{
v_float32x4 v_w1 = v_load(w_ptr1 + offset);
v_float32x4 v_w2 = v_load(w_ptr2 + offset);
v_float32 v_w1 = vx_load(w_ptr1 + offset);
v_float32 v_w2 = vx_load(w_ptr2 + offset);
return blend(v_src1, v_src2, v_w1, v_w2);
}
static inline v_uint32x4 saturate_f32_u32(const v_float32x4& vec)
static inline v_uint32 saturate_f32_u32(const v_float32& vec)
{
const v_int32x4 z = v_setzero_s32();
const v_int32x4 x = v_setall_s32(255);
const v_int32 z = vx_setzero_s32();
const v_int32 x = vx_setall_s32(255);
return v_reinterpret_as_u32(v_min(v_max(v_round(vec), z), x));
}
static inline v_uint8x16 pack_f32tou8(v_float32x4& val0, v_float32x4& val1, v_float32x4& val2, v_float32x4& val3)
static inline v_uint8 pack_f32tou8(v_float32& val0, v_float32& val1, v_float32& val2, v_float32& val3)
{
v_uint32x4 a = saturate_f32_u32(val0);
v_uint32x4 b = saturate_f32_u32(val1);
v_uint32x4 c = saturate_f32_u32(val2);
v_uint32x4 d = saturate_f32_u32(val3);
v_uint16x8 e = v_pack(a, b);
v_uint16x8 f = v_pack(c, d);
v_uint32 a = saturate_f32_u32(val0);
v_uint32 b = saturate_f32_u32(val1);
v_uint32 c = saturate_f32_u32(val2);
v_uint32 d = saturate_f32_u32(val3);
v_uint16 e = v_pack(a, b);
v_uint16 f = v_pack(c, d);
return v_pack(e, f);
}
static inline void store_pack_f32tou8(uchar* ptr, v_float32x4& val0, v_float32x4& val1, v_float32x4& val2, v_float32x4& val3)
static inline void store_pack_f32tou8(uchar* ptr, v_float32& val0, v_float32& val1, v_float32& val2, v_float32& val3)
{
v_store((ptr), pack_f32tou8(val0, val1, val2, val3));
}
static inline void expand_u8tof32(const v_uint8x16& src, v_float32x4& dst0, v_float32x4& dst1, v_float32x4& dst2, v_float32x4& dst3)
static inline void expand_u8tof32(const v_uint8& src, v_float32& dst0, v_float32& dst1, v_float32& dst2, v_float32& dst3)
{
v_uint16x8 a0, a1;
v_uint16 a0, a1;
v_expand(src, a0, a1);
v_uint32x4 b0, b1,b2,b3;
v_uint32 b0, b1,b2,b3;
v_expand(a0, b0, b1);
v_expand(a1, b2, b3);
dst0 = v_cvt_f32(v_reinterpret_as_s32(b0));
@@ -93,71 +93,69 @@ static inline void expand_u8tof32(const v_uint8x16& src, v_float32x4& dst0, v_fl
dst2 = v_cvt_f32(v_reinterpret_as_s32(b2));
dst3 = v_cvt_f32(v_reinterpret_as_s32(b3));
}
static inline void load_expand_u8tof32(const uchar* ptr, v_float32x4& dst0, v_float32x4& dst1, v_float32x4& dst2, v_float32x4& dst3)
static inline void load_expand_u8tof32(const uchar* ptr, v_float32& dst0, v_float32& dst1, v_float32& dst2, v_float32& dst3)
{
v_uint8x16 a = v_load((ptr));
v_uint8 a = vx_load((ptr));
expand_u8tof32(a, dst0, dst1, dst2, dst3);
}
int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn);
int blendLinearSimd128(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn);
int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn)
int blendLinearSimd(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn);
int blendLinearSimd(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn);
int blendLinearSimd(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn)
{
int step = v_uint8x16::nlanes * cn;
int weight_step = v_uint8x16::nlanes;
switch(cn)
{
case 1:
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
for(int weight_offset = 0 ; x <= width - v_uint8::nlanes; x += v_uint8::nlanes, weight_offset += v_uint8::nlanes)
{
v_float32x4 v_src10, v_src11, v_src12, v_src13;
v_float32x4 v_src20, v_src21, v_src22, v_src23;
v_float32 v_src10, v_src11, v_src12, v_src13;
v_float32 v_src20, v_src21, v_src22, v_src23;
load_expand_u8tof32(src1 + x, v_src10, v_src11, v_src12, v_src13);
load_expand_u8tof32(src2 + x, v_src20, v_src21, v_src22, v_src23);
v_float32x4 v_dst0 = blend(v_src10, v_src20, weights1, weights2, weight_offset);
v_float32x4 v_dst1 = blend(v_src11, v_src21, weights1, weights2, weight_offset + 4);
v_float32x4 v_dst2 = blend(v_src12, v_src22, weights1, weights2, weight_offset + 8);
v_float32x4 v_dst3 = blend(v_src13, v_src23, weights1, weights2, weight_offset + 12);
v_float32 v_dst0 = blend(v_src10, v_src20, weights1, weights2, weight_offset);
v_float32 v_dst1 = blend(v_src11, v_src21, weights1, weights2, weight_offset + v_float32::nlanes);
v_float32 v_dst2 = blend(v_src12, v_src22, weights1, weights2, weight_offset + 2*v_float32::nlanes);
v_float32 v_dst3 = blend(v_src13, v_src23, weights1, weights2, weight_offset + 3*v_float32::nlanes);
store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3);
}
break;
case 2:
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
for(int weight_offset = 0 ; x <= width - 2*v_uint8::nlanes; x += 2*v_uint8::nlanes, weight_offset += v_uint8::nlanes)
{
v_uint8x16 v_src10, v_src11, v_src20, v_src21;
v_uint8 v_src10, v_src11, v_src20, v_src21;
v_load_deinterleave(src1 + x, v_src10, v_src11);
v_load_deinterleave(src2 + x, v_src20, v_src21);
v_float32x4 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113;
v_float32x4 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213;
v_float32 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113;
v_float32 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213;
expand_u8tof32(v_src10, v_src100, v_src101, v_src102, v_src103);
expand_u8tof32(v_src11, v_src110, v_src111, v_src112, v_src113);
expand_u8tof32(v_src20, v_src200, v_src201, v_src202, v_src203);
expand_u8tof32(v_src21, v_src210, v_src211, v_src212, v_src213);
v_float32x4 v_dst0 = blend(v_src100, v_src200, weights1, weights2, weight_offset);
v_float32x4 v_dst1 = blend(v_src110, v_src210, weights1, weights2, weight_offset);
v_float32x4 v_dst2 = blend(v_src101, v_src201, weights1, weights2, weight_offset + 4);
v_float32x4 v_dst3 = blend(v_src111, v_src211, weights1, weights2, weight_offset + 4);
v_float32x4 v_dst4 = blend(v_src102, v_src202, weights1, weights2, weight_offset + 8);
v_float32x4 v_dst5 = blend(v_src112, v_src212, weights1, weights2, weight_offset + 8);
v_float32x4 v_dst6 = blend(v_src103, v_src203, weights1, weights2, weight_offset + 12);
v_float32x4 v_dst7 = blend(v_src113, v_src213, weights1, weights2, weight_offset + 12);
v_uint8x16 v_dsta = pack_f32tou8(v_dst0, v_dst2, v_dst4, v_dst6);
v_uint8x16 v_dstb = pack_f32tou8(v_dst1, v_dst3, v_dst5, v_dst7);
v_float32 v_dst0 = blend(v_src100, v_src200, weights1, weights2, weight_offset);
v_float32 v_dst1 = blend(v_src110, v_src210, weights1, weights2, weight_offset);
v_float32 v_dst2 = blend(v_src101, v_src201, weights1, weights2, weight_offset + v_float32::nlanes);
v_float32 v_dst3 = blend(v_src111, v_src211, weights1, weights2, weight_offset + v_float32::nlanes);
v_float32 v_dst4 = blend(v_src102, v_src202, weights1, weights2, weight_offset + 2*v_float32::nlanes);
v_float32 v_dst5 = blend(v_src112, v_src212, weights1, weights2, weight_offset + 2*v_float32::nlanes);
v_float32 v_dst6 = blend(v_src103, v_src203, weights1, weights2, weight_offset + 3*v_float32::nlanes);
v_float32 v_dst7 = blend(v_src113, v_src213, weights1, weights2, weight_offset + 3*v_float32::nlanes);
v_uint8 v_dsta = pack_f32tou8(v_dst0, v_dst2, v_dst4, v_dst6);
v_uint8 v_dstb = pack_f32tou8(v_dst1, v_dst3, v_dst5, v_dst7);
v_store_interleave(dst + x, v_dsta, v_dstb);
}
break;
case 3:
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
for(int weight_offset = 0 ; x <= width - 3*v_uint8::nlanes; x += 3*v_uint8::nlanes, weight_offset += v_uint8::nlanes)
{
v_uint8x16 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
v_uint8 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
v_load_deinterleave(src1 + x, v_src10, v_src11, v_src12);
v_load_deinterleave(src2 + x, v_src20, v_src21, v_src22);
v_float32x4 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113, v_src120, v_src121, v_src122, v_src123;
v_float32x4 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213, v_src220, v_src221, v_src222, v_src223;
v_float32 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113, v_src120, v_src121, v_src122, v_src123;
v_float32 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213, v_src220, v_src221, v_src222, v_src223;
expand_u8tof32(v_src10, v_src100, v_src101, v_src102, v_src103);
expand_u8tof32(v_src11, v_src110, v_src111, v_src112, v_src113);
expand_u8tof32(v_src12, v_src120, v_src121, v_src122, v_src123);
@@ -165,14 +163,14 @@ int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weight
expand_u8tof32(v_src21, v_src210, v_src211, v_src212, v_src213);
expand_u8tof32(v_src22, v_src220, v_src221, v_src222, v_src223);
v_float32x4 v_w10 = v_load(weights1 + weight_offset);
v_float32x4 v_w11 = v_load(weights1 + weight_offset + 4);
v_float32x4 v_w12 = v_load(weights1 + weight_offset + 8);
v_float32x4 v_w13 = v_load(weights1 + weight_offset + 12);
v_float32x4 v_w20 = v_load(weights2 + weight_offset);
v_float32x4 v_w21 = v_load(weights2 + weight_offset + 4);
v_float32x4 v_w22 = v_load(weights2 + weight_offset + 8);
v_float32x4 v_w23 = v_load(weights2 + weight_offset + 12);
v_float32 v_w10 = vx_load(weights1 + weight_offset);
v_float32 v_w11 = vx_load(weights1 + weight_offset + v_float32::nlanes);
v_float32 v_w12 = vx_load(weights1 + weight_offset + 2*v_float32::nlanes);
v_float32 v_w13 = vx_load(weights1 + weight_offset + 3*v_float32::nlanes);
v_float32 v_w20 = vx_load(weights2 + weight_offset);
v_float32 v_w21 = vx_load(weights2 + weight_offset + v_float32::nlanes);
v_float32 v_w22 = vx_load(weights2 + weight_offset + 2*v_float32::nlanes);
v_float32 v_w23 = vx_load(weights2 + weight_offset + 3*v_float32::nlanes);
v_src100 = blend(v_src100, v_src200, v_w10, v_w20);
v_src110 = blend(v_src110, v_src210, v_w10, v_w20);
v_src120 = blend(v_src120, v_src220, v_w10, v_w20);
@@ -187,34 +185,36 @@ int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weight
v_src123 = blend(v_src123, v_src223, v_w13, v_w23);
v_uint8x16 v_dst0 = pack_f32tou8(v_src100, v_src101, v_src102, v_src103);
v_uint8x16 v_dst1 = pack_f32tou8(v_src110, v_src111, v_src112, v_src113);
v_uint8x16 v_dst2 = pack_f32tou8(v_src120, v_src121, v_src122, v_src123);
v_uint8 v_dst0 = pack_f32tou8(v_src100, v_src101, v_src102, v_src103);
v_uint8 v_dst1 = pack_f32tou8(v_src110, v_src111, v_src112, v_src113);
v_uint8 v_dst2 = pack_f32tou8(v_src120, v_src121, v_src122, v_src123);
v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2);
}
break;
case 4:
step = v_uint8x16::nlanes;
weight_step = v_float32x4::nlanes;
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
for(int weight_offset = 0 ; x <= width - v_uint8::nlanes; x += v_uint8::nlanes, weight_offset += v_float32::nlanes)
{
v_float32x4 v_src10, v_src11, v_src12, v_src13, v_src14, v_src15, v_src16, v_src17;
v_float32x4 v_src20, v_src21, v_src22, v_src23, v_src24, v_src25, v_src26, v_src27;
v_float32 v_src10, v_src11, v_src12, v_src13;
v_float32 v_src20, v_src21, v_src22, v_src23;
load_expand_u8tof32(src1 + x, v_src10, v_src11, v_src12, v_src13);
load_expand_u8tof32(src2 + x, v_src20, v_src21, v_src22, v_src23);
v_transpose4x4(v_src10, v_src11, v_src12, v_src13, v_src14, v_src15, v_src16, v_src17);
v_transpose4x4(v_src20, v_src21, v_src22, v_src23, v_src24, v_src25, v_src26, v_src27);
v_float32 v_w10, v_w11, v_w12, v_w13, v_w20, v_w21, v_w22, v_w23, v_w0, v_w1;
v_w10 = vx_load(weights1 + weight_offset);
v_zip(v_w10, v_w10, v_w0, v_w1);
v_zip(v_w0, v_w0, v_w10, v_w11);
v_zip(v_w1, v_w1, v_w12, v_w13);
v_w20 = vx_load(weights2 + weight_offset);
v_zip(v_w20, v_w20, v_w0, v_w1);
v_zip(v_w0, v_w0, v_w20, v_w21);
v_zip(v_w1, v_w1, v_w22, v_w23);
v_float32 v_dst0, v_dst1, v_dst2, v_dst3;
v_dst0 = blend(v_src10, v_src20, v_w10, v_w20);
v_dst1 = blend(v_src11, v_src21, v_w11, v_w21);
v_dst2 = blend(v_src12, v_src22, v_w12, v_w22);
v_dst3 = blend(v_src13, v_src23, v_w13, v_w23);
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
v_src10 = blend(v_src14, v_src24, v_w1, v_w2);
v_src11 = blend(v_src15, v_src25, v_w1, v_w2);
v_src12 = blend(v_src16, v_src26, v_w1, v_w2);
v_src13 = blend(v_src17, v_src27, v_w1, v_w2);
v_float32x4 v_dst0, v_dst1, v_dst2, v_dst3;
v_transpose4x4(v_src10, v_src11, v_src12, v_src13, v_dst0, v_dst1, v_dst2, v_dst3);
store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3);
}
break;
@@ -224,68 +224,67 @@ int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weight
return x;
}
int blendLinearSimd128(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn)
int blendLinearSimd(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn)
{
int step = v_float32x4::nlanes*cn;
switch(cn)
{
case 1:
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
for(int weight_offset = 0 ; x <= width - v_float32::nlanes; x += v_float32::nlanes, weight_offset += v_float32::nlanes)
{
v_float32x4 v_src1 = v_load(src1 + x);
v_float32x4 v_src2 = v_load(src2 + x);
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
v_float32 v_src1 = vx_load(src1 + x);
v_float32 v_src2 = vx_load(src2 + x);
v_float32 v_w1 = vx_load(weights1 + weight_offset);
v_float32 v_w2 = vx_load(weights2 + weight_offset);
v_float32x4 v_dst = blend(v_src1, v_src2, v_w1, v_w2);
v_float32 v_dst = blend(v_src1, v_src2, v_w1, v_w2);
v_store(dst + x, v_dst);
}
break;
case 2:
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
for(int weight_offset = 0 ; x <= width - 2*v_float32::nlanes; x += 2*v_float32::nlanes, weight_offset += v_float32::nlanes)
{
v_float32x4 v_src10, v_src11, v_src20, v_src21;
v_float32 v_src10, v_src11, v_src20, v_src21;
v_load_deinterleave(src1 + x, v_src10, v_src11);
v_load_deinterleave(src2 + x, v_src20, v_src21);
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
v_float32 v_w1 = vx_load(weights1 + weight_offset);
v_float32 v_w2 = vx_load(weights2 + weight_offset);
v_float32x4 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
v_float32x4 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
v_store_interleave(dst + x, v_dst0, v_dst1);
}
break;
case 3:
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
for(int weight_offset = 0 ; x <= width - 3*v_float32::nlanes; x += 3*v_float32::nlanes, weight_offset += v_float32::nlanes)
{
v_float32x4 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
v_float32 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
v_load_deinterleave(src1 + x, v_src10, v_src11, v_src12);
v_load_deinterleave(src2 + x, v_src20, v_src21, v_src22);
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
v_float32 v_w1 = vx_load(weights1 + weight_offset);
v_float32 v_w2 = vx_load(weights2 + weight_offset);
v_float32x4 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
v_float32x4 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
v_float32x4 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
v_float32 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2);
}
break;
case 4:
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
for(int weight_offset = 0 ; x <= width - 4*v_float32::nlanes; x += 4*v_float32::nlanes, weight_offset += v_float32::nlanes)
{
v_float32x4 v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
v_float32 v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
v_load_deinterleave(src1 + x, v_src10, v_src11, v_src12, v_src13);
v_load_deinterleave(src2 + x, v_src20, v_src21, v_src22, v_src23);
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
v_float32 v_w1 = vx_load(weights1 + weight_offset);
v_float32 v_w2 = vx_load(weights2 + weight_offset);
v_float32x4 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
v_float32x4 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
v_float32x4 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
v_float32x4 v_dst3 = blend(v_src13, v_src23, v_w1, v_w2);
v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
v_float32 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
v_float32 v_dst3 = blend(v_src13, v_src23, v_w1, v_w2);
v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2, v_dst3);
}
@@ -321,8 +320,8 @@ public:
T * const dst_row = dst->ptr<T>(y);
int x = 0;
#if CV_SIMD128
x = blendLinearSimd128(src1_row, src2_row, weights1_row, weights2_row, dst_row, x, width, cn);
#if CV_SIMD
x = blendLinearSimd(src1_row, src2_row, weights1_row, weights2_row, dst_row, x, width, cn);
#endif
for ( ; x < width; ++x)
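The rewrite above replaces the fixed 128-bit types (v_float32x4, v_load) with the width-agnostic universal intrinsics (v_float32, vx_load) and expresses all offsets in terms of ::nlanes, so the same source vectorizes to whatever CV_SIMD_WIDTH the build provides (SSE, AVX2, and so on). A minimal sketch of that pattern on a simpler weighted-add kernel, not the actual blend code:
#include "opencv2/core/hal/intrin.hpp"
static void weightedAdd(const float* a, const float* b, float* out, int n, float w)
{
    int i = 0;
#if CV_SIMD
    const cv::v_float32 vw = cv::vx_setall_f32(w);
    for (; i <= n - cv::v_float32::nlanes; i += cv::v_float32::nlanes)   // width-agnostic step
        cv::v_store(out + i, cv::vx_load(a + i) * vw + cv::vx_load(b + i));
#endif
    for (; i < n; ++i)                                                   // scalar tail
        out[i] = a[i] * w + b[i];
}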

@@ -110,15 +110,19 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
int cn = _dst.channels(), m = _dst.rows, r = (ksize-1)/2;
CV_Assert(cn > 0 && cn <= 4);
size_t sstep = _src.step, dstep = _dst.step;
Histogram CV_DECL_ALIGNED(16) H[4];
HT CV_DECL_ALIGNED(16) luc[4][16];
int STRIPE_SIZE = std::min( _dst.cols, 512/cn );
std::vector<HT> _h_coarse(1 * 16 * (STRIPE_SIZE + 2*r) * cn + 16);
std::vector<HT> _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + 16);
HT* h_coarse = alignPtr(&_h_coarse[0], 16);
HT* h_fine = alignPtr(&_h_fine[0], 16);
#if defined(CV_SIMD_WIDTH) && CV_SIMD_WIDTH >= 16
# define CV_ALIGNMENT CV_SIMD_WIDTH
#else
# define CV_ALIGNMENT 16
#endif
std::vector<HT> _h_coarse(1 * 16 * (STRIPE_SIZE + 2*r) * cn + CV_ALIGNMENT);
std::vector<HT> _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + CV_ALIGNMENT);
HT* h_coarse = alignPtr(&_h_coarse[0], CV_ALIGNMENT);
HT* h_fine = alignPtr(&_h_fine[0], CV_ALIGNMENT);
for( int x = 0; x < _dst.cols; x += STRIPE_SIZE )
{
@@ -148,10 +152,14 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
const uchar* p0 = src + sstep * std::max( 0, i-r-1 );
const uchar* p1 = src + sstep * std::min( m-1, i+r );
memset( H, 0, cn*sizeof(H[0]) );
memset( luc, 0, cn*sizeof(luc[0]) );
for( c = 0; c < cn; c++ )
{
Histogram CV_DECL_ALIGNED(CV_ALIGNMENT) H;
HT CV_DECL_ALIGNED(CV_ALIGNMENT) luc[16];
memset(&H, 0, sizeof(H));
memset(luc, 0, sizeof(luc));
// Update column histograms for the entire row.
for( j = 0; j < n; j++ )
{
@@ -163,21 +171,21 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
for (k = 0; k < 16; ++k)
{
#if CV_SIMD256
v_store(H[c].fine[k], v_mul_wrap(v256_load(h_fine + 16 * n*(16 * c + k)), v256_setall_u16(2 * r + 1)) + v256_load(H[c].fine[k]));
v_store(H.fine[k], v_mul_wrap(v256_load(h_fine + 16 * n*(16 * c + k)), v256_setall_u16(2 * r + 1)) + v256_load(H.fine[k]));
#elif CV_SIMD128
v_store(H[c].fine[k], v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k)), v_setall_u16((ushort)(2 * r + 1))) + v_load(H[c].fine[k]));
v_store(H[c].fine[k] + 8, v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k) + 8), v_setall_u16((ushort)(2 * r + 1))) + v_load(H[c].fine[k] + 8));
v_store(H.fine[k], v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k)), v_setall_u16((ushort)(2 * r + 1))) + v_load(H.fine[k]));
v_store(H.fine[k] + 8, v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k) + 8), v_setall_u16((ushort)(2 * r + 1))) + v_load(H.fine[k] + 8));
#else
for (int ind = 0; ind < 16; ++ind)
H[c].fine[k][ind] = (HT)(H[c].fine[k][ind] + (2 * r + 1) * h_fine[16 * n*(16 * c + k) + ind]);
H.fine[k][ind] = (HT)(H.fine[k][ind] + (2 * r + 1) * h_fine[16 * n*(16 * c + k) + ind]);
#endif
}
#if CV_SIMD256
v_uint16x16 v_coarse = v256_load(H[c].coarse);
v_uint16x16 v_coarse = v256_load(H.coarse);
#elif CV_SIMD128
v_uint16x8 v_coarsel = v_load(H[c].coarse);
v_uint16x8 v_coarseh = v_load(H[c].coarse + 8);
v_uint16x8 v_coarsel = v_load(H.coarse);
v_uint16x8 v_coarseh = v_load(H.coarse + 8);
#endif
HT* px = h_coarse + 16 * n*c;
for( j = 0; j < 2*r; ++j, px += 16 )
@@ -189,7 +197,7 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
v_coarseh += v_load(px + 8);
#else
for (int ind = 0; ind < 16; ++ind)
H[c].coarse[ind] += px[ind];
H.coarse[ind] += px[ind];
#endif
}
@@ -201,24 +209,24 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
px = h_coarse + 16 * (n*c + std::min(j + r, n - 1));
#if CV_SIMD256
v_coarse += v256_load(px);
v_store(H[c].coarse, v_coarse);
v_store(H.coarse, v_coarse);
#elif CV_SIMD128
v_coarsel += v_load(px);
v_coarseh += v_load(px + 8);
v_store(H[c].coarse, v_coarsel);
v_store(H[c].coarse + 8, v_coarseh);
v_store(H.coarse, v_coarsel);
v_store(H.coarse + 8, v_coarseh);
#else
for (int ind = 0; ind < 16; ++ind)
H[c].coarse[ind] += px[ind];
H.coarse[ind] += px[ind];
#endif
// Find median at coarse level
for ( k = 0; k < 16 ; ++k )
{
sum += H[c].coarse[k];
sum += H.coarse[k];
if ( sum > t )
{
sum -= H[c].coarse[k];
sum -= H.coarse[k];
break;
}
}
@@ -231,7 +239,7 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
v_uint16x8 v_finel;
v_uint16x8 v_fineh;
#endif
if ( luc[c][k] <= j-r )
if ( luc[k] <= j-r )
{
#if CV_SIMD256
v_fine = v256_setzero_u16();
@@ -239,10 +247,10 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
v_finel = v_setzero_u16();
v_fineh = v_setzero_u16();
#else
memset(&H[c].fine[k], 0, 16 * sizeof(HT));
memset(&H.fine[k], 0, 16 * sizeof(HT));
#endif
px = h_fine + 16 * (n*(16 * c + k) + j - r);
for (luc[c][k] = HT(j - r); luc[c][k] < MIN(j + r + 1, n); ++luc[c][k], px += 16)
for (luc[k] = HT(j - r); luc[k] < MIN(j + r + 1, n); ++luc[k], px += 16)
{
#if CV_SIMD256
v_fine += v256_load(px);
@@ -251,11 +259,11 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
v_fineh += v_load(px + 8);
#else
for (int ind = 0; ind < 16; ++ind)
H[c].fine[k][ind] += px[ind];
H.fine[k][ind] += px[ind];
#endif
}
if ( luc[c][k] < j+r+1 )
if ( luc[k] < j+r+1 )
{
px = h_fine + 16 * (n*(16 * c + k) + (n - 1));
#if CV_SIMD256
@@ -265,50 +273,50 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
v_fineh += v_mul_wrap(v_load(px + 8), v_setall_u16((ushort)(j + r + 1 - n)));
#else
for (int ind = 0; ind < 16; ++ind)
H[c].fine[k][ind] = (HT)(H[c].fine[k][ind] + (j + r + 1 - n) * px[ind]);
H.fine[k][ind] = (HT)(H.fine[k][ind] + (j + r + 1 - n) * px[ind]);
#endif
luc[c][k] = (HT)(j+r+1);
luc[k] = (HT)(j+r+1);
}
}
else
{
#if CV_SIMD256
v_fine = v256_load(H[c].fine[k]);
v_fine = v256_load(H.fine[k]);
#elif CV_SIMD128
v_finel = v_load(H[c].fine[k]);
v_fineh = v_load(H[c].fine[k] + 8);
v_finel = v_load(H.fine[k]);
v_fineh = v_load(H.fine[k] + 8);
#endif
px = h_fine + 16*n*(16 * c + k);
for ( ; luc[c][k] < j+r+1; ++luc[c][k] )
for ( ; luc[k] < j+r+1; ++luc[k] )
{
#if CV_SIMD256
v_fine = v_fine + v256_load(px + 16 * MIN(luc[c][k], n - 1)) - v256_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0));
v_fine = v_fine + v256_load(px + 16 * MIN(luc[k], n - 1)) - v256_load(px + 16 * MAX(luc[k] - 2 * r - 1, 0));
#elif CV_SIMD128
v_finel = v_finel + v_load(px + 16 * MIN(luc[c][k], n - 1) ) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0));
v_fineh = v_fineh + v_load(px + 16 * MIN(luc[c][k], n - 1) + 8) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0) + 8);
v_finel = v_finel + v_load(px + 16 * MIN(luc[k], n - 1) ) - v_load(px + 16 * MAX(luc[k] - 2 * r - 1, 0));
v_fineh = v_fineh + v_load(px + 16 * MIN(luc[k], n - 1) + 8) - v_load(px + 16 * MAX(luc[k] - 2 * r - 1, 0) + 8);
#else
for (int ind = 0; ind < 16; ++ind)
H[c].fine[k][ind] += px[16 * MIN(luc[c][k], n - 1) + ind] - px[16 * MAX(luc[c][k] - 2 * r - 1, 0) + ind];
H.fine[k][ind] += px[16 * MIN(luc[k], n - 1) + ind] - px[16 * MAX(luc[k] - 2 * r - 1, 0) + ind];
#endif
}
}
px = h_coarse + 16 * (n*c + MAX(j - r, 0));
#if CV_SIMD256
v_store(H[c].fine[k], v_fine);
v_store(H.fine[k], v_fine);
v_coarse -= v256_load(px);
#elif CV_SIMD128
v_store(H[c].fine[k], v_finel);
v_store(H[c].fine[k] + 8, v_fineh);
v_store(H.fine[k], v_finel);
v_store(H.fine[k] + 8, v_fineh);
v_coarsel -= v_load(px);
v_coarseh -= v_load(px + 8);
#else
for (int ind = 0; ind < 16; ++ind)
H[c].coarse[ind] -= px[ind];
H.coarse[ind] -= px[ind];
#endif
/* Find median in segment */
segment = H[c].fine[k];
segment = H.fine[k];
for ( b = 0; b < 16 ; b++ )
{
sum += segment[b];
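The histogram scratch buffers in this file now follow the over-allocate-then-align pattern with CV_SIMD_WIDTH instead of a hard-coded 16, so wider vector units also get suitably aligned loads and stores. A minimal sketch of that pattern, independent of the median filter itself:
#include <opencv2/core/utility.hpp>   // cv::alignPtr
#include <vector>
static void alignedScratchExample(size_t count, int alignment /* e.g. CV_SIMD_WIDTH */)
{
    typedef unsigned short HT;                        // histogram counter type, as above
    std::vector<HT> storage(count + alignment);       // slack so aligning never overruns
    HT* h = cv::alignPtr(storage.data(), alignment);  // first address that is a multiple
                                                      // of `alignment` bytes
    // ... SIMD loads/stores go through h; `storage` keeps the memory alive ...
    (void)h;
}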

@@ -112,6 +112,7 @@ struct PyrDownVec_32s8u
v_rshr_pack_store<8>(dst + x, t0);
x += v_uint16::nlanes;
}
typedef int CV_DECL_ALIGNED(1) unaligned_int;
for ( ; x <= width - v_int32x4::nlanes; x += v_int32x4::nlanes)
{
v_int32x4 r0, r1, r2, r3, r4, t0;
@@ -122,7 +123,7 @@ struct PyrDownVec_32s8u
r4 = v_load(row4 + x);
t0 = r0 + r4 + (r2 + r2) + ((r1 + r3 + r2) << 2);
*(int*)(dst + x) = v_reinterpret_as_s32(v_rshr_pack<8>(v_pack_u(t0, t0), v_setzero_u16())).get0();
*((unaligned_int*) (dst + x)) = v_reinterpret_as_s32(v_rshr_pack<8>(v_pack_u(t0, t0), v_setzero_u16())).get0();
}
return x;
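The unaligned_int typedef above exists because dst + x is only uchar-aligned, so the old *(int*)(dst + x) store assumed a 4-byte alignment it does not have. Reducing the pointee's declared alignment to 1 lets the compiler emit an alignment-tolerant store. A minimal standalone sketch of the same idea (a memcpy would be the strictly portable alternative, noted here only as a comparison):
#include <opencv2/core/cvdef.h>   // CV_DECL_ALIGNED, uchar
#include <cstring>
typedef int CV_DECL_ALIGNED(1) unaligned_int;
static void storeIntUnaligned(uchar* p, int v)
{
    *(unaligned_int*)p = v;          // pattern used in the patch above
    // std::memcpy(p, &v, sizeof v); // portable alternative
}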

@@ -123,139 +123,125 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
}
}
// Pointer to row vectors
uchar *p_src, *c_src, *n_src; // previous, current, next row
short *c_dx, *c_dy;
int i_start = 0;
int j_start = 0;
#if CV_SIMD128
if(hasSIMD128())
#if CV_SIMD
// Characters in variable names have the following meanings:
// u: unsigned char
// s: signed int
//
// [row][column]
// m: offset -1
// n: offset 0
// p: offset 1
// Example: umn is offset -1 in row and offset 0 in column
for ( i = 0; i < H - 1; i += 2 )
{
uchar *m_src;
short *n_dx, *n_dy;
// Characters in variable names have the following meanings:
// u: unsigned char
// s: signed int
//
// [row][column]
// m: offset -1
// n: offset 0
// p: offset 1
// Example: umn is offset -1 in row and offset 0 in column
for ( i = 0; i < H - 1; i += 2 )
uchar *p_src = src.ptr<uchar>(i == 0 ? i_top : i - 1);
uchar *c_src = src.ptr<uchar>(i);
uchar *n_src = src.ptr<uchar>(i+1);
uchar *m_src = src.ptr<uchar>(i == H - 2 ? i_bottom : i + 2);
short *c_dx = dx.ptr<short>(i);
short *c_dy = dy.ptr<short>(i);
short *n_dx = dx.ptr<short>(i+1);
short *n_dy = dy.ptr<short>(i+1);
// Process rest of columns 16-column chunks at a time
for ( j = 1; j < W - v_uint8::nlanes; j += v_uint8::nlanes)
{
if ( i == 0 ) p_src = src.ptr<uchar>(i_top);
else p_src = src.ptr<uchar>(i-1);
c_src = src.ptr<uchar>(i);
n_src = src.ptr<uchar>(i+1);
if ( i == H - 2 ) m_src = src.ptr<uchar>(i_bottom);
else m_src = src.ptr<uchar>(i+2);
c_dx = dx.ptr<short>(i);
c_dy = dy.ptr<short>(i);
n_dx = dx.ptr<short>(i+1);
n_dy = dy.ptr<short>(i+1);
// Process rest of columns 16-column chunks at a time
for ( j = 1; j < W - 16; j += 16 )
{
// Load top row for 3x3 Sobel filter
v_uint8x16 v_um = v_load(&p_src[j-1]);
v_uint8x16 v_un = v_load(&p_src[j]);
v_uint8x16 v_up = v_load(&p_src[j+1]);
v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2;
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16x8 v_s1m1 = v_reinterpret_as_s16(v_um1);
v_int16x8 v_s1m2 = v_reinterpret_as_s16(v_um2);
v_int16x8 v_s1n1 = v_reinterpret_as_s16(v_un1);
v_int16x8 v_s1n2 = v_reinterpret_as_s16(v_un2);
v_int16x8 v_s1p1 = v_reinterpret_as_s16(v_up1);
v_int16x8 v_s1p2 = v_reinterpret_as_s16(v_up2);
// Load second row for 3x3 Sobel filter
v_um = v_load(&c_src[j-1]);
v_un = v_load(&c_src[j]);
v_up = v_load(&c_src[j+1]);
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16x8 v_s2m1 = v_reinterpret_as_s16(v_um1);
v_int16x8 v_s2m2 = v_reinterpret_as_s16(v_um2);
v_int16x8 v_s2n1 = v_reinterpret_as_s16(v_un1);
v_int16x8 v_s2n2 = v_reinterpret_as_s16(v_un2);
v_int16x8 v_s2p1 = v_reinterpret_as_s16(v_up1);
v_int16x8 v_s2p2 = v_reinterpret_as_s16(v_up2);
// Load third row for 3x3 Sobel filter
v_um = v_load(&n_src[j-1]);
v_un = v_load(&n_src[j]);
v_up = v_load(&n_src[j+1]);
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16x8 v_s3m1 = v_reinterpret_as_s16(v_um1);
v_int16x8 v_s3m2 = v_reinterpret_as_s16(v_um2);
v_int16x8 v_s3n1 = v_reinterpret_as_s16(v_un1);
v_int16x8 v_s3n2 = v_reinterpret_as_s16(v_un2);
v_int16x8 v_s3p1 = v_reinterpret_as_s16(v_up1);
v_int16x8 v_s3p2 = v_reinterpret_as_s16(v_up2);
// dx & dy for rows 1, 2, 3
v_int16x8 v_sdx1, v_sdy1;
spatialGradientKernel<v_int16x8>( v_sdx1, v_sdy1,
v_s1m1, v_s1n1, v_s1p1,
v_s2m1, v_s2p1,
v_s3m1, v_s3n1, v_s3p1 );
v_int16x8 v_sdx2, v_sdy2;
spatialGradientKernel<v_int16x8>( v_sdx2, v_sdy2,
v_s1m2, v_s1n2, v_s1p2,
v_s2m2, v_s2p2,
v_s3m2, v_s3n2, v_s3p2 );
// Store
v_store(&c_dx[j], v_sdx1);
v_store(&c_dx[j+8], v_sdx2);
v_store(&c_dy[j], v_sdy1);
v_store(&c_dy[j+8], v_sdy2);
// Load fourth row for 3x3 Sobel filter
v_um = v_load(&m_src[j-1]);
v_un = v_load(&m_src[j]);
v_up = v_load(&m_src[j+1]);
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16x8 v_s4m1 = v_reinterpret_as_s16(v_um1);
v_int16x8 v_s4m2 = v_reinterpret_as_s16(v_um2);
v_int16x8 v_s4n1 = v_reinterpret_as_s16(v_un1);
v_int16x8 v_s4n2 = v_reinterpret_as_s16(v_un2);
v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1);
v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2);
// dx & dy for rows 2, 3, 4
spatialGradientKernel<v_int16x8>( v_sdx1, v_sdy1,
v_s2m1, v_s2n1, v_s2p1,
v_s3m1, v_s3p1,
v_s4m1, v_s4n1, v_s4p1 );
spatialGradientKernel<v_int16x8>( v_sdx2, v_sdy2,
v_s2m2, v_s2n2, v_s2p2,
v_s3m2, v_s3p2,
v_s4m2, v_s4n2, v_s4p2 );
// Store
v_store(&n_dx[j], v_sdx1);
v_store(&n_dx[j+8], v_sdx2);
v_store(&n_dy[j], v_sdy1);
v_store(&n_dy[j+8], v_sdy2);
}
// Load top row for 3x3 Sobel filter
v_uint8 v_um = vx_load(&p_src[j-1]);
v_uint8 v_un = vx_load(&p_src[j]);
v_uint8 v_up = vx_load(&p_src[j+1]);
v_uint16 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2;
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16 v_s1m1 = v_reinterpret_as_s16(v_um1);
v_int16 v_s1m2 = v_reinterpret_as_s16(v_um2);
v_int16 v_s1n1 = v_reinterpret_as_s16(v_un1);
v_int16 v_s1n2 = v_reinterpret_as_s16(v_un2);
v_int16 v_s1p1 = v_reinterpret_as_s16(v_up1);
v_int16 v_s1p2 = v_reinterpret_as_s16(v_up2);
// Load second row for 3x3 Sobel filter
v_um = vx_load(&c_src[j-1]);
v_un = vx_load(&c_src[j]);
v_up = vx_load(&c_src[j+1]);
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16 v_s2m1 = v_reinterpret_as_s16(v_um1);
v_int16 v_s2m2 = v_reinterpret_as_s16(v_um2);
v_int16 v_s2n1 = v_reinterpret_as_s16(v_un1);
v_int16 v_s2n2 = v_reinterpret_as_s16(v_un2);
v_int16 v_s2p1 = v_reinterpret_as_s16(v_up1);
v_int16 v_s2p2 = v_reinterpret_as_s16(v_up2);
// Load third row for 3x3 Sobel filter
v_um = vx_load(&n_src[j-1]);
v_un = vx_load(&n_src[j]);
v_up = vx_load(&n_src[j+1]);
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16 v_s3m1 = v_reinterpret_as_s16(v_um1);
v_int16 v_s3m2 = v_reinterpret_as_s16(v_um2);
v_int16 v_s3n1 = v_reinterpret_as_s16(v_un1);
v_int16 v_s3n2 = v_reinterpret_as_s16(v_un2);
v_int16 v_s3p1 = v_reinterpret_as_s16(v_up1);
v_int16 v_s3p2 = v_reinterpret_as_s16(v_up2);
// dx & dy for rows 1, 2, 3
v_int16 v_sdx1, v_sdy1;
spatialGradientKernel<v_int16>( v_sdx1, v_sdy1,
v_s1m1, v_s1n1, v_s1p1,
v_s2m1, v_s2p1,
v_s3m1, v_s3n1, v_s3p1 );
v_int16 v_sdx2, v_sdy2;
spatialGradientKernel<v_int16>( v_sdx2, v_sdy2,
v_s1m2, v_s1n2, v_s1p2,
v_s2m2, v_s2p2,
v_s3m2, v_s3n2, v_s3p2 );
// Store
v_store(&c_dx[j], v_sdx1);
v_store(&c_dx[j+v_int16::nlanes], v_sdx2);
v_store(&c_dy[j], v_sdy1);
v_store(&c_dy[j+v_int16::nlanes], v_sdy2);
// Load fourth row for 3x3 Sobel filter
v_um = vx_load(&m_src[j-1]);
v_un = vx_load(&m_src[j]);
v_up = vx_load(&m_src[j+1]);
v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2);
v_int16 v_s4m1 = v_reinterpret_as_s16(v_um1);
v_int16 v_s4m2 = v_reinterpret_as_s16(v_um2);
v_int16 v_s4n1 = v_reinterpret_as_s16(v_un1);
v_int16 v_s4n2 = v_reinterpret_as_s16(v_un2);
v_int16 v_s4p1 = v_reinterpret_as_s16(v_up1);
v_int16 v_s4p2 = v_reinterpret_as_s16(v_up2);
// dx & dy for rows 2, 3, 4
spatialGradientKernel<v_int16>( v_sdx1, v_sdy1,
v_s2m1, v_s2n1, v_s2p1,
v_s3m1, v_s3p1,
v_s4m1, v_s4n1, v_s4p1 );
spatialGradientKernel<v_int16>( v_sdx2, v_sdy2,
v_s2m2, v_s2n2, v_s2p2,
v_s3m2, v_s3p2,
v_s4m2, v_s4n2, v_s4p2 );
// Store
v_store(&n_dx[j], v_sdx1);
v_store(&n_dx[j+v_int16::nlanes], v_sdx2);
v_store(&n_dy[j], v_sdy1);
v_store(&n_dy[j+v_int16::nlanes], v_sdy2);
}
}
i_start = i;
@@ -265,16 +251,12 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
uchar v00, v01, v02, v10, v11, v12, v20, v21, v22;
for ( i = 0; i < H; i++ )
{
if ( i == 0 ) p_src = src.ptr<uchar>(i_top);
else p_src = src.ptr<uchar>(i-1);
c_src = src.ptr<uchar>(i);
if ( i == H - 1 ) n_src = src.ptr<uchar>(i_bottom);
else n_src = src.ptr<uchar>(i+1);
uchar *p_src = src.ptr<uchar>(i == 0 ? i_top : i - 1);
uchar *c_src = src.ptr<uchar>(i);
uchar *n_src = src.ptr<uchar>(i == H - 1 ? i_bottom : i + 1);
c_dx = dx.ptr<short>(i);
c_dy = dy.ptr<short>(i);
short *c_dx = dx.ptr<short>(i);
short *c_dy = dy.ptr<short>(i);
// Process left-most column
j = 0;

@@ -2235,4 +2235,13 @@ TEST(Imgproc_Sobel, s16_regression_13506)
Sobel(src, dst, CV_16S, 0, 1, 5);
ASSERT_EQ(0.0, cvtest::norm(dst, ref, NORM_INF));
}
TEST(Imgproc_Pyrdown, issue_12961)
{
Mat src(9, 9, CV_8UC1, Scalar::all(0));
Mat dst;
cv::pyrDown(src, dst);
ASSERT_EQ(0.0, cv::norm(dst));
}
}} // namespace

@@ -341,6 +341,9 @@ EMSCRIPTEN_BINDINGS(binding_utils)
register_vector<cv::Mat>("MatVector");
register_vector<cv::Rect>("RectVector");
register_vector<cv::KeyPoint>("KeyPointVector");
register_vector<cv::DMatch>("DMatchVector");
register_vector<std::vector<cv::DMatch>>("DMatchVectorVector");
emscripten::class_<cv::Mat>("Mat")
.constructor<>()
@@ -494,6 +497,12 @@ EMSCRIPTEN_BINDINGS(binding_utils)
.field("response", &cv::KeyPoint::response)
.field("size", &cv::KeyPoint::size);
emscripten::value_object<cv::DMatch>("DMatch")
.field("queryIdx", &cv::DMatch::queryIdx)
.field("trainIdx", &cv::DMatch::trainIdx)
.field("imgIdx", &cv::DMatch::imgIdx)
.field("distance", &cv::DMatch::distance);
emscripten::value_array<cv::Scalar_<double>> ("Scalar")
.element(index<0>())
.element(index<1>())

@@ -200,20 +200,19 @@ public:
{
int j;
calc_non_rbf_base( vcount, var_count, vecs, another, results,
-2*params.gamma, -2*params.coef0 );
2*params.gamma, 2*params.coef0 );
// TODO: speedup this
for( j = 0; j < vcount; j++ )
{
Qfloat t = results[j];
Qfloat e = std::exp(-std::abs(t));
Qfloat e = std::exp(std::abs(t));
if( t > 0 )
results[j] = (Qfloat)((1. - e)/(1. + e));
else
results[j] = (Qfloat)((e - 1.)/(e + 1.));
else
results[j] = (Qfloat)((1. - e)/(1. + e));
}
}
void calc_rbf( int vcount, int var_count, const float* vecs,
const float* another, Qfloat* results )
{
@@ -1310,8 +1309,6 @@ public:
if( kernelType != SIGMOID && kernelType != POLY )
params.coef0 = 0;
else if( params.coef0 < 0 )
CV_Error( CV_StsOutOfRange, "The kernel parameter <coef0> must be positive or zero" );
if( kernelType != POLY )
params.degree = 0;
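For reference, the calc_sigmoid rewrite above evaluates the SVM sigmoid kernel through a tanh identity; the removed variant (negated arguments plus exp(-|t|)) effectively returned the negated kernel. Dropping the coef0 >= 0 check is consistent with this, since tanh accepts any offset (the new trainauto_sigmoid test below uses coef0 = -10). In LaTeX form, with t the value produced by calc_non_rbf_base:
K(x, y) = \tanh\big(\gamma\, x^{\top} y + c_0\big), \qquad t = 2\big(\gamma\, x^{\top} y + c_0\big),
\tanh\frac{t}{2} =
\begin{cases}
\dfrac{e^{|t|} - 1}{e^{|t|} + 1}, & t > 0, \\[6pt]
\dfrac{1 - e^{|t|}}{1 + e^{|t|}}, & t \le 0.
\end{cases}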

@@ -88,6 +88,51 @@ void CV_SVMTrainAutoTest::run( int /*start_from*/ )
TEST(ML_SVM, trainauto) { CV_SVMTrainAutoTest test; test.safe_run(); }
TEST(ML_SVM, trainauto_sigmoid)
{
const int datasize = 100;
cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
const float scale_factor = 0.5;
const float radius = 2.0;
// Populate samples with data that can be split into two concentric circles
for (int i = 0; i < datasize; i+=2)
{
const float pi = 3.14159f;
const float angle_rads = (i/datasize) * pi;
const float x = radius * cos(angle_rads);
const float y = radius * cos(angle_rads);
// Larger circle
samples.at<float>( i, 0 ) = x;
samples.at<float>( i, 1 ) = y;
responses.at<int>( i, 0 ) = 0;
// Smaller circle
samples.at<float>( i + 1, 0 ) = x * scale_factor;
samples.at<float>( i + 1, 1 ) = y * scale_factor;
responses.at<int>( i + 1, 0 ) = 1;
}
cv::Ptr<TrainData> data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
cv::Ptr<SVM> svm = SVM::create();
svm->setKernel(SVM::SIGMOID);
svm->setGamma(10.0);
svm->setCoef0(-10.0);
svm->trainAuto( data, 10 ); // 2-fold cross validation.
float test_data0[2] = {radius, radius};
cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 );
ASSERT_EQ(0, svm->predict( test_point0 ));
float test_data1[2] = {scale_factor * radius, scale_factor * radius};
cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 );
ASSERT_EQ(1, svm->predict( test_point1 ));
}
TEST(ML_SVM, trainAuto_regression_5369)
{

@@ -323,7 +323,7 @@ def writeTextGraph(modelPath, outputPath, outNodes):
for node in graph_def.node:
if node.op == 'Const':
if 'value' in node.attr:
del node.attr['value']
if 'value' in node.attr and node.attr['value'].tensor.tensor_content:
node.attr['value'].tensor.tensor_content = ''
tf.train.write_graph(graph_def, "", outputPath, as_text=True)
