From e231be86b793ffef6b70efa443a4341e35c87e15 Mon Sep 17 00:00:00 2001 From: zhaoyue-zephyrus Date: Wed, 22 Jul 2020 23:07:17 +0800 Subject: [PATCH 01/12] support flownet2 with arbitary input size revise default proto to match the filename in documentations fix a bug beautify python codes fix bug beautify codes add test samples with larger/smaller size remove unless code using bytearray without creating tmp file remove useless codes --- modules/dnn/src/layers/scale_layer.cpp | 24 +++++++++++++-------- modules/dnn/test/test_layers.cpp | 2 ++ samples/dnn/optical_flow.py | 30 ++++++++++++++++++++------ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 27aefd9b48..9452e4e85e 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -15,6 +15,7 @@ Implementation of Scale layer. #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" +#include #include namespace cv @@ -324,7 +325,7 @@ public: std::vector &internals) const CV_OVERRIDE { CV_Assert_N(inputs.size() == 1, blobs.size() == 3); - CV_Assert_N(blobs[0].total() == 1, blobs[1].total() == total(inputs[0], 1), + CV_Assert_N(blobs[0].total() == 1, blobs[2].total() == inputs[0][1]); outputs.assign(1, inputs[0]); @@ -347,15 +348,20 @@ public: float* outData = outputs[0].ptr(); Mat data_mean_cpu = blobs[1].clone(); + Mat mean_resize = Mat(inputs[0].size[3], inputs[0].size[2], CV_32FC3); + Mat mean_3d = Mat(data_mean_cpu.size[3], data_mean_cpu.size[2], CV_32FC3, data_mean_cpu.ptr(0)); + resize(mean_3d, mean_resize, Size(inputs[0].size[3], inputs[0].size[2])); + int new_size[] = {1, mean_resize.channels(), mean_resize.cols, mean_resize.rows}; + Mat data_mean_cpu_resize = mean_resize.reshape(1, *new_size); Mat data_mean_per_channel_cpu = blobs[2].clone(); - const int numWeights = data_mean_cpu.total(); + const int numWeights = data_mean_cpu_resize.total(); CV_Assert(numWeights != 0); ++num_iter; if (num_iter <= recompute_mean) { - data_mean_cpu *= (num_iter - 1); + data_mean_cpu_resize *= (num_iter - 1); const int batch = inputs[0].size[0]; float alpha = 1.0 / batch; @@ -364,15 +370,15 @@ public: Mat inpSlice(1, numWeights, CV_32F, inpData); inpSlice = alpha * inpSlice; - add(data_mean_cpu.reshape(1, 1), inpSlice, data_mean_cpu.reshape(1, 1)); + add(data_mean_cpu_resize.reshape(1, 1), inpSlice, data_mean_cpu_resize.reshape(1, 1)); inpData += numWeights; } - data_mean_cpu *= (1.0 / num_iter); + data_mean_cpu_resize *= (1.0 / num_iter); - int newsize[] = {blobs[1].size[1], (int)blobs[1].total(2)}; - reduce(data_mean_cpu.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F); + int newsize[] = {inputs[0].size[1], (int)inputs[0].total(2)}; + reduce(data_mean_cpu_resize.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F); - int area = blobs[1].total(2); + int area = inputs[0].total(2); data_mean_per_channel_cpu *= (1.0 / area); } @@ -387,7 +393,7 @@ public: Mat inpSlice(1, numWeights, CV_32F, inpData); Mat outSlice(1, numWeights, CV_32F, outData); - add(inpSlice, (-1) * data_mean_cpu, outSlice); + add(inpSlice, (-1) * data_mean_cpu_resize, outSlice); inpData += numWeights; outData += numWeights; } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 0c4ce11ca5..0170bbc92e 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -646,6 +646,8 @@ TEST_P(Test_Caffe_layers, DataAugmentation) if (backend 
== DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); testLayerUsingCaffeModels("data_augmentation", true, false); + testLayerUsingCaffeModels("data_augmentation_2x1", true, false); + testLayerUsingCaffeModels("data_augmentation_8x6", true, false); } TEST_P(Test_Caffe_layers, Resample) diff --git a/samples/dnn/optical_flow.py b/samples/dnn/optical_flow.py index 5d0d831cf3..da2a5808f2 100644 --- a/samples/dnn/optical_flow.py +++ b/samples/dnn/optical_flow.py @@ -5,7 +5,7 @@ Original paper: https://arxiv.org/abs/1612.01925. Original repo: https://github.com/lmb-freiburg/flownet2. Download the converted .caffemodel model from https://drive.google.com/open?id=16qvE9VNmU39NttpZwZs81Ga8VYQJDaWZ -and .prototxt from https://drive.google.com/open?id=19bo6SWU2p8ZKvjXqMKiCPdK8mghwDy9b. +and .prototxt from https://drive.google.com/file/d/1RyNIUsan1ZOh2hpYIH36A-jofAvJlT6a/view?usp=sharing. Otherwise download original model from https://lmb.informatik.uni-freiburg.de/resources/binaries/flownet2/flownet2-models.tar.gz, convert .h5 model to .caffemodel and modify original .prototxt using .prototxt from link above. ''' @@ -18,7 +18,7 @@ import cv2 as cv class OpticalFlow(object): def __init__(self, proto, model, height, width): - self.net = cv.dnn.readNet(proto, model) + self.net = cv.dnn.readNetFromCaffe(proto, model) self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) self.height = height self.width = width @@ -62,9 +62,9 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='Use this script to calculate optical flow using FlowNetv2', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-input', '-i', required=True, help='Path to input video file. Skip this argument to capture frames from a camera.') - parser.add_argument('--height', default=320, help='Input height') - parser.add_argument('--width', default=448, help='Input width') - parser.add_argument('--proto', '-p', default='FlowNet2_deploy.prototxt', help='Path to prototxt.') + parser.add_argument('--height', default=320, type=int, help='Input height') + parser.add_argument('--width', default=448, type=int, help='Input width') + parser.add_argument('--proto', '-p', default='FlowNet2_deploy_anysize.prototxt', help='Path to prototxt.') parser.add_argument('--model', '-m', default='FlowNet2_weights.caffemodel', help='Path to caffemodel.') args, _ = parser.parse_known_args() @@ -75,7 +75,25 @@ if __name__ == '__main__': cv.namedWindow(winName, cv.WINDOW_NORMAL) cap = cv.VideoCapture(args.input if args.input else 0) hasFrame, first_frame = cap.read() - opt_flow = OpticalFlow(args.proto, args.model, args.height, args.width) + + divisor = 64. 
+ var = {} + var['ADAPTED_WIDTH'] = int(np.ceil(args.width/divisor) * divisor) + var['ADAPTED_HEIGHT'] = int(np.ceil(args.height/divisor) * divisor) + var['SCALE_WIDTH'] = args.width / float(var['ADAPTED_WIDTH']) + var['SCALE_HEIGHT'] = args.height / float(var['ADAPTED_HEIGHT']) + + config = '' + proto = open(args.proto).readlines() + for line in proto: + for key, value in var.items(): + tag = "$%s$" % key + line = line.replace(tag, str(value)) + config += line + + caffemodel = open(args.model, 'rb').read() + + opt_flow = OpticalFlow(bytearray(config.encode()), caffemodel, var['ADAPTED_HEIGHT'], var['ADAPTED_WIDTH']) while cv.waitKey(1) < 0: hasFrame, second_frame = cap.read() if not hasFrame: From f617f18e46fa556daea060d3c69307567bbc65f7 Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Wed, 19 Aug 2020 17:39:24 +0900 Subject: [PATCH 02/12] bit-exact cuda::equalizeHist --- modules/cudaimgproc/src/cuda/hist.cu | 144 ++++++++++++++++++-- modules/cudaimgproc/src/histogram.cpp | 16 +-- modules/cudaimgproc/test/test_histogram.cpp | 82 ++++++++++- 3 files changed, 217 insertions(+), 25 deletions(-) diff --git a/modules/cudaimgproc/src/cuda/hist.cu b/modules/cudaimgproc/src/cuda/hist.cu index be13091f12..6bc5f15e6c 100644 --- a/modules/cudaimgproc/src/cuda/hist.cu +++ b/modules/cudaimgproc/src/cuda/hist.cu @@ -257,18 +257,15 @@ namespace hist namespace hist { - __constant__ int c_lut[256]; - struct EqualizeHist : unary_function { - float scale; + const uchar* lut; - __host__ EqualizeHist(float _scale) : scale(_scale) {} + __host__ EqualizeHist(const uchar* _lut) : lut(_lut) {} __device__ __forceinline__ uchar operator ()(uchar val) const { - const int lut = c_lut[val]; - return __float2int_rn(scale * lut); + return lut[val]; } }; } @@ -283,16 +280,137 @@ namespace cv { namespace cuda { namespace device namespace hist { - void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream) + void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const uchar* lut, cudaStream_t stream) { - if (stream == 0) - cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) ); - else - cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) ); + device::transform(src, dst, EqualizeHist(lut), WithOutMask(), stream); + } + + __global__ void buildLutKernel(int* hist, unsigned char* lut, int size) + { + __shared__ int warp_smem[8]; + __shared__ int hist_smem[8][33]; + +#define HIST_SMEM_NO_BANK_CONFLICT(idx) hist_smem[(idx) >> 5][(idx) & 31] + + const int tId = threadIdx.x; + const int warpId = threadIdx.x / 32; + const int laneId = threadIdx.x % 32; + + // Step1 - Find minimum non-zero value in hist and make it zero + HIST_SMEM_NO_BANK_CONFLICT(tId) = hist[tId]; + int nonZeroIdx = HIST_SMEM_NO_BANK_CONFLICT(tId) > 0 ? 
tId : 256; + + __syncthreads(); + + for (int delta = 16; delta > 0; delta /= 2) + { +#if __CUDACC_VER_MAJOR__ >= 9 + int shflVal = __shfl_down_sync(0xFFFFFFFF, nonZeroIdx, delta); +#else + int shflVal = __shfl_down(nonZeroIdx, delta); +#endif + if (laneId < delta) + nonZeroIdx = min(nonZeroIdx, shflVal); + } + + if (laneId == 0) + warp_smem[warpId] = nonZeroIdx; - const float scale = 255.0f / (src.cols * src.rows); + __syncthreads(); + + if (tId < 8) + { + int warpVal = warp_smem[tId]; + for (int delta = 4; delta > 0; delta /= 2) + { +#if __CUDACC_VER_MAJOR__ >= 9 + int shflVal = __shfl_down_sync(0x000000FF, warpVal, delta); +#else + int shflVal = __shfl_down(warpVal, delta); +#endif + if (tId < delta) + warpVal = min(warpVal, shflVal); + } + if (tId == 0) + { + warp_smem[0] = warpVal; // warpVal - minimum index + } + } + + __syncthreads(); + + const int minNonZeroIdx = warp_smem[0]; + const int minNonZeroVal = HIST_SMEM_NO_BANK_CONFLICT(minNonZeroIdx); + if (minNonZeroVal == size) + { + // This is a special case: the whole image has the same color + + lut[tId] = 0; + if (tId == minNonZeroIdx) + lut[tId] = minNonZeroIdx; + return; + } - device::transform(src, dst, EqualizeHist(scale), WithOutMask(), stream); + if (tId == 0) + HIST_SMEM_NO_BANK_CONFLICT(minNonZeroIdx) = 0; + + __syncthreads(); + + // Step2 - Inclusive sum + // Algorithm from GPU Gems 3 (A Work-Efficient Parallel Scan) + // https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda + + // Step2 Phase1 - The Up-Sweep Phase + for (int delta = 1; delta < 256; delta *= 2) + { + if (tId < 128 / delta) + { + int idx = 255 - 2 * tId * delta; + HIST_SMEM_NO_BANK_CONFLICT(idx) += HIST_SMEM_NO_BANK_CONFLICT(idx - delta); + } + __syncthreads(); + } + + // Step2 Phase2 - The Down-Sweep Phase + if (tId == 0) + HIST_SMEM_NO_BANK_CONFLICT(255) = 0; + + for (int delta = 128; delta >= 1; delta /= 2) + { + if (tId < 128 / delta) + { + int rootIdx = 255 - tId * delta * 2; + int leftIdx = rootIdx - delta; + int tmp = HIST_SMEM_NO_BANK_CONFLICT(leftIdx); + HIST_SMEM_NO_BANK_CONFLICT(leftIdx) = HIST_SMEM_NO_BANK_CONFLICT(rootIdx); + HIST_SMEM_NO_BANK_CONFLICT(rootIdx) += tmp; + } + __syncthreads(); + } + + // Step2 Phase3 - Convert exclusive sum to inclusive sum + int tmp = HIST_SMEM_NO_BANK_CONFLICT(tId); + __syncthreads(); + if (tId >= 1) + HIST_SMEM_NO_BANK_CONFLICT(tId - 1) = tmp; + if (tId == 255) + HIST_SMEM_NO_BANK_CONFLICT(tId) = tmp + hist[tId]; + __syncthreads(); + + // Step3 - Scale values to build lut + + lut[tId] = saturate_cast(HIST_SMEM_NO_BANK_CONFLICT(tId) * (255.0f / (size - minNonZeroVal))); + +#undef HIST_SMEM_NO_BANK_CONFLICT + } + + void buildLut(PtrStepSzi hist, PtrStepSzb lut, int size, cudaStream_t stream) + { + buildLutKernel<<<1, 256, 0, stream>>>(hist.data, lut.data, size); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); } } diff --git a/modules/cudaimgproc/src/histogram.cpp b/modules/cudaimgproc/src/histogram.cpp index e616c5a2e9..c252abc451 100644 --- a/modules/cudaimgproc/src/histogram.cpp +++ b/modules/cudaimgproc/src/histogram.cpp @@ -102,7 +102,8 @@ void cv::cuda::calcHist(InputArray _src, InputArray _mask, OutputArray _hist, St namespace hist { - void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream); + void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const uchar* lut, cudaStream_t stream); + void buildLut(PtrStepSzi hist, PtrStepSzb lut, int size, cudaStream_t stream); } void 
cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream) @@ -114,26 +115,21 @@ void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream) _dst.create(src.size(), src.type()); GpuMat dst = _dst.getGpuMat(); - int intBufSize; - nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) ); - - size_t bufSize = intBufSize + 2 * 256 * sizeof(int); + size_t bufSize = 256 * sizeof(int) + 256 * sizeof(uchar); BufferPool pool(_stream); GpuMat buf = pool.getBuffer(1, static_cast(bufSize), CV_8UC1); GpuMat hist(1, 256, CV_32SC1, buf.data); - GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int)); - GpuMat intBuf(1, intBufSize, CV_8UC1, buf.data + 2 * 256 * sizeof(int)); + GpuMat lut(1, 256, CV_8UC1, buf.data + 256 * sizeof(int)); cuda::calcHist(src, hist, _stream); cudaStream_t stream = StreamAccessor::getStream(_stream); - NppStreamHandler h(stream); - nppSafeCall( nppsIntegral_32s(hist.ptr(), lut.ptr(), 256, intBuf.ptr()) ); + hist::buildLut(hist, lut, src.rows * src.cols, stream); - hist::equalizeHist(src, dst, lut.ptr(), stream); + hist::equalizeHist(src, dst, lut.data, stream); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/cudaimgproc/test/test_histogram.cpp b/modules/cudaimgproc/test/test_histogram.cpp index 6af8eb2135..ac7c5e209f 100644 --- a/modules/cudaimgproc/test/test_histogram.cpp +++ b/modules/cudaimgproc/test/test_histogram.cpp @@ -208,7 +208,7 @@ CUDA_TEST_P(EqualizeHist, Async) cv::Mat dst_gold; cv::equalizeHist(src, dst_gold); - EXPECT_MAT_NEAR(dst_gold, dst, 3.0); + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } CUDA_TEST_P(EqualizeHist, Accuracy) @@ -221,13 +221,91 @@ CUDA_TEST_P(EqualizeHist, Accuracy) cv::Mat dst_gold; cv::equalizeHist(src, dst_gold); - EXPECT_MAT_NEAR(dst_gold, dst, 3.0); + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, EqualizeHist, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES)); +TEST(EqualizeHistIssue, Issue18035) +{ + std::vector imgPaths; + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/3MP.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/5MP.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/airplane.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/baboon.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/box.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/box_in_scene.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/fruits.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/fruits_ecc.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/graffiti.png"); + imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/lena.png"); + + for (int i = 0; i < imgPaths.size(); ++i) + { + std::string imgPath = imgPaths[i]; + cv::Mat src = cv::imread(imgPath, cv::IMREAD_GRAYSCALE); + src = src / 30; + + cv::cuda::GpuMat d_src, dst; + d_src.upload(src); + cv::cuda::equalizeHist(d_src, dst); + + cv::Mat dst_gold; + cv::equalizeHist(src, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); + } +} + +PARAM_TEST_CASE(EqualizeHistExtreme, cv::cuda::DeviceInfo, cv::Size, int) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + int val; + + virtual void SetUp() + { + 
devInfo = GET_PARAM(0); + size = GET_PARAM(1); + val = GET_PARAM(2); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(EqualizeHistExtreme, Case1) +{ + cv::Mat src(size, CV_8UC1, val); + + cv::cuda::GpuMat dst; + cv::cuda::equalizeHist(loadMat(src), dst); + + cv::Mat dst_gold; + cv::equalizeHist(src, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); +} + +CUDA_TEST_P(EqualizeHistExtreme, Case2) +{ + cv::Mat src = randomMat(size, CV_8UC1, val); + + cv::cuda::GpuMat dst; + cv::cuda::equalizeHist(loadMat(src), dst); + + cv::Mat dst_gold; + cv::equalizeHist(src, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); +} + +INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, EqualizeHistExtreme, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Range(0, 256))); + /////////////////////////////////////////////////////////////////////////////////////////////////////// // CLAHE From a7ffcaab28aa6aec0f9e071833b15f14cca05ae3 Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Fri, 21 Aug 2020 23:52:30 +0900 Subject: [PATCH 03/12] Remove compiler warnings --- modules/cudaimgproc/test/test_histogram.cpp | 2 +- modules/cudaoptflow/src/cuda/tvl1flow.cu | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/cudaimgproc/test/test_histogram.cpp b/modules/cudaimgproc/test/test_histogram.cpp index ac7c5e209f..a92eefde8c 100644 --- a/modules/cudaimgproc/test/test_histogram.cpp +++ b/modules/cudaimgproc/test/test_histogram.cpp @@ -242,7 +242,7 @@ TEST(EqualizeHistIssue, Issue18035) imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/graffiti.png"); imgPaths.push_back(std::string(cvtest::TS::ptr()->get_data_path()) + "../cv/shared/lena.png"); - for (int i = 0; i < imgPaths.size(); ++i) + for (size_t i = 0; i < imgPaths.size(); ++i) { std::string imgPath = imgPaths[i]; cv::Mat src = cv::imread(imgPath, cv::IMREAD_GRAYSCALE); diff --git a/modules/cudaoptflow/src/cuda/tvl1flow.cu b/modules/cudaoptflow/src/cuda/tvl1flow.cu index a84b9a3520..ab09a542a2 100644 --- a/modules/cudaoptflow/src/cuda/tvl1flow.cu +++ b/modules/cudaoptflow/src/cuda/tvl1flow.cu @@ -116,15 +116,15 @@ namespace tvl1flow texture tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp); struct SrcTexRef : SrcTex { - __device__ __forceinline__ float I1(float x, float y) const override + __device__ __forceinline__ float I1(float x, float y) const CV_OVERRIDE { return tex2D(tex_I1, x, y); } - __device__ __forceinline__ float I1x(float x, float y) const override + __device__ __forceinline__ float I1x(float x, float y) const CV_OVERRIDE { return tex2D(tex_I1x, x, y); } - __device__ __forceinline__ float I1y(float x, float y) const override + __device__ __forceinline__ float I1y(float x, float y) const CV_OVERRIDE { return tex2D(tex_I1y, x, y); } @@ -135,15 +135,15 @@ namespace tvl1flow __host__ SrcTexObj(cudaTextureObject_t tex_obj_I1_, cudaTextureObject_t tex_obj_I1x_, cudaTextureObject_t tex_obj_I1y_) : tex_obj_I1(tex_obj_I1_), tex_obj_I1x(tex_obj_I1x_), tex_obj_I1y(tex_obj_I1y_) {} - __device__ __forceinline__ float I1(float x, float y) const override + __device__ __forceinline__ float I1(float x, float y) const CV_OVERRIDE { return tex2D(tex_obj_I1, x, y); } - __device__ __forceinline__ float I1x(float x, float y) const override + __device__ __forceinline__ float I1x(float x, float y) const CV_OVERRIDE { return tex2D(tex_obj_I1x, x, y); } - __device__ __forceinline__ float I1y(float x, float y) const override + __device__ __forceinline__ float I1y(float x, float y) const 
CV_OVERRIDE { return tex2D(tex_obj_I1y, x, y); } From 8c1af099895361a016df48e24913774644b0d70c Mon Sep 17 00:00:00 2001 From: Ian Maquignaz Date: Sun, 23 Aug 2020 18:32:58 -0400 Subject: [PATCH 04/12] Merge pull request #18083 from IanMaquignaz:fix_gen_pattern_centering * Fixed centering issue with make_cicle_pattern and make_acircle_pattern() * Fixed issue where asymmetric circles were not at 45 degree angles. Also fixed support for inch measurement by converting parsing to usage of floating points for page size * Fixed copy-paste error from experimental workspace --- doc/pattern_tools/gen_pattern.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/doc/pattern_tools/gen_pattern.py b/doc/pattern_tools/gen_pattern.py index 720ebc7f91..1f90615736 100755 --- a/doc/pattern_tools/gen_pattern.py +++ b/doc/pattern_tools/gen_pattern.py @@ -36,18 +36,27 @@ class PatternMaker: def make_circles_pattern(self): spacing = self.square_size r = spacing / self.radius_rate - for x in range(1, self.cols + 1): - for y in range(1, self.rows + 1): - dot = SVG("circle", cx=x * spacing, cy=y * spacing, r=r, fill="black", stroke="none") + pattern_width = ((self.cols - 1.0) * spacing) + (2.0 * r) + pattern_height = ((self.rows - 1.0) * spacing) + (2.0 * r) + x_spacing = (self.width - pattern_width) / 2.0 + y_spacing = (self.height - pattern_height) / 2.0 + for x in range(0, self.cols): + for y in range(0, self.rows): + dot = SVG("circle", cx=(x * spacing) + x_spacing + r, + cy=(y * spacing) + y_spacing + r, r=r, fill="black", stroke="none") self.g.append(dot) def make_acircles_pattern(self): spacing = self.square_size r = spacing / self.radius_rate - for i in range(0, self.rows): - for j in range(0, self.cols): - dot = SVG("circle", cx=((j * 2 + i % 2) * spacing) + spacing, cy=self.height - (i * spacing + spacing), - r=r, fill="black", stroke="none") + pattern_width = ((self.cols-1.0) * 2 * spacing) + spacing + (2.0 * r) + pattern_height = ((self.rows-1.0) * spacing) + (2.0 * r) + x_spacing = (self.width - pattern_width) / 2.0 + y_spacing = (self.height - pattern_height) / 2.0 + for x in range(0, self.cols): + for y in range(0, self.rows): + dot = SVG("circle", cx=(2 * x * spacing) + (y % 2)*spacing + x_spacing + r, + cy=(y * spacing) + y_spacing + r, r=r, fill="black", stroke="none") self.g.append(dot) def make_checkerboard_pattern(self): @@ -84,9 +93,9 @@ def main(): parser.add_argument("-R", "--radius_rate", help="circles_radius = square_size/radius_rate", default="5.0", action="store", dest="radius_rate", type=float) parser.add_argument("-w", "--page_width", help="page width in units", default="216", action="store", - dest="page_width", type=int) + dest="page_width", type=float) parser.add_argument("-h", "--page_height", help="page height in units", default="279", action="store", - dest="page_width", type=int) + dest="page_width", type=float) parser.add_argument("-a", "--page_size", help="page size, supersedes -h -w arguments", default="A4", action="store", dest="page_size", choices=["A0", "A1", "A2", "A3", "A4", "A5"]) args = parser.parse_args() From cd01ee9a5410f90daa137f28f98faf8977f7933b Mon Sep 17 00:00:00 2001 From: catree Date: Mon, 24 Aug 2020 05:21:42 +0200 Subject: [PATCH 05/12] Use camera intrinsic matrix everywhere. Add cameramatrix, distcoeffs and distcoeffsfisheye macros to avoid copy/paste errors. 
--- doc/mymath.js | 3 + doc/mymath.sty | 17 ++ modules/calib3d/include/opencv2/calib3d.hpp | 188 +++++++++----------- 3 files changed, 109 insertions(+), 99 deletions(-) diff --git a/doc/mymath.js b/doc/mymath.js index 0b65bd16d8..ffa2b11d3d 100644 --- a/doc/mymath.js +++ b/doc/mymath.js @@ -9,6 +9,9 @@ MathJax.Hub.Config( forkfour: ["\\left\\{ \\begin{array}{l l} #1 & \\mbox{#2}\\\\ #3 & \\mbox{#4}\\\\ #5 & \\mbox{#6}\\\\ #7 & \\mbox{#8}\\\\ \\end{array} \\right.", 8], vecthree: ["\\begin{bmatrix} #1\\\\ #2\\\\ #3 \\end{bmatrix}", 3], vecthreethree: ["\\begin{bmatrix} #1 & #2 & #3\\\\ #4 & #5 & #6\\\\ #7 & #8 & #9 \\end{bmatrix}", 9], + cameramatrix: ["#1 = \\begin{bmatrix} f_x & 0 & c_x\\\\ 0 & f_y & c_y\\\\ 0 & 0 & 1 \\end{bmatrix}", 1], + distcoeffs: ["(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \\tau_x, \\tau_y]]]]) \\text{ of 4, 5, 8, 12 or 14 elements}"], + distcoeffsfisheye: ["(k_1, k_2, k_3, k_4)"], hdotsfor: ["\\dots", 1], mathbbm: ["\\mathbb{#1}", 1], bordermatrix: ["\\matrix{#1}", 1] diff --git a/doc/mymath.sty b/doc/mymath.sty index 738c7e9afc..fa2dadea65 100644 --- a/doc/mymath.sty +++ b/doc/mymath.sty @@ -51,3 +51,20 @@ #7 & #8 & #9 \end{bmatrix} } + +\newcommand{\cameramatrix}[1]{ +#1 = +\begin{bmatrix} + f_x & 0 & c_x\\ + 0 & f_y & c_y\\ + 0 & 0 & 1 +\end{bmatrix} +} + +\newcommand{\distcoeffs}[]{ +(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]]) \text{ of 4, 5, 8, 12 or 14 elements} +} + +\newcommand{\distcoeffsfisheye}[]{ +(k_1, k_2, k_3, k_4) +} diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 42ff95872f..dde820e994 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -64,17 +64,17 @@ The distortion-free projective transformation given by a pinhole camera model i \f[s \; p = A \begin{bmatrix} R|t \end{bmatrix} P_w,\f] where \f$P_w\f$ is a 3D point expressed with respect to the world coordinate system, -\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the intrinsic camera matrix, +\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the camera intrinsic matrix, \f$R\f$ and \f$t\f$ are the rotation and translation that describe the change of coordinates from world to camera coordinate systems (or camera frame) and \f$s\f$ is the projective transformation's arbitrary scaling and not part of the camera model. -The intrinsic camera matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated +The camera intrinsic matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated as \f$K\f$) projects 3D points given in the camera coordinate system to 2D pixel coordinates, i.e. \f[p = A P_c.\f] -The camera matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are +The camera intrinsic matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are expressed in pixel units, and the principal point \f$(c_x, c_y)\f$, that is usually close to the image center: @@ -382,9 +382,9 @@ R & t \\ \end{bmatrix} P_{h_0}.\f] @note - - Many functions in this module take a camera matrix as an input parameter. Although all + - Many functions in this module take a camera intrinsic matrix as an input parameter. Although all functions assume the same structure of this parameter, they may name it differently. 
The - parameter's description, however, will be clear in that a camera matrix with the structure + parameter's description, however, will be clear in that a camera intrinsic matrix with the structure shown above is required. - A calibration sample for 3 cameras in a horizontal position can be found at opencv_source_code/samples/cpp/3calibration.cpp @@ -648,10 +648,10 @@ CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mt OutputArray Qy = noArray(), OutputArray Qz = noArray()); -/** @brief Decomposes a projection matrix into a rotation matrix and a camera matrix. +/** @brief Decomposes a projection matrix into a rotation matrix and a camera intrinsic matrix. @param projMatrix 3x4 input projection matrix P. -@param cameraMatrix Output 3x3 camera matrix K. +@param cameraMatrix Output 3x3 camera intrinsic matrix \f$\cameramatrix{A}\f$. @param rotMatrix Output 3x3 external rotation matrix R. @param transVect Output 4x1 translation vector T. @param rotMatrixX Optional 3x3 rotation matrix around x-axis. @@ -736,10 +736,9 @@ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1, @param rvec The rotation vector (@ref Rodrigues) that, together with tvec, performs a change of basis from world to camera coordinate system, see @ref calibrateCamera for details. @param tvec The translation vector, see parameter description above. -@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$ . +@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ . @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed. +\f$\distcoeffs\f$ . If the vector is empty, the zero distortion coefficients are assumed. @param imagePoints Output array of image points, 1xN/Nx1 2-channel, or vector\ . @param jacobian Optional output 2Nx(10+\) jacobian matrix of derivatives of image @@ -793,10 +792,9 @@ Number of input points must be 4. Object points must be defined in the following 1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, where N is the number of points. vector\ can be also passed here. -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ . @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. 
@@ -835,7 +833,7 @@ It requires 4 coplanar object points defined in the following order: - point 3: [-squareLength / 2, -squareLength / 2, 0] The function estimates the object pose given a set of object points, their corresponding image -projections, as well as the camera matrix and the distortion coefficients, see the figure below +projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below (more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward and the Z-axis forward). @@ -968,10 +966,9 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, 1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, where N is the number of points. vector\ can be also passed here. -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . +@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ . @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. @@ -988,7 +985,7 @@ an inlier. @param flags Method for solving a PnP problem (see @ref solvePnP ). The function estimates an object pose given a set of object points, their corresponding image -projections, as well as the camera matrix and the distortion coefficients. This function finds such +projections, as well as the camera intrinsic matrix and the distortion coefficients. This function finds such a pose that minimizes reprojection error, that is, the sum of squared distances between the observed projections imagePoints and the projected (using @ref projectPoints ) objectPoints. The use of RANSAC makes the function resistant to outliers. @@ -1017,10 +1014,9 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint 1x3/3x1 3-channel. vector\ can be also passed here. @param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel. vector\ can be also passed here. -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ . @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param rvecs Output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions. @@ -1032,7 +1028,7 @@ the model coordinate system to the camera coordinate system. A P3P problem has u "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). 
The function estimates the object pose given 3 object points, their corresponding image -projections, as well as the camera matrix and the distortion coefficients. +projections, as well as the camera intrinsic matrix and the distortion coefficients. @note The solutions are sorted by reprojection errors (lowest to highest). @@ -1049,10 +1045,9 @@ to the camera coordinate frame) from a 3D-2D point correspondences and starting where N is the number of points. vector\ can also be passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, where N is the number of points. vector\ can also be passed here. -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ . @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. Input values are used as an initial solution. @@ -1061,7 +1056,7 @@ the model coordinate system to the camera coordinate system. Input values are us The function refines the object pose given at least 3 object points, their corresponding image projections, an initial solution for the rotation and translation vector, -as well as the camera matrix and the distortion coefficients. +as well as the camera intrinsic matrix and the distortion coefficients. The function minimizes the projection error with respect to the rotation and the translation vectors, according to a Levenberg-Marquardt iterative minimization @cite Madsen04 @cite Eade13 process. */ @@ -1077,10 +1072,9 @@ to the camera coordinate frame) from a 3D-2D point correspondences and starting where N is the number of points. vector\ can also be passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, where N is the number of points. vector\ can also be passed here. -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ . @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. Input values are used as an initial solution. @@ -1091,7 +1085,7 @@ gain in the Damped Gauss-Newton formulation. The function refines the object pose given at least 3 object points, their corresponding image projections, an initial solution for the rotation and translation vector, -as well as the camera matrix and the distortion coefficients. +as well as the camera intrinsic matrix and the distortion coefficients. 
The function minimizes the projection error with respect to the rotation and the translation vectors, using a virtual visual servoing (VVS) @cite Chaumette06 @cite Marchand16 scheme. */ @@ -1119,10 +1113,9 @@ Only 1 solution is returned. 1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, where N is the number of points. vector\ can be also passed here. -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ . @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param rvecs Vector of output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from the model coordinate system to the camera coordinate system. @@ -1168,7 +1161,7 @@ and useExtrinsicGuess is set to true. and the 3D object points projected with the estimated pose. The function estimates the object pose given a set of object points, their corresponding image -projections, as well as the camera matrix and the distortion coefficients, see the figure below +projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below (more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward and the Z-axis forward). @@ -1297,7 +1290,7 @@ CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoint InputArray rvec = noArray(), InputArray tvec = noArray(), OutputArray reprojectionError = noArray() ); -/** @brief Finds an initial camera matrix from 3D-2D point correspondences. +/** @brief Finds an initial camera intrinsic matrix from 3D-2D point correspondences. @param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern coordinate space. In the old interface all the per-view vectors are concatenated. See @@ -1308,7 +1301,7 @@ old interface all the per-view vectors are concatenated. @param aspectRatio If it is zero or negative, both \f$f_x\f$ and \f$f_y\f$ are estimated independently. Otherwise, \f$f_x = f_y * \texttt{aspectRatio}\f$ . -The function estimates and returns an initial camera matrix for the camera calibration process. +The function estimates and returns an initial camera intrinsic matrix for the camera calibration process. Currently, the function only supports planar calibration patterns, which are patterns where each object point has z-coordinate =0. */ @@ -1390,10 +1383,9 @@ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSiz @param image Input/output image. It must have 1 or 3 channels. The number of channels is not altered. @param cameraMatrix Input 3x3 floating-point matrix of camera intrinsic parameters. -\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ +\f$\cameramatrix{A}\f$ @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed. +\f$\distcoeffs\f$. 
If the vector is empty, the zero distortion coefficients are assumed. @param rvec Rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. @param tvec Translation vector. @@ -1503,14 +1495,13 @@ pattern points (e.g. std::vector>). imagePoints.size() an objectPoints.size(), and imagePoints[i].size() and objectPoints[i].size() for each i, must be equal, respectively. In the old interface all the vectors of object points from different views are concatenated together. -@param imageSize Size of the image used only to initialize the intrinsic camera matrix. -@param cameraMatrix Input/output 3x3 floating-point camera matrix -\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS +@param imageSize Size of the image used only to initialize the camera intrinsic matrix. +@param cameraMatrix Input/output 3x3 floating-point camera intrinsic matrix +\f$\cameramatrix{A}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be initialized before calling the function. @param distCoeffs Input/output vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. +\f$\distcoeffs\f$. @param rvecs Output vector of rotation vectors (@ref Rodrigues ) estimated for each pattern view (e.g. std::vector>). That is, each i-th rotation vector together with the corresponding i-th translation vector (see the next output parameter description) brings the calibration pattern @@ -1628,9 +1619,9 @@ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints, int flags = 0, TermCriteria criteria = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) ); -/** @brief Computes useful camera characteristics from the camera matrix. +/** @brief Computes useful camera characteristics from the camera intrinsic matrix. -@param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or +@param cameraMatrix Input camera intrinsic matrix that can be estimated by calibrateCamera or stereoCalibrate . @param imageSize Input image size in pixels. @param apertureWidth Physical width in mm of the sensor. @@ -1666,15 +1657,15 @@ be equal for each i. observed by the first camera. The same structure as in @ref calibrateCamera. @param imagePoints2 Vector of vectors of the projections of the calibration pattern points, observed by the second camera. The same structure as in @ref calibrateCamera. -@param cameraMatrix1 Input/output camera matrix for the first camera, the same as in +@param cameraMatrix1 Input/output camera intrinsic matrix for the first camera, the same as in @ref calibrateCamera. Furthermore, for the stereo case, additional flags may be used, see below. @param distCoeffs1 Input/output vector of distortion coefficients, the same as in @ref calibrateCamera. -@param cameraMatrix2 Input/output second camera matrix for the second camera. See description for +@param cameraMatrix2 Input/output second camera intrinsic matrix for the second camera. See description for cameraMatrix1. @param distCoeffs2 Input/output lens distortion coefficients for the second camera. See description for distCoeffs1. -@param imageSize Size of the image used only to initialize the intrinsic camera matrices. +@param imageSize Size of the image used only to initialize the camera intrinsic matrices. @param R Output rotation matrix. 
Together with the translation vector T, this matrix brings points given in the first camera's coordinate system to points in the second camera's coordinate system. In more technical terms, the tuple of R and T performs a change of basis @@ -1795,9 +1786,9 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, /** @brief Computes rectification transforms for each head of a calibrated stereo camera. -@param cameraMatrix1 First camera matrix. +@param cameraMatrix1 First camera intrinsic matrix. @param distCoeffs1 First camera distortion parameters. -@param cameraMatrix2 Second camera matrix. +@param cameraMatrix2 Second camera intrinsic matrix. @param distCoeffs2 Second camera distortion parameters. @param imageSize Size of the image used for stereo calibration. @param R Rotation matrix from the coordinate system of the first camera to the second camera, @@ -1953,12 +1944,11 @@ CV_EXPORTS_W float rectify3Collinear( InputArray cameraMatrix1, InputArray distC OutputArray Q, double alpha, Size newImgSize, CV_OUT Rect* roi1, CV_OUT Rect* roi2, int flags ); -/** @brief Returns the new camera matrix based on the free scaling parameter. +/** @brief Returns the new camera intrinsic matrix based on the free scaling parameter. -@param cameraMatrix Input camera matrix. +@param cameraMatrix Input camera intrinsic matrix. @param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param imageSize Original image size. @param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are @@ -1967,17 +1957,17 @@ stereoRectify for details. @param newImgSize Image size after rectification. By default, it is set to imageSize . @param validPixROI Optional output rectangle that outlines all-good-pixels region in the undistorted image. See roi1, roi2 description in stereoRectify . -@param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the +@param centerPrincipalPoint Optional flag that indicates whether in the new camera intrinsic matrix the principal point should be at the image center or not. By default, the principal point is chosen to best fit a subset of the source image (determined by alpha) to the corrected image. -@return new_camera_matrix Output new camera matrix. +@return new_camera_matrix Output new camera intrinsic matrix. -The function computes and returns the optimal new camera matrix based on the free scaling parameter. +The function computes and returns the optimal new camera intrinsic matrix based on the free scaling parameter. By varying this parameter, you may retrieve only sensible pixels alpha=0 , keep all the original image pixels if there is valuable information in the corners alpha=1 , or get something in between. When alpha\>0 , the undistorted result is likely to have some black pixels corresponding to -"virtual" pixels outside of the captured distorted image. The original camera matrix, distortion -coefficients, the computed new camera matrix, and newImageSize should be passed to +"virtual" pixels outside of the captured distorted image. 
The original camera intrinsic matrix, distortion +coefficients, the computed new camera intrinsic matrix, and newImageSize should be passed to initUndistortRectifyMap to produce the maps for remap . */ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs, @@ -2222,11 +2212,11 @@ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2, @param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should be floating-point (single or double precision). @param points2 Array of the second image points of the same size and format as points1 . -@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ . Note that this function assumes that points1 and points2 are feature points from cameras with the -same camera matrix. If this assumption does not hold for your use case, use +same camera intrinsic matrix. If this assumption does not hold for your use case, use `undistortPoints()` with `P = cv::NoArray()` for both cameras to transform image points -to normalized image coordinates, which are valid for the identity camera matrix. When +to normalized image coordinates, which are valid for the identity camera intrinsic matrix. When passing these coordinates, pass the identity matrix for this parameter. @param method Method for computing an essential matrix. - **RANSAC** for the RANSAC algorithm. @@ -2273,10 +2263,10 @@ confidence (probability) that the estimated matrix is correct. @param mask Output array of N elements, every element of which is set to 0 for outliers and to 1 for the other points. The array is computed only in the RANSAC and LMedS methods. -This function differs from the one above that it computes camera matrix from focal length and +This function differs from the one above that it computes camera intrinsic matrix from focal length and principal point: -\f[K = +\f[A = \begin{bmatrix} f & 0 & x_{pp} \\ 0 & f & y_{pp} \\ @@ -2316,9 +2306,9 @@ inliers that pass the check. @param points1 Array of N 2D points from the first image. The point coordinates should be floating-point (single or double precision). @param points2 Array of the second image points of the same size and format as points1 . -@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ . Note that this function assumes that points1 and points2 are feature points from cameras with the -same camera matrix. +same camera intrinsic matrix. @param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple that performs a change of basis from the first camera's coordinate system to the second camera's coordinate system. Note that, in general, t can not be used for this tuple, see the parameter @@ -2381,7 +2371,7 @@ are feature points from cameras with same focal length and principal point. inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to recover pose. In the output mask only inliers which pass the cheirality check. 
-This function differs from the one above that it computes camera matrix from focal length and +This function differs from the one above that it computes camera intrinsic matrix from focal length and principal point: \f[A = @@ -2401,9 +2391,9 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point @param points1 Array of N 2D points from the first image. The point coordinates should be floating-point (single or double precision). @param points2 Array of the second image points of the same size and format as points1. -@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ . Note that this function assumes that points1 and points2 are feature points from cameras with the -same camera matrix. +same camera intrinsic matrix. @param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple that performs a change of basis from the first camera's coordinate system to the second camera's coordinate system. Note that, in general, t can not be used for this tuple, see the parameter @@ -2762,7 +2752,7 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details. /** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s). @param H The input homography matrix between two images. -@param K The input intrinsic camera calibration matrix. +@param K The input camera intrinsic matrix. @param rotations Array of rotation matrices. @param translations Array of translation matrices. @param normals Array of plane normal matrices. @@ -3020,8 +3010,8 @@ namespace fisheye @param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or vector\. @param affine - @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. - @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param K Camera intrinsic matrix \f$cameramatrix{K}\f$. + @param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$. @param alpha The skew coefficient. @param jacobian Optional output 2Nx15 jacobian matrix of derivatives of image points with respect to components of the focal lengths, coordinates of the principal point, distortion coefficients, @@ -3044,12 +3034,12 @@ namespace fisheye @param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\ ), where N is the number of points in the view. - @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. - @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param K Camera intrinsic matrix \f$cameramatrix{K}\f$. + @param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$. @param alpha The skew coefficient. @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\ . - Note that the function assumes the camera matrix of the undistorted points to be identity. + Note that the function assumes the camera intrinsic matrix of the undistorted points to be identity. This means if you want to transform back points undistorted with undistortPoints() you have to multiply them with \f$P^{-1}\f$. */ @@ -3059,11 +3049,11 @@ namespace fisheye @param distorted Array of object points, 1xN/Nx1 2-channel (or vector\ ), where N is the number of points in the view. - @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. 
- @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param K Camera intrinsic matrix \f$cameramatrix{K}\f$. + @param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$. @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 1-channel or 1x1 3-channel - @param P New camera matrix (3x3) or new projection matrix (3x4) + @param P New camera intrinsic matrix (3x3) or new projection matrix (3x4) @param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\ . */ CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted, @@ -3072,11 +3062,11 @@ namespace fisheye /** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty zero distortion is used, if R or P is empty identity matrixes are used. - @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. - @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param K Camera intrinsic matrix \f$cameramatrix{K}\f$. + @param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$. @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 1-channel or 1x1 3-channel - @param P New camera matrix (3x3) or new projection matrix (3x4) + @param P New camera intrinsic matrix (3x3) or new projection matrix (3x4) @param size Undistorted image size. @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps() for details. @@ -3090,9 +3080,9 @@ namespace fisheye @param distorted image with fisheye lens distortion. @param undistorted Output image with compensated fisheye lens distortion. - @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. - @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. - @param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you + @param K Camera intrinsic matrix \f$cameramatrix{K}\f$. + @param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$. + @param Knew Camera intrinsic matrix of the distorted image. By default, it is the identity matrix but you may additionally scale and shift the result by using a different matrix. @param new_size the new size @@ -3117,14 +3107,14 @@ namespace fisheye CV_EXPORTS_W void undistortImage(InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray Knew = cv::noArray(), const Size& new_size = Size()); - /** @brief Estimates new camera matrix for undistortion or rectification. + /** @brief Estimates new camera intrinsic matrix for undistortion or rectification. - @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. + @param K Camera intrinsic matrix \f$cameramatrix{K}\f$. @param image_size Size of the image - @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$. @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 1-channel or 1x1 3-channel - @param P New camera matrix (3x3) or new projection matrix (3x4) + @param P New camera intrinsic matrix (3x3) or new projection matrix (3x4) @param balance Sets the new focal length in range between the min focal length and the max focal length. Balance is in range of [0, 1]. 
@param new_size the new size @@ -3140,12 +3130,12 @@ namespace fisheye @param imagePoints vector of vectors of the projections of calibration pattern points. imagePoints.size() and objectPoints.size() and imagePoints[i].size() must be equal to objectPoints[i].size() for each i. - @param image_size Size of the image used only to initialize the intrinsic camera matrix. - @param K Output 3x3 floating-point camera matrix - \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If + @param image_size Size of the image used only to initialize the camera intrinsic matrix. + @param K Output 3x3 floating-point camera intrinsic matrix + \f$\cameramatrix{A}\f$ . If fisheye::CALIB_USE_INTRINSIC_GUESS/ is specified, some or all of fx, fy, cx, cy must be initialized before calling the function. - @param D Output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param D Output vector of distortion coefficients \f$\distcoeffsfisheye\f$. @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view. That is, each k-th rotation vector together with the corresponding k-th translation vector (see the next output parameter description) brings the calibration pattern from the model coordinate @@ -3172,9 +3162,9 @@ optimization. It stays at the center or at a different location specified when C /** @brief Stereo rectification for fisheye camera model - @param K1 First camera matrix. + @param K1 First camera intrinsic matrix. @param D1 First camera distortion parameters. - @param K2 Second camera matrix. + @param K2 Second camera intrinsic matrix. @param D2 Second camera distortion parameters. @param imageSize Size of the image used for stereo calibration. @param R Rotation matrix between the coordinate systems of the first and the second @@ -3211,15 +3201,15 @@ optimization. It stays at the center or at a different location specified when C observed by the first camera. @param imagePoints2 Vector of vectors of the projections of the calibration pattern points, observed by the second camera. - @param K1 Input/output first camera matrix: + @param K1 Input/output first camera intrinsic matrix: \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified, some or all of the matrix components must be initialized. - @param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements. - @param K2 Input/output second camera matrix. The parameter is similar to K1 . + @param D1 Input/output vector of distortion coefficients \f$\distcoeffsfisheye\f$ of 4 elements. + @param K2 Input/output second camera intrinsic matrix. The parameter is similar to K1 . @param D2 Input/output lens distortion coefficients for the second camera. The parameter is similar to D1 . - @param imageSize Size of the image used only to initialize intrinsic camera matrix. + @param imageSize Size of the image used only to initialize camera intrinsic matrix. @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. @param T Output translation vector between the coordinate systems of the cameras. 
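A minimal fisheye::calibrate call from Python, sketched under the assumption that objpoints, imgpoints and image_size have already been gathered from a pattern-detection loop that is not shown here:

    import cv2 as cv
    import numpy as np

    # objpoints: list of (1, N, 3) float64 arrays, imgpoints: list of (1, N, 2)
    # float64 arrays, one entry per view; image_size = (width, height).
    K = np.zeros((3, 3))
    D = np.zeros((4, 1))
    flags = cv.fisheye.CALIB_RECOMPUTE_EXTRINSIC | cv.fisheye.CALIB_FIX_SKEW
    rms, K, D, rvecs, tvecs = cv.fisheye.calibrate(
        objpoints, imgpoints, image_size, K, D, flags=flags)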
@param flags Different flags that may be zero or a combination of the following values: From d392b11dfb3a16f919af75ed419a64905eb7de55 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 24 Aug 2020 10:46:53 +0300 Subject: [PATCH 06/12] Supported ONNX Pow op --- modules/dnn/src/onnx/onnx_importer.cpp | 13 +++++++++++++ modules/dnn/test/test_onnx_importer.cpp | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7d37b065ab..ccef2f6936 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -712,6 +712,19 @@ void ONNXImporter::populateNet(Net dstNet) layerParams.set("bias_term", true); } } + else if (layer_type == "Pow") + { + if (layer_id.find(node_proto.input(1)) != layer_id.end()) + CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power"); + + Mat blob = getBlob(node_proto, constBlobs, 1); + if (blob.total() != 1) + CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power"); + + blob.convertTo(blob, CV_32F); + layerParams.type = "Power"; + layerParams.set("power", blob.at(0)); + } else if (layer_type == "Max") { layerParams.type = "Eltwise"; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 25efcbb3ca..c68215ee30 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -283,6 +283,11 @@ TEST_P(Test_ONNX_layers, Cast) testONNXModels("cast"); } +TEST_P(Test_ONNX_layers, Power) +{ + testONNXModels("pow2", npy, 0, 0, false, false); +} + TEST_P(Test_ONNX_layers, Concatenation) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) From e503ac508eb7cb47757609e4a78900ea9563204f Mon Sep 17 00:00:00 2001 From: Vadim Levin Date: Mon, 24 Aug 2020 23:07:25 +0300 Subject: [PATCH 07/12] fix: libavcodec version check for AV_CODEC_FLAG_GLOBAL_HEADER --- modules/videoio/src/cap_ffmpeg_impl.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index ba21428568..59120d90e6 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -1949,7 +1949,9 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc, // some formats want stream headers to be separate if(oc->oformat->flags & AVFMT_GLOBALHEADER) { -#if LIBAVCODEC_BUILD > CALC_FFMPEG_VERSION(56, 35, 0) + // flags were renamed: https://github.com/libav/libav/commit/7c6eb0a1b7bf1aac7f033a7ec6d8cacc3b5c2615 +#if LIBAVCODEC_BUILD >= (LIBAVCODEC_VERSION_MICRO >= 100 \ + ? 
CALC_FFMPEG_VERSION(56, 60, 100) : CALC_FFMPEG_VERSION(56, 35, 0)) c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; #else c->flags |= CODEC_FLAG_GLOBAL_HEADER; From f7e524cbe65b2e1aed9cd22905fcb4d372760f04 Mon Sep 17 00:00:00 2001 From: Vadim Levin Date: Mon, 24 Aug 2020 23:12:49 +0300 Subject: [PATCH 08/12] fix: libavcodec version check for AVDISCARD_NONINTRA - AVDISCARD_NONINTRA flag is supported only for FFMPEG libraries pack --- modules/videoio/src/cap_ffmpeg_impl.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 59120d90e6..ac6f9448e0 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -993,7 +993,10 @@ bool CvCapture_FFMPEG::open( const char* _filename ) enc->skip_frame = AVDISCARD_DEFAULT; else if (strcmp(avdiscard_entry->value, "none") == 0) enc->skip_frame = AVDISCARD_NONE; -#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54, 59, 100) + // NONINTRA flag was introduced with version bump at revision: + // https://github.com/FFmpeg/FFmpeg/commit/b152152df3b778d0a86dcda5d4f5d065b4175a7b + // This key is supported only for FFMPEG version +#if LIBAVCODEC_VERSION_MICRO >= 100 && LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(55, 67, 100) else if (strcmp(avdiscard_entry->value, "nonintra") == 0) enc->skip_frame = AVDISCARD_NONINTRA; #endif From dda1bf1887cf081bbcd205a142260f8b2e788e76 Mon Sep 17 00:00:00 2001 From: catree Date: Sat, 22 Aug 2020 23:27:39 +0200 Subject: [PATCH 09/12] Add broken implementation note for DLS and UPnP. Add CV_LOG_DEBUG. --- modules/calib3d/include/opencv2/calib3d.hpp | 20 +++++++++++++------- modules/calib3d/src/solvepnp.cpp | 10 ++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 42ff95872f..9f4fbc8028 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -450,8 +450,10 @@ enum SolvePnPMethod { SOLVEPNP_ITERATIVE = 0, SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete - SOLVEPNP_DLS = 3, //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct - SOLVEPNP_UPNP = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive + SOLVEPNP_DLS = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n + //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct + SOLVEPNP_UPNP = 4, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n + //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive SOLVEPNP_AP3P = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17 SOLVEPNP_IPPE = 6, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n //!< Object points must be coplanar. @@ -808,7 +810,7 @@ vectors, respectively, and further optimizes them. - **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In this case the function finds such a pose that minimizes reprojection error, that is the sum of squared distances between the observed projections imagePoints and the projected (using -projectPoints ) objectPoints . +@ref projectPoints ) objectPoints . 
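Since SOLVEPNP_DLS and SOLVEPNP_UPNP now fall back to EPnP, requesting EPnP explicitly makes the behaviour obvious in user code; a sketch assuming the point arrays and intrinsics are prepared elsewhere:

    import cv2 as cv

    # object_points: Nx3, image_points: Nx2 (float32/float64),
    # camera_matrix: 3x3, dist_coeffs: distortion vector; all assumed to exist.
    ok, rvec, tvec = cv.solvePnP(object_points, image_points,
                                 camera_matrix, dist_coeffs,
                                 flags=cv.SOLVEPNP_EPNP)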
- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). In this case the function requires exactly four object and image points. @@ -817,9 +819,11 @@ In this case the function requires exactly four object and image points. In this case the function requires exactly four object and image points. - **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- **SOLVEPNP_DLS** Method is based on the paper of J. Hesch and S. Roumeliotis. +- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of J. Hesch and S. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- **SOLVEPNP_UPNP** Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, +- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ assuming that both have the same value. Then the cameraMatrix is updated with the estimated @@ -1143,9 +1147,11 @@ In this case the function requires exactly four object and image points. In this case the function requires exactly four object and image points. - **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. +- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, +- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ assuming that both have the same value. Then the cameraMatrix is updated with the estimated diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp index 0fcd670abc..0e3a1e8f22 100644 --- a/modules/calib3d/src/solvepnp.cpp +++ b/modules/calib3d/src/solvepnp.cpp @@ -48,6 +48,7 @@ #include "ap3p.h" #include "ippe.hpp" #include "opencv2/calib3d/calib3d_c.h" +#include namespace cv { @@ -780,6 +781,15 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, vector vec_rvecs, vec_tvecs; if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP) { + if (flags == SOLVEPNP_DLS) + { + CV_LOG_DEBUG(NULL, "Broken implementation for SOLVEPNP_DLS. Fallback to EPnP."); + } + else if (flags == SOLVEPNP_UPNP) + { + CV_LOG_DEBUG(NULL, "Broken implementation for SOLVEPNP_UPNP. 
Fallback to EPnP."); + } + Mat undistortedPoints; undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); epnp PnP(cameraMatrix, opoints, undistortedPoints); From 379b83e946eab0664d9153d809d35bf6bdd091d4 Mon Sep 17 00:00:00 2001 From: catree Date: Sat, 22 Aug 2020 01:44:40 +0200 Subject: [PATCH 10/12] Fix cubic root computation to be able to handle negative values. Improve doc. Add regression test. --- modules/calib3d/include/opencv2/calib3d.hpp | 16 +- modules/calib3d/src/calibration_handeye.cpp | 12 +- .../test/test_calibration_hand_eye.cpp | 141 ++++++++++++++---- 3 files changed, 128 insertions(+), 41 deletions(-) diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 42ff95872f..f86c95b5dd 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -1989,23 +1989,23 @@ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray @param[in] R_gripper2base Rotation part extracted from the homogeneous matrix that transforms a point expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$). -This is a vector (`vector`) that contains the rotation matrices for all the transformations -from gripper frame to robot base frame. +This is a vector (`vector`) that contains the rotation, `(3x3)` rotation matrices or `(3x1)` rotation vectors, +for all the transformations from gripper frame to robot base frame. @param[in] t_gripper2base Translation part extracted from the homogeneous matrix that transforms a point expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$). -This is a vector (`vector`) that contains the translation vectors for all the transformations +This is a vector (`vector`) that contains the `(3x1)` translation vectors for all the transformations from gripper frame to robot base frame. @param[in] R_target2cam Rotation part extracted from the homogeneous matrix that transforms a point expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$). -This is a vector (`vector`) that contains the rotation matrices for all the transformations -from calibration target frame to camera frame. +This is a vector (`vector`) that contains the rotation, `(3x3)` rotation matrices or `(3x1)` rotation vectors, +for all the transformations from calibration target frame to camera frame. @param[in] t_target2cam Rotation part extracted from the homogeneous matrix that transforms a point expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$). -This is a vector (`vector`) that contains the translation vectors for all the transformations +This is a vector (`vector`) that contains the `(3x1)` translation vectors for all the transformations from calibration target frame to camera frame. -@param[out] R_cam2gripper Estimated rotation part extracted from the homogeneous matrix that transforms a point +@param[out] R_cam2gripper Estimated `(3x3)` rotation part extracted from the homogeneous matrix that transforms a point expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$). -@param[out] t_cam2gripper Estimated translation part extracted from the homogeneous matrix that transforms a point +@param[out] t_cam2gripper Estimated `(3x1)` translation part extracted from the homogeneous matrix that transforms a point expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$). 
@param[in] method One of the implemented Hand-Eye calibration method, see cv::HandEyeCalibrationMethod diff --git a/modules/calib3d/src/calibration_handeye.cpp b/modules/calib3d/src/calibration_handeye.cpp index 37d4e89d78..79c8b58a23 100644 --- a/modules/calib3d/src/calibration_handeye.cpp +++ b/modules/calib3d/src/calibration_handeye.cpp @@ -29,6 +29,7 @@ static Mat homogeneousInverse(const Mat& T) // q = sin(theta/2) * v // theta - rotation angle // v - unit rotation axis, |v| = 1 +// Reference: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/ static Mat rot2quatMinimal(const Mat& R) { CV_Assert(R.type() == CV_64FC1 && R.rows >= 3 && R.cols >= 3); @@ -44,7 +45,7 @@ static Mat rot2quatMinimal(const Mat& R) qx = (m21 - m12) / S; qy = (m02 - m20) / S; qz = (m10 - m01) / S; - } else if ((m00 > m11)&(m00 > m22)) { + } else if (m00 > m11 && m00 > m22) { double S = sqrt(1.0 + m00 - m11 - m22) * 2; // S=4*qx qx = 0.25 * S; qy = (m01 + m10) / S; @@ -98,6 +99,7 @@ static Mat quatMinimal2rot(const Mat& q) // // q - 4x1 unit quaternion // R - 3x3 rotation matrix +// Reference: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/ static Mat rot2quat(const Mat& R) { CV_Assert(R.type() == CV_64FC1 && R.rows >= 3 && R.cols >= 3); @@ -114,7 +116,7 @@ static Mat rot2quat(const Mat& R) qx = (m21 - m12) / S; qy = (m02 - m20) / S; qz = (m10 - m01) / S; - } else if ((m00 > m11)&(m00 > m22)) { + } else if (m00 > m11 && m00 > m22) { double S = sqrt(1.0 + m00 - m11 - m22) * 2; // S=4*qx qw = (m21 - m12) / S; qx = 0.25 * S; @@ -572,7 +574,11 @@ static void calibrateHandEyeAndreff(const std::vector& Hg, const std::vecto R = R.reshape(1, 2, newSize); //Eq 15 double det = determinant(R); - R = pow(sign_double(det) / abs(det), 1.0/3.0) * R; + if (std::fabs(det) < FLT_EPSILON) + { + CV_Error(Error::StsNoConv, "calibrateHandEye() with CALIB_HAND_EYE_ANDREFF method: determinant(R) is null"); + } + R = cubeRoot(static_cast(sign_double(det) / abs(det))) * R; Mat w, u, vt; SVDecomp(R, w, u, vt); diff --git a/modules/calib3d/test/test_calibration_hand_eye.cpp b/modules/calib3d/test/test_calibration_hand_eye.cpp index 848dcf07c2..919c6ad28e 100644 --- a/modules/calib3d/test/test_calibration_hand_eye.cpp +++ b/modules/calib3d/test/test_calibration_hand_eye.cpp @@ -7,6 +7,38 @@ namespace opencv_test { namespace { +static std::string getMethodName(HandEyeCalibrationMethod method) +{ + std::string method_name = ""; + switch (method) + { + case CALIB_HAND_EYE_TSAI: + method_name = "Tsai"; + break; + + case CALIB_HAND_EYE_PARK: + method_name = "Park"; + break; + + case CALIB_HAND_EYE_HORAUD: + method_name = "Horaud"; + break; + + case CALIB_HAND_EYE_ANDREFF: + method_name = "Andreff"; + break; + + case CALIB_HAND_EYE_DANIILIDIS: + method_name = "Daniilidis"; + break; + + default: + break; + } + + return method_name; +} + class CV_CalibrateHandEyeTest : public cvtest::BaseTest { public: @@ -48,7 +80,6 @@ protected: std::vector &R_target2cam, std::vector &t_target2cam, bool noise, Mat& R_cam2gripper, Mat& t_cam2gripper); Mat homogeneousInverse(const Mat& T); - std::string getMethodName(HandEyeCalibrationMethod method); double sign_double(double val); double eps_rvec[5]; @@ -340,45 +371,95 @@ Mat CV_CalibrateHandEyeTest::homogeneousInverse(const Mat& T) return Tinv; } -std::string CV_CalibrateHandEyeTest::getMethodName(HandEyeCalibrationMethod method) +double CV_CalibrateHandEyeTest::sign_double(double val) { - std::string method_name = ""; - switch (method) - { 
- case CALIB_HAND_EYE_TSAI: - method_name = "Tsai"; - break; + return (0 < val) - (val < 0); +} - case CALIB_HAND_EYE_PARK: - method_name = "Park"; - break; +/////////////////////////////////////////////////////////////////////////////////////////////////// - case CALIB_HAND_EYE_HORAUD: - method_name = "Horaud"; - break; +TEST(Calib3d_CalibrateHandEye, regression) { CV_CalibrateHandEyeTest test; test.safe_run(); } - case CALIB_HAND_EYE_ANDREFF: - method_name = "Andreff"; - break; +TEST(Calib3d_CalibrateHandEye, regression_17986) +{ + const std::string camera_poses_filename = findDataFile("cv/hand_eye_calibration/cali.txt"); + const std::string end_effector_poses = findDataFile("cv/hand_eye_calibration/robot_cali.txt"); - case CALIB_HAND_EYE_DANIILIDIS: - method_name = "Daniilidis"; - break; + std::vector R_target2cam; + std::vector t_target2cam; + // Parse camera poses + { + std::ifstream file(camera_poses_filename.c_str()); + ASSERT_TRUE(file.is_open()); + + int ndata = 0; + file >> ndata; + R_target2cam.reserve(ndata); + t_target2cam.reserve(ndata); + + std::string image_name; + Matx33d cameraMatrix; + Matx33d R; + Matx31d t; + Matx16d distCoeffs; + Matx13d distCoeffs2; + while (file >> image_name >> + cameraMatrix(0,0) >> cameraMatrix(0,1) >> cameraMatrix(0,2) >> + cameraMatrix(1,0) >> cameraMatrix(1,1) >> cameraMatrix(1,2) >> + cameraMatrix(2,0) >> cameraMatrix(2,1) >> cameraMatrix(2,2) >> + R(0,0) >> R(0,1) >> R(0,2) >> + R(1,0) >> R(1,1) >> R(1,2) >> + R(2,0) >> R(2,1) >> R(2,2) >> + t(0) >> t(1) >> t(2) >> + distCoeffs(0) >> distCoeffs(1) >> distCoeffs(2) >> distCoeffs(3) >> distCoeffs(4) >> + distCoeffs2(0) >> distCoeffs2(1) >> distCoeffs2(2)) { + R_target2cam.push_back(Mat(R)); + t_target2cam.push_back(Mat(t)); + } + } - default: - break; + std::vector R_gripper2base; + std::vector t_gripper2base; + // Parse end-effector poses + { + std::ifstream file(end_effector_poses.c_str()); + ASSERT_TRUE(file.is_open()); + + int ndata = 0; + file >> ndata; + R_gripper2base.reserve(ndata); + t_gripper2base.reserve(ndata); + + Matx33d R; + Matx31d t; + Matx14d last_row; + while (file >> + R(0,0) >> R(0,1) >> R(0,2) >> t(0) >> + R(1,0) >> R(1,1) >> R(1,2) >> t(1) >> + R(2,0) >> R(2,1) >> R(2,2) >> t(2) >> + last_row(0) >> last_row(1) >> last_row(2) >> last_row(3)) { + R_gripper2base.push_back(Mat(R)); + t_gripper2base.push_back(Mat(t)); + } } - return method_name; -} + std::vector methods; + methods.push_back(CALIB_HAND_EYE_TSAI); + methods.push_back(CALIB_HAND_EYE_PARK); + methods.push_back(CALIB_HAND_EYE_HORAUD); + methods.push_back(CALIB_HAND_EYE_ANDREFF); + methods.push_back(CALIB_HAND_EYE_DANIILIDIS); -double CV_CalibrateHandEyeTest::sign_double(double val) -{ - return (0 < val) - (val < 0); -} + for (size_t idx = 0; idx < methods.size(); idx++) { + SCOPED_TRACE(cv::format("method=%s", getMethodName(methods[idx]).c_str())); -/////////////////////////////////////////////////////////////////////////////////////////////////// + Matx33d R_cam2gripper_est; + Matx31d t_cam2gripper_est; + calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R_cam2gripper_est, t_cam2gripper_est, methods[idx]); -TEST(Calib3d_CalibrateHandEye, regression) { CV_CalibrateHandEyeTest test; test.safe_run(); } + EXPECT_TRUE(checkRange(R_cam2gripper_est)); + EXPECT_TRUE(checkRange(t_cam2gripper_est)); + } +} }} // namespace From a160e4fb6b0f3715f0abd83eab2756581e9879ab Mon Sep 17 00:00:00 2001 From: jinyup100 <41290732+jinyup100@users.noreply.github.com> Date: Wed, 26 Aug 2020 05:01:16 +0900 
Subject: [PATCH 11/12] Merge pull request #17647 from jinyup100:add-siamrpnpp [GSoC] Add siamrpnpp.py * Updated base branch with siamrpnpp.py * Addition of Parsers * Merged to using few ONNX files, Changes to Parsers, Links to Repo * Deleted whitespace * Adjusting flake8 error * Fixes according to review * Fix according to review * Addition of OpenVINO backends and Computation target devices * Fix on backend after review * Fixes after review * Remove extra white space * Removed Repeated Varaibles --- samples/dnn/siamrpnpp.py | 397 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 397 insertions(+) create mode 100644 samples/dnn/siamrpnpp.py diff --git a/samples/dnn/siamrpnpp.py b/samples/dnn/siamrpnpp.py new file mode 100644 index 0000000000..bb126b71e5 --- /dev/null +++ b/samples/dnn/siamrpnpp.py @@ -0,0 +1,397 @@ +import argparse +import cv2 as cv +import numpy as np +import os + +""" +Link to original paper : https://arxiv.org/abs/1812.11703 +Link to original repo : https://github.com/STVIR/pysot + +You can download the pre-trained weights of the Tracker Model from https://drive.google.com/file/d/11bwgPFVkps9AH2NOD1zBDdpF_tQghAB-/view?usp=sharing +You can download the target net (target branch of SiamRPN++) from https://drive.google.com/file/d/1dw_Ne3UMcCnFsaD6xkZepwE4GEpqq7U_/view?usp=sharing +You can download the search net (search branch of SiamRPN++) from https://drive.google.com/file/d/1Lt4oE43ZSucJvze3Y-Z87CVDreO-Afwl/view?usp=sharing +You can download the head model (RPN Head) from https://drive.google.com/file/d/1zT1yu12mtj3JQEkkfKFJWiZ71fJ-dQTi/view?usp=sharing +""" + +class ModelBuilder(): + """ This class generates the SiamRPN++ Tracker Model by using Imported ONNX Nets + """ + def __init__(self, target_net, search_net, rpn_head): + super(ModelBuilder, self).__init__() + # Build the target branch + self.target_net = target_net + # Build the search branch + self.search_net = search_net + # Build RPN_Head + self.rpn_head = rpn_head + + def template(self, z): + """ Takes the template of size (1, 1, 127, 127) as an input to generate kernel + """ + self.target_net.setInput(z) + outNames = self.target_net.getUnconnectedOutLayersNames() + self.zfs_1, self.zfs_2, self.zfs_3 = self.target_net.forward(outNames) + + def track(self, x): + """ Takes the search of size (1, 1, 255, 255) as an input to generate classification score and bounding box regression + """ + self.search_net.setInput(x) + outNames = self.search_net.getUnconnectedOutLayersNames() + xfs_1, xfs_2, xfs_3 = self.search_net.forward(outNames) + self.rpn_head.setInput(np.stack([self.zfs_1, self.zfs_2, self.zfs_3]), 'input_1') + self.rpn_head.setInput(np.stack([xfs_1, xfs_2, xfs_3]), 'input_2') + outNames = self.rpn_head.getUnconnectedOutLayersNames() + cls, loc = self.rpn_head.forward(outNames) + return {'cls': cls, 'loc': loc} + +class Anchors: + """ This class generate anchors. + """ + def __init__(self, stride, ratios, scales, image_center=0, size=0): + self.stride = stride + self.ratios = ratios + self.scales = scales + self.image_center = image_center + self.size = size + self.anchor_num = len(self.scales) * len(self.ratios) + self.anchors = self.generate_anchors() + + def generate_anchors(self): + """ + generate anchors based on predefined configuration + """ + anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) + size = self.stride**2 + count = 0 + for r in self.ratios: + ws = int(np.sqrt(size * 1. 
/ r)) + hs = int(ws * r) + + for s in self.scales: + w = ws * s + h = hs * s + anchors[count][:] = [-w * 0.5, -h * 0.5, w * 0.5, h * 0.5][:] + count += 1 + return anchors + +class SiamRPNTracker: + def __init__(self, model): + super(SiamRPNTracker, self).__init__() + self.anchor_stride = 8 + self.anchor_ratios = [0.33, 0.5, 1, 2, 3] + self.anchor_scales = [8] + self.track_base_size = 8 + self.track_context_amount = 0.5 + self.track_exemplar_size = 127 + self.track_instance_size = 255 + self.track_lr = 0.4 + self.track_penalty_k = 0.04 + self.track_window_influence = 0.44 + self.score_size = (self.track_instance_size - self.track_exemplar_size) // \ + self.anchor_stride + 1 + self.track_base_size + self.anchor_num = len(self.anchor_ratios) * len(self.anchor_scales) + hanning = np.hanning(self.score_size) + window = np.outer(hanning, hanning) + self.window = np.tile(window.flatten(), self.anchor_num) + self.anchors = self.generate_anchor(self.score_size) + self.model = model + + def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans): + """ + Args: + im: bgr based input image frame + pos: position of the center of the frame + model_sz: exemplar / target image size + s_z: original / search image size + avg_chans: channel average + Return: + im_patch: sub_windows for the given image input + """ + if isinstance(pos, float): + pos = [pos, pos] + sz = original_sz + im_h, im_w, im_d = im.shape + c = (original_sz + 1) / 2 + cx, cy = pos + context_xmin = np.floor(cx - c + 0.5) + context_xmax = context_xmin + sz - 1 + context_ymin = np.floor(cy - c + 0.5) + context_ymax = context_ymin + sz - 1 + left_pad = int(max(0., -context_xmin)) + top_pad = int(max(0., -context_ymin)) + right_pad = int(max(0., context_xmax - im_w + 1)) + bottom_pad = int(max(0., context_ymax - im_h + 1)) + context_xmin += left_pad + context_xmax += left_pad + context_ymin += top_pad + context_ymax += top_pad + + if any([top_pad, bottom_pad, left_pad, right_pad]): + size = (im_h + top_pad + bottom_pad, im_w + left_pad + right_pad, im_d) + te_im = np.zeros(size, np.uint8) + te_im[top_pad:top_pad + im_h, left_pad:left_pad + im_w, :] = im + if top_pad: + te_im[0:top_pad, left_pad:left_pad + im_w, :] = avg_chans + if bottom_pad: + te_im[im_h + top_pad:, left_pad:left_pad + im_w, :] = avg_chans + if left_pad: + te_im[:, 0:left_pad, :] = avg_chans + if right_pad: + te_im[:, im_w + left_pad:, :] = avg_chans + im_patch = te_im[int(context_ymin):int(context_ymax + 1), + int(context_xmin):int(context_xmax + 1), :] + else: + im_patch = im[int(context_ymin):int(context_ymax + 1), + int(context_xmin):int(context_xmax + 1), :] + + if not np.array_equal(model_sz, original_sz): + im_patch = cv.resize(im_patch, (model_sz, model_sz)) + im_patch = im_patch.transpose(2, 0, 1) + im_patch = im_patch[np.newaxis, :, :, :] + im_patch = im_patch.astype(np.float32) + return im_patch + + def generate_anchor(self, score_size): + """ + Args: + im: bgr based input image frame + pos: position of the center of the frame + model_sz: exemplar / target image size + s_z: original / search image size + avg_chans: channel average + Return: + anchor: anchors for pre-determined values of stride, ratio, and scale + """ + anchors = Anchors(self.anchor_stride, self.anchor_ratios, self.anchor_scales) + anchor = anchors.anchors + x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3] + anchor = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1) + total_stride = anchors.stride + anchor_num = anchors.anchor_num + anchor = 
np.tile(anchor, score_size * score_size).reshape((-1, 4)) + ori = - (score_size // 2) * total_stride + xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], + [ori + total_stride * dy for dy in range(score_size)]) + xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ + np.tile(yy.flatten(), (anchor_num, 1)).flatten() + anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) + return anchor + + def _convert_bbox(self, delta, anchor): + """ + Args: + delta: localisation + anchor: anchor of pre-determined anchor size + Return: + delta: prediction of bounding box + """ + delta_transpose = np.transpose(delta, (1, 2, 3, 0)) + delta_contig = np.ascontiguousarray(delta_transpose) + delta = delta_contig.reshape(4, -1) + delta[0, :] = delta[0, :] * anchor[:, 2] + anchor[:, 0] + delta[1, :] = delta[1, :] * anchor[:, 3] + anchor[:, 1] + delta[2, :] = np.exp(delta[2, :]) * anchor[:, 2] + delta[3, :] = np.exp(delta[3, :]) * anchor[:, 3] + return delta + + def _softmax(self, x): + """ + Softmax in the direction of the depth of the layer + """ + x = x.astype(dtype=np.float32) + x_max = x.max(axis=1)[:, np.newaxis] + e_x = np.exp(x-x_max) + div = np.sum(e_x, axis=1)[:, np.newaxis] + y = e_x / div + return y + + def _convert_score(self, score): + """ + Args: + cls: score + Return: + cls: score for cls + """ + score_transpose = np.transpose(score, (1, 2, 3, 0)) + score_con = np.ascontiguousarray(score_transpose) + score_view = score_con.reshape(2, -1) + score = np.transpose(score_view, (1, 0)) + score = self._softmax(score) + return score[:,1] + + def _bbox_clip(self, cx, cy, width, height, boundary): + """ + Adjusting the bounding box + """ + bbox_h, bbox_w = boundary + cx = max(0, min(cx, bbox_w)) + cy = max(0, min(cy, bbox_h)) + width = max(10, min(width, bbox_w)) + height = max(10, min(height, bbox_h)) + return cx, cy, width, height + + def init(self, img, bbox): + """ + Args: + img(np.ndarray): bgr based input image frame + bbox: (x,y,w,h): bounding box + """ + x,y,h,w = bbox + self.center_pos = np.array([x + (h - 1) / 2, y + (w - 1) / 2]) + self.h = h + self.w = w + w_z = self.w + self.track_context_amount * np.add(h, w) + h_z = self.h + self.track_context_amount * np.add(h, w) + s_z = round(np.sqrt(w_z * h_z)) + self.channel_average = np.mean(img, axis=(0, 1)) + z_crop = self.get_subwindow(img, self.center_pos, self.track_exemplar_size, s_z, self.channel_average) + self.model.template(z_crop) + + def track(self, img): + """ + Args: + img(np.ndarray): BGR image + Return: + bbox(list):[x, y, width, height] + """ + w_z = self.w + self.track_context_amount * np.add(self.w, self.h) + h_z = self.h + self.track_context_amount * np.add(self.w, self.h) + s_z = np.sqrt(w_z * h_z) + scale_z = self.track_exemplar_size / s_z + s_x = s_z * (self.track_instance_size / self.track_exemplar_size) + x_crop = self.get_subwindow(img, self.center_pos, self.track_instance_size, round(s_x), self.channel_average) + outputs = self.model.track(x_crop) + score = self._convert_score(outputs['cls']) + pred_bbox = self._convert_bbox(outputs['loc'], self.anchors) + + def change(r): + return np.maximum(r, 1. 
/ r) + + def sz(w, h): + pad = (w + h) * 0.5 + return np.sqrt((w + pad) * (h + pad)) + + # scale penalty + s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) / + (sz(self.w * scale_z, self.h * scale_z))) + + # aspect ratio penalty + r_c = change((self.w / self.h) / + (pred_bbox[2, :] / pred_bbox[3, :])) + penalty = np.exp(-(r_c * s_c - 1) * self.track_penalty_k) + pscore = penalty * score + + # window penalty + pscore = pscore * (1 - self.track_window_influence) + \ + self.window * self.track_window_influence + best_idx = np.argmax(pscore) + bbox = pred_bbox[:, best_idx] / scale_z + lr = penalty[best_idx] * score[best_idx] * self.track_lr + + cpx, cpy = self.center_pos + x,y,w,h = bbox + cx = x + cpx + cy = y + cpy + + # smooth bbox + width = self.w * (1 - lr) + w * lr + height = self.h * (1 - lr) + h * lr + + # clip boundary + cx, cy, width, height = self._bbox_clip(cx, cy, width, height, img.shape[:2]) + + # udpate state + self.center_pos = np.array([cx, cy]) + self.w = width + self.h = height + bbox = [cx - width / 2, cy - height / 2, width, height] + best_score = score[best_idx] + return {'bbox': bbox, 'best_score': best_score} + +def get_frames(video_name): + """ + Args: + Path to input video frame + Return: + Frame + """ + cap = cv.VideoCapture(video_name if video_name else 0) + while True: + ret, frame = cap.read() + if ret: + yield frame + else: + break + +def main(): + """ Sample SiamRPN Tracker + """ + # Computation backends supported by layers + backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) + # Target Devices for computation + targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) + + parser = argparse.ArgumentParser(description='Use this script to run SiamRPN++ Visual Tracker', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--input_video', type=str, help='Path to input video file. 
Skip this argument to capture frames from a camera.') + parser.add_argument('--target_net', type=str, default='target_net.onnx', help='Path to part of SiamRPN++ ran on target frame.') + parser.add_argument('--search_net', type=str, default='search_net.onnx', help='Path to part of SiamRPN++ ran on search frame.') + parser.add_argument('--rpn_head', type=str, default='rpn_head.onnx', help='Path to RPN Head ONNX model.') + parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + help='Select a computation backend: ' + "%d: automatically (by default) " + "%d: Halide" + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)" + "%d: OpenCV Implementation" % backends) + parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, + help='Select a target device: ' + "%d: CPU target (by default)" + "%d: OpenCL" + "%d: OpenCL FP16" + "%d: Myriad" % targets) + args, _ = parser.parse_known_args() + + if args.input_video and not os.path.isfile(args.input_video): + raise OSError("Input video file does not exist") + if not os.path.isfile(args.target_net): + raise OSError("Target Net does not exist") + if not os.path.isfile(args.search_net): + raise OSError("Search Net does not exist") + if not os.path.isfile(args.rpn_head): + raise OSError("RPN Head Net does not exist") + + #Load the Networks + target_net = cv.dnn.readNetFromONNX(args.target_net) + target_net.setPreferableBackend(args.backend) + target_net.setPreferableTarget(args.target) + search_net = cv.dnn.readNetFromONNX(args.search_net) + search_net.setPreferableBackend(args.backend) + search_net.setPreferableTarget(args.target) + rpn_head = cv.dnn.readNetFromONNX(args.rpn_head) + rpn_head.setPreferableBackend(args.backend) + rpn_head.setPreferableTarget(args.target) + model = ModelBuilder(target_net, search_net, rpn_head) + tracker = SiamRPNTracker(model) + + first_frame = True + cv.namedWindow('SiamRPN++ Tracker', cv.WINDOW_AUTOSIZE) + for frame in get_frames(args.input_video): + if first_frame: + try: + init_rect = cv.selectROI('SiamRPN++ Tracker', frame, False, False) + except: + exit() + tracker.init(frame, init_rect) + first_frame = False + else: + outputs = tracker.track(frame) + bbox = list(map(int, outputs['bbox'])) + x,y,w,h = bbox + cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3) + cv.imshow('SiamRPN++ Tracker', frame) + key = cv.waitKey(1) + if key == ord("q"): + break + +if __name__ == '__main__': + main() From 9aa401a7d0510720728f71bcd65e9ffdda954db2 Mon Sep 17 00:00:00 2001 From: Sergei Slashchinin <62052793+sl-sergei@users.noreply.github.com> Date: Wed, 26 Aug 2020 13:15:59 +0300 Subject: [PATCH 12/12] Merge pull request #17978 from sl-sergei:fix_17516_17531 * Fix ONNX loading in issues opencv#17516, opencv#17531 * Add tests for Linear and Matmul layers * Disable tests for IE versions lower than 20.4 * Skip unstable tests with OpenCL FP16 on Intel GPU * Add correct test filtering for OpenCL FP16 tests --- modules/dnn/src/onnx/onnx_importer.cpp | 13 +++++++++++++ modules/dnn/test/test_onnx_importer.cpp | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index ccef2f6936..8cb2c5eda1 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -946,6 +946,19 @@ void ONNXImporter::populateNet(Net dstNet) Mat bias = getBlob(node_proto, constBlobs, 2); layerParams.blobs.push_back(bias); } 
+ if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inputBuf = getBlob(node_proto, constBlobs, 0); + + LayerParams constParams; + constParams.name = node_proto.input(0); + constParams.type = "Const"; + constParams.blobs.push_back(inputBuf); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(dstNet, constParams, proto, layer_id, outShapes); + } layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]); layerParams.set("bias_term", node_proto.input_size() == 3); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index c68215ee30..1eb848897c 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -611,6 +611,26 @@ TEST_P(Test_ONNX_layers, Pad2d_Unfused) testONNXModels("ZeroPad2d"); } +TEST_P(Test_ONNX_layers, LinearWithConstant) +{ + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE); +#endif + testONNXModels("lin_with_constant"); +} + +TEST_P(Test_ONNX_layers, MatmulWithTwoInputs) +{ + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE); +#endif + testONNXModels("matmul_with_two_inputs"); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets()); class Test_ONNX_nets : public Test_ONNX_layers
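A rough way to exercise the new two-input MatMul path from Python; the ONNX file name, the input names and the shapes below are assumptions made for illustration, not taken from the test data:

    import cv2 as cv
    import numpy as np

    net = cv.dnn.readNetFromONNX('matmul_with_two_inputs.onnx')  # assumed local file
    a = np.random.rand(1, 2, 3).astype(np.float32)
    b = np.random.rand(1, 3, 4).astype(np.float32)
    net.setInput(a, 'input_1')   # input names must match the model's graph inputs
    net.setInput(b, 'input_2')
    out = net.forward()
    print(out.shape)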