79 changed files with 1054 additions and 46 deletions
@ -0,0 +1,141 @@ |
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2020, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp" |
#include "layers_common.hpp" |
namespace cv { namespace dnn { |
/**
 * Concatenates its inputs along the channel axis into a single 4D output,
 * resizing (bilinear, align_corners) every input whose height/width differs
 * from the output's.
 *
 * Layer parameters:
 *  - "top_height" / "top_width": requested output size (default 0);
 *  - "size_divisible_by": when non-zero, the output size is derived from the
 *    largest input and this divisor instead of top_height/top_width;
 *  - "have_reference": the string "true" means the last input only supplies
 *    the output height/width and is not itself copied to the output.
 */
class AccumLayerImpl CV_FINAL : public AccumLayer
{
public:
    AccumLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        top_height = params.get<int>("top_height", 0);
        top_width = params.get<int>("top_width", 0);
        divisor = params.get<int>("size_divisible_by", 0);
        have_reference = params.get<String>("have_reference", "false") == "true";
    }

    /**
     * Computes the single output shape {batch, sum of channels, height, width}.
     * Always returns false.
     */
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        // Dimensions 0..3 of the inputs are indexed below (and in forward()),
        // so reject anything that is not a 4D blob up front.
        CV_Assert(!inputs.empty());
        for (size_t i = 0; i < inputs.size(); i++)
            CV_Assert(inputs[i].size() == 4);

        const int batch = inputs[0][0];
        int totalchannels = 0;
        int out_h = 0;
        int out_w = 0;

        if (have_reference)
        {
            CV_Assert(inputs.size() >= 2);
            // Every input except the trailing reference contributes channels.
            for (size_t i = 0; i + 1 < inputs.size(); i++)
            {
                CV_Assert(inputs[i][0] == batch);
                totalchannels += inputs[i][1];
            }
            // The reference blob dictates the spatial size of the output.
            out_h = inputs.back()[2];
            out_w = inputs.back()[3];
        }
        else
        {
            int maxwidth = -1;
            int maxheight = -1;

            // Find largest blob size and count total channels
            for (size_t i = 0; i < inputs.size(); ++i)
            {
                CV_Assert(inputs[i][0] == batch);
                totalchannels += inputs[i][1];
                maxheight = std::max(maxheight, inputs[i][2]);
                maxwidth = std::max(maxwidth, inputs[i][3]);
            }

            // NOTE(review): maxheight / divisor is an integer division, so ceil()
            // has no effect and the result rounds down. Left unchanged on purpose
            // because rounding up would change the output shape - confirm the
            // intended behaviour against the reference implementation.
            out_h = divisor ? static_cast<int>(ceil(maxheight / divisor) * divisor) : top_height;
            out_w = divisor ? static_cast<int>(ceil(maxwidth / divisor) * divisor) : top_width;

            // Layer can specify custom top size which is larger than default
            if (out_h <= maxheight || out_w <= maxwidth)
            {
                out_h = maxheight;
                out_w = maxwidth;
            }
        }

        std::vector<int> outShape;
        outShape.push_back(batch);
        outShape.push_back(totalchannels);
        outShape.push_back(out_h);
        outShape.push_back(out_w);

        outputs.assign(1, outShape);
        return false;
    }

    /** Creates the internal resize layer used for inputs of a different spatial size. */
    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        LayerParams resizeParams;
        resizeParams.set("interpolation", "bilinear");
        resizeParams.set("align_corners", true);
        resize = ResizeLayer::create(resizeParams);
    }

    /**
     * Writes every non-reference input into consecutive channel ranges of
     * outputs[0], resizing it first when its height/width differs.
     */
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        // The reference input (if any) is the last one and is never copied.
        const size_t numReference = have_reference ? 1 : 0;
        CV_Assert(inputs.size() > numReference);
        const size_t numAccumulated = inputs.size() - numReference;

        const int out_h = outputs[0].size[2];
        const int out_w = outputs[0].size[3];
        float* out_data = outputs[0].ptr<float>();
        std::vector<int> sizes(&outputs[0].size[0], &outputs[0].size[0] + outputs[0].size.dims());

        for (size_t i = 0; i < numAccumulated; i++)
        {
            // View over the output memory with this input's channel count.
            sizes[1] = inputs[i].size[1];
            Mat outSlice(sizes, CV_32F, out_data);

            if (out_h == inputs[i].size[2] && out_w == inputs[i].size[3])
            {
                inputs[i].copyTo(outSlice);
            }
            else
            {
                std::vector<Mat> inp_slices, out_slices;
                inp_slices.push_back(inputs[i]);
                out_slices.push_back(outSlice);

                resize->finalize(inp_slices, out_slices);
                resize->forward(inp_slices, out_slices, internals_arr);
            }

            // Advance past the channels just written (channels * height * width).
            // NOTE(review): this yields a correct channel concatenation for a
            // batch of one; confirm the intended layout for larger batches.
            out_data += outSlice.total(1);
        }
    }

private:
    int top_height;
    int top_width;
    int divisor;
    bool have_reference;
    Ptr<ResizeLayer> resize;
};
// Factory: builds the concrete implementation and returns it through the public interface type.
Ptr<AccumLayer> AccumLayer::create(const LayerParams& params)
{
    Ptr<AccumLayer> layer(new AccumLayerImpl(params));
    return layer;
}
}} // namespace cv::dnn
@ -0,0 +1,207 @@ |
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2020, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp" |
#include "layers_common.hpp" |
namespace cv { namespace dnn { |
/**
 * Correlation between two 4D blobs of identical shape.
 *
 * For every output position and every displacement on a
 * (2 * (max_displacement / stride_2) + 1)^2 grid, the layer averages the
 * element-wise products of a kernel x kernel patch (over all channels) of the
 * first input with the correspondingly displaced patch of the second input.
 *
 * Layer parameters: "kernel_size" (required, odd), "max_displacement"
 * (required), "pad" (default 0), "stride_1" (default 1), "stride_2" (default 1).
 */
class CorrelationLayerImpl CV_FINAL : public CorrelationLayer
{
public:
    CorrelationLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        pad = params.get<int>("pad", 0);
        CV_Assert_N(params.has("kernel_size"), params.has("max_displacement"));
        max_displacement = params.get<int>("max_displacement");
        kernel = params.get<int>("kernel_size");
        if (kernel % 2 == 0)
            CV_Error(Error::StsNotImplemented, "Odd kernel size required.");

        stride_1 = params.get<int>("stride_1", 1);
        stride_2 = params.get<int>("stride_2", 1);
        // Both strides are used as divisors / step sizes below; zero or negative
        // values would divide by zero or produce a meaningless grid.
        CV_Assert_N(kernel > 0, stride_1 > 0, stride_2 > 0);
    }

    /**
     * Output shape is {batch, grid_width^2, out_h, out_w}, where out_h/out_w
     * cover the padded input minus a border of max_displacement + kernel radius
     * on each side, sampled with stride_1. Always returns false.
     */
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert_N(inputs.size() == 2, inputs[0].size() == 4, inputs[1].size() == 4);
        // The kernels index the second input with the first input's geometry,
        // so differing shapes would read/write out of bounds.
        CV_Assert(inputs[0] == inputs[1]);

        int padded_height = inputs[0][2] + 2 * pad;
        int padded_width  = inputs[0][3] + 2 * pad;

        int kernel_radius = (kernel - 1) / 2;
        int border_size = max_displacement + kernel_radius;

        int neighborhood_grid_radius = max_displacement / stride_2;
        int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;

        std::vector<int> outShape;

        int num = inputs[0][0];
        outShape.push_back(num);

        int out_c = neighborhood_grid_width * neighborhood_grid_width;
        outShape.push_back(out_c);

        int out_h = ceil(static_cast<float>(padded_height - border_size * 2) / stride_1);
        int out_w = ceil(static_cast<float>(padded_width - border_size * 2)  / stride_1);
        CV_Assert_N(out_h >= 1, out_w >= 1);

        outShape.push_back(out_h);
        outShape.push_back(out_w);
        outputs.assign(1, outShape);
        return false;
    }

    /** Allocates the zero-filled, padded, channels-last scratch copies of both inputs. */
    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);

        int padded_height = inputs[0].size[2] + 2 * pad;
        int padded_width  = inputs[0].size[3] + 2 * pad;

        // Layout: {batch, padded height, padded width, channels}.
        int size[] = {inputs[0].size[0], padded_height, padded_width, inputs[0].size[1]};
        rbot0 = Mat(4, &size[0], CV_32F, float(0));
        rbot1 = Mat(4, &size[0], CV_32F, float(0));
    }

    /**
     * Copies `input` (batch, channels, height, width) into `output`
     * (batch, height + 2*pad, width + 2*pad, channels), offset by `pad` in both
     * spatial dimensions. Elements of the padding border are not written.
     */
    void blobRearrangeKernel2(const Mat& input, Mat& output)
    {
        const int num      = input.size[0];
        const int channels = input.size[1];
        const int height   = input.size[2];
        const int width    = input.size[3];
        const int area     = height * width;
        const int pad_area = (width + 2 * pad) * (height + 2 * pad);

        const float* in = input.ptr<float>();
        float* out = output.ptr<float>();
        for (int n = 0; n < num; n++)
        {
            for (int ch = 0; ch < channels; ch++)
            {
                for (int xy = 0; xy < area; xy++)
                {
                    float value = in[(n * channels + ch) * area + xy];
                    int xpad  = (xy % width + pad);
                    int ypad  = (xy / width + pad);
                    int xypad = ypad * (width + 2 * pad) + xpad;
                    out[(n * pad_area + xypad) * channels + ch] = value;
                }
            }
        }
    }

    /**
     * Fills batch item `item` of `output` with the correlation of the rearranged
     * inputs `input0` and `input1` (both laid out as batch, height, width,
     * channels).
     */
    void correlationKernelSubtraction(const Mat& input0, const Mat& input1, Mat& output, int item)
    {
        const int inp_h = input0.size[1];
        const int inp_w = input0.size[2];
        const int inp_c = input0.size[3];

        const int out_c = output.size[1];
        const int out_h = output.size[2];
        const int out_w = output.size[3];

        // Number of output elements belonging to one batch item (C * H * W).
        const int topcount = static_cast<int>(output.total(1));
        int neighborhood_grid_radius = max_displacement / stride_2;
        int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;

        const float* inp0_data = input0.ptr<float>();
        const float* inp1_data = input1.ptr<float>();
        float* out_data = output.ptr<float>();
        int sumelems = kernel * kernel * inp_c;
        std::vector<float> patch_data(sumelems, 0);
        for (int y = 0; y < out_h; y++)
        {
            for (int x = 0; x < out_w; x++)
            {
                // Top-left corner of the patch in the first (padded) input.
                int x1 = x * stride_1 + max_displacement;
                int y1 = y * stride_1 + max_displacement;

                // Cache the first-input patch; it is reused for every displacement.
                for (int j = 0; j < kernel; j++)
                {
                    for (int i = 0; i < kernel; i++)
                    {
                        int ji_off = ((j * kernel) + i) * inp_c;
                        for (int ch = 0; ch < inp_c; ch++)
                        {
                            int idx1 = ((item * inp_h + y1 + j) * inp_w + x1 + i) * inp_c + ch;
                            int idxPatchData = ji_off + ch;
                            patch_data[idxPatchData] = inp0_data[idx1];
                        }
                    }
                }

                // One output channel per displacement on the neighbourhood grid.
                for (int out_ch = 0; out_ch < out_c; out_ch++)
                {
                    float sum = 0;
                    int s2o = (out_ch % neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
                    int s2p = (out_ch / neighborhood_grid_width - neighborhood_grid_radius) * stride_2;

                    // Top-left corner of the displaced patch in the second input.
                    int x2 = x1 + s2o;
                    int y2 = y1 + s2p;
                    for (int j = 0; j < kernel; j++)
                    {
                        for (int i = 0; i < kernel; i++)
                        {
                            int ji_off = ((j * kernel) + i) * inp_c;
                            for (int ch = 0; ch < inp_c; ch++)
                            {
                                int idxPatchData = ji_off + ch;
                                int idx2 = ((item * inp_h + y2 + j) * inp_w + x2 + i) * inp_c + ch;
                                sum += patch_data[idxPatchData] * inp1_data[idx2];
                            }
                        }
                    }
                    int index = ((out_ch * out_h + y) * out_w) + x;
                    // Normalise by the number of multiplied element pairs.
                    out_data[index + item * topcount] = static_cast<float>(sum) / sumelems;
                }
            }
        }
    }

    /** Rearranges both inputs into the scratch buffers and correlates them item by item. */
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        blobRearrangeKernel2(inputs[0], rbot0);
        blobRearrangeKernel2(inputs[1], rbot1);
        for (int i = 0; i < inputs[0].size[0]; i++)
        {
            correlationKernelSubtraction(rbot0, rbot1, outputs[0], i);
        }
    }

private:
    int pad;
    int kernel;
    int max_displacement;
    int stride_1;
    int stride_2;
    Mat rbot0;  // padded, channels-last copy of input 0 (allocated in finalize)
    Mat rbot1;  // padded, channels-last copy of input 1 (allocated in finalize)
};
// Factory: builds the concrete implementation and returns it through the public interface type.
Ptr<CorrelationLayer> CorrelationLayer::create(const LayerParams& params)
{
    Ptr<CorrelationLayer> layer(new CorrelationLayerImpl(params));
    return layer;
}
}} // namespace cv::dnn
@ -0,0 +1,117 @@ |
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2020, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp" |
#include "layers_common.hpp" |
namespace cv { namespace dnn { |
/**
 * Warps an image blob (input 0) by a 2-channel flow field (input 1).
 *
 * Each output pixel (x, y) is sampled from the image at (x + fx, y + fy) using
 * bilinear interpolation, where fx / fy are channels 0 / 1 of the flow.
 * Pixels whose source position falls outside the image are set to the fill
 * value; only zero filling ("FillParameter" == "ZERO", case-insensitive) is
 * supported.
 */
class FlowWarpLayerImpl CV_FINAL : public FlowWarpLayer
{
public:
    FlowWarpLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        String fill_string = toLowerCase(params.get<String>("FillParameter", "ZERO"));
        if (fill_string != "zero")
            CV_Error(Error::StsNotImplemented, "Only zero filling supported.");
        fill_value = 0;
    }

    /** The output has the shape of the image input. Always returns false. */
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 2);
        // Dimensions 0..3 are indexed below, so both blobs must be 4D.
        CV_Assert_N(inputs[0].size() == 4, inputs[1].size() == 4);
        CV_Assert_N(inputs[0][0] == inputs[1][0], inputs[1][1] == 2,
                    inputs[0][2] == inputs[1][2], inputs[0][3] == inputs[1][3]);

        outputs.assign(1, inputs[0]);
        return false;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        const int out_n = outputs[0].size[0];
        const int out_c = outputs[0].size[1];
        const int out_h = outputs[0].size[2];
        const int out_w = outputs[0].size[3];

        const int area = out_w * out_h;
        const int total = area * out_c;

        const float* image_data = inputs[0].ptr<float>();
        const float* flow_data  = inputs[1].ptr<float>();
        float* out_data = outputs[0].ptr<float>();

        for (int n = 0; n < out_n; n++)
        {
            const int off = total * n;          // start of image/output item n
            const int flow_off = 2 * area * n;  // start of flow item n (2 channels)

            // Rows outermost: the data is row-major, and every output pixel is
            // computed independently, so this order only improves locality.
            for (int y = 0; y < out_h; y++)
            {
                for (int x = 0; x < out_w; x++)
                {
                    const int pos = y * out_w + x;
                    const float fx = flow_data[flow_off + pos];
                    const float fy = flow_data[flow_off + area + pos];

                    // Source position in the image.
                    const float x2 = x + fx;
                    const float y2 = y + fy;

                    if (x2 >= 0 && y2 >= 0 && x2 < out_w && y2 < out_h)
                    {
                        // Integer corners of the cell containing (x2, y2) and the
                        // fractional offsets inside it; the right/bottom corners
                        // are clamped to the last column/row.
                        const int ix2_L = static_cast<int>(x2);
                        const float alpha = x2 - ix2_L;

                        const int iy2_T = static_cast<int>(y2);
                        const float beta = y2 - iy2_T;

                        const int ix2_R = std::min(ix2_L + 1, out_w - 1);
                        const int iy2_B = std::min(iy2_T + 1, out_h - 1);

                        // Bilinear weights are the same for every channel.
                        const float wTL = (1 - alpha) * (1 - beta);
                        const float wBL = (1 - alpha) * beta;
                        const float wTR = alpha * (1 - beta);
                        const float wBR = alpha * beta;

                        for (int c = 0; c < out_c; c++)
                        {
                            const float* plane = image_data + off + c * area;
                            const float TL = plane[iy2_T * out_w + ix2_L];
                            const float TR = plane[iy2_T * out_w + ix2_R];
                            const float BL = plane[iy2_B * out_w + ix2_L];
                            const float BR = plane[iy2_B * out_w + ix2_R];

                            out_data[off + c * area + pos] = wTL * TL +
                                                             wBL * BL +
                                                             wTR * TR +
                                                             wBR * BR;
                        }
                    }
                    else
                    {
                        // Source lies outside the image: write the fill value.
                        for (int c = 0; c < out_c; c++)
                            out_data[off + c * area + pos] = fill_value;
                    }
                }
            }
        }
    }

private:
    float fill_value;
};
// Factory: builds the concrete implementation and returns it through the public interface type.
Ptr<FlowWarpLayer> FlowWarpLayer::create(const LayerParams& params)
{
    Ptr<FlowWarpLayer> layer(new FlowWarpLayerImpl(params));
    return layer;
}
}} // namespace cv::dnn
@ -0,0 +1,85 @@ |
#!/usr/bin/env python |
''' |
This sample uses the FlowNet v2 model to calculate optical flow.
Original paper: https://arxiv.org/abs/1612.01925
Original repo:  https://github.com/lmb-freiburg/flownet2
Download the converted .caffemodel model from |
and .prototxt from |
Otherwise download original model from, |
convert .h5 model to .caffemodel and modify original .prototxt using .prototxt from link above. |
''' |
import argparse |
import os.path |
import numpy as np |
import cv2 as cv |
class OpticalFlow(object):
    """Runs a two-input optical-flow network and renders the flow as a colour image."""

    def __init__(self, proto, model, height, width):
        """Load the network.

        proto, model -- paths passed to cv.dnn.readNet
        height, width -- size both frames are resized to before inference
        """
        self.net = cv.dnn.readNet(proto, model)
        # NOTE(review): reconstructed call - pins inference to the default OpenCV
        # backend; confirm against the upstream sample.
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        self.height = height
        self.width = width

    def compute_flow(self, first_img, second_img):
        """Return a colour visualisation of the flow between two frames."""
        inp0 = cv.dnn.blobFromImage(first_img, size=(self.width, self.height))
        inp1 = cv.dnn.blobFromImage(second_img, size=(self.width, self.height))
        self.net.setInput(inp0, "img0")
        self.net.setInput(inp1, "img1")
        flow = self.net.forward()
        output = self.motion_to_color(flow)
        return output

    def motion_to_color(self, flow):
        """Map a flow blob to a uint8 colour image.

        The flow direction selects a colour from the HSV colormap (linearly
        interpolated between neighbouring entries) and the magnitude,
        normalised by the largest magnitude in the frame, controls how far the
        colour is from white.

        flow -- array whose first axis has length 1 and whose next axis holds
                the x and y flow components.
        """
        arr = np.arange(0, 255, dtype=np.uint8)
        colormap = cv.applyColorMap(arr, cv.COLORMAP_HSV)
        colormap = colormap.squeeze(1)

        flow = flow.squeeze(0)
        fx, fy = flow[0, ...], flow[1, ...]
        rad = np.sqrt(fx**2 + fy**2)
        # Avoid dividing by zero when there is no motion at all.
        rad_max = rad.max()
        maxrad = rad_max if rad_max != 0 else 1

        ncols = arr.size
        rad = rad[..., np.newaxis] / maxrad
        # Angle in [-1, 1], then scaled to a fractional colormap index.
        a = np.arctan2(-fy / maxrad, -fx / maxrad) / np.pi
        fk = (a + 1) / 2.0 * (ncols - 1)
        # Builtin int instead of the np.int alias, which newer NumPy removed.
        k0 = fk.astype(int)
        k1 = (k0 + 1) % ncols
        f = fk[..., np.newaxis] - k0[..., np.newaxis]

        col0 = colormap[k0] / 255.0
        col1 = colormap[k1] / 255.0
        col = (1 - f) * col0 + f * col1
        col = np.where(rad <= 1, 1 - rad * (1 - col), col * 0.75)
        output = (255.0 * col).astype(np.uint8)
        return output
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Use this script to calculate optical flow using FlowNetv2',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Optional, as the help text says: without it frames come from camera 0.
    parser.add_argument('-input', '-i', help='Path to input video file. Skip this argument to capture frames from a camera.')
    # type=int so values given on the command line are not passed on as strings.
    parser.add_argument('--height', type=int, default=320, help='Input height')
    parser.add_argument('--width', type=int, default=448, help='Input width')
    parser.add_argument('--proto', '-p', default='FlowNet2_deploy.prototxt', help='Path to prototxt.')
    parser.add_argument('--model', '-m', default='FlowNet2_weights.caffemodel', help='Path to caffemodel.')
    args, _ = parser.parse_known_args()

    if not os.path.isfile(args.model) or not os.path.isfile(args.proto):
        raise OSError("Prototxt or caffemodel not exist")

    winName = 'Calculation optical flow in OpenCV'
    cv.namedWindow(winName, cv.WINDOW_NORMAL)
    cap = cv.VideoCapture(args.input if args.input else 0)
    hasFrame, first_frame = cap.read()

    opt_flow = OpticalFlow(args.proto, args.model, args.height, args.width)
    # Stop on any key press, when the stream ends, or if no first frame was read.
    while hasFrame and cv.waitKey(1) < 0:
        hasFrame, second_frame = cap.read()
        if not hasFrame:
            break
        flow = opt_flow.compute_flow(first_frame, second_frame)
        first_frame = second_frame
        cv.imshow(winName, flow)
Reference in new issue