Merge pull request #10255 from dkurt:dnn_roi_pooling

pull/10290/head
Alexander Alekhin 7 years ago
commit f2070c9f5d
  1. 6
      modules/dnn/include/opencv2/dnn/all_layers.hpp
  2. 1196
      modules/dnn/misc/caffe/opencv-caffe.pb.cc
  3. 311
      modules/dnn/misc/caffe/opencv-caffe.pb.h
  4. 13
      modules/dnn/src/caffe/opencv-caffe.proto
  5. 1
      modules/dnn/src/init.cpp
  6. 147
      modules/dnn/src/layers/pooling_layer.cpp
  7. 16
      modules/dnn/test/test_layers.cpp

@ -242,7 +242,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
{
MAX,
AVE,
STOCHASTIC
STOCHASTIC,
ROI
};
int type;
@ -251,6 +252,9 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
bool computeMaxIdx;
String padMode;
bool ceilMode;
// ROIPooling parameters.
Size pooledSize;
float spatialScale;
static Ptr<PoolingLayer> create(const LayerParams& params);
};

File diff suppressed because it is too large Load Diff

@ -87,6 +87,7 @@ class PoolingParameter;
class PowerParameter;
class PriorBoxParameter;
class PythonParameter;
class ROIPoolingParameter;
class ReLUParameter;
class RecurrentParameter;
class ReductionParameter;
@ -4182,6 +4183,15 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
::opencv_caffe::ReshapeParameter* release_reshape_param();
void set_allocated_reshape_param(::opencv_caffe::ReshapeParameter* reshape_param);
// optional .opencv_caffe.ROIPoolingParameter roi_pooling_param = 8266711;
bool has_roi_pooling_param() const;
void clear_roi_pooling_param();
static const int kRoiPoolingParamFieldNumber = 8266711;
const ::opencv_caffe::ROIPoolingParameter& roi_pooling_param() const;
::opencv_caffe::ROIPoolingParameter* mutable_roi_pooling_param();
::opencv_caffe::ROIPoolingParameter* release_roi_pooling_param();
void set_allocated_roi_pooling_param(::opencv_caffe::ROIPoolingParameter* roi_pooling_param);
// optional .opencv_caffe.ScaleParameter scale_param = 142;
bool has_scale_param() const;
void clear_scale_param();
@ -4355,6 +4365,8 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
inline void clear_has_relu_param();
inline void set_has_reshape_param();
inline void clear_has_reshape_param();
inline void set_has_roi_pooling_param();
inline void clear_has_roi_pooling_param();
inline void set_has_scale_param();
inline void clear_has_scale_param();
inline void set_has_sigmoid_param();
@ -4428,6 +4440,7 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
::opencv_caffe::ReductionParameter* reduction_param_;
::opencv_caffe::ReLUParameter* relu_param_;
::opencv_caffe::ReshapeParameter* reshape_param_;
::opencv_caffe::ROIPoolingParameter* roi_pooling_param_;
::opencv_caffe::ScaleParameter* scale_param_;
::opencv_caffe::SigmoidParameter* sigmoid_param_;
::opencv_caffe::SoftmaxParameter* softmax_param_;
@ -12783,6 +12796,124 @@ class NormalizedBBox : public ::google::protobuf::Message /* @@protoc_insertion_
};
extern ::google::protobuf::internal::ExplicitlyConstructed<NormalizedBBox> NormalizedBBox_default_instance_;
// -------------------------------------------------------------------
// Generated protobuf message class for opencv_caffe.ROIPoolingParameter.
// It carries three optional scalar fields (see the accessor section below):
//   pooled_h      (uint32, field 1, default 0)
//   pooled_w      (uint32, field 2, default 0)
//   spatial_scale (float,  field 3, default 1)
// NOTE(review): this is protoc output; regenerate from opencv-caffe.proto
// rather than editing the declarations by hand.
class ROIPoolingParameter : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_caffe.ROIPoolingParameter) */ {
public:
ROIPoolingParameter();
virtual ~ROIPoolingParameter();
ROIPoolingParameter(const ROIPoolingParameter& from);
// Assignment is implemented in terms of CopyFrom().
inline ROIPoolingParameter& operator=(const ROIPoolingParameter& from) {
CopyFrom(from);
return *this;
}
// Read-only / mutable access to the unknown-field set held by
// _internal_metadata_.
inline const ::google::protobuf::UnknownFieldSet& unknown_fields() const {
return _internal_metadata_.unknown_fields();
}
inline ::google::protobuf::UnknownFieldSet* mutable_unknown_fields() {
return _internal_metadata_.mutable_unknown_fields();
}
static const ::google::protobuf::Descriptor* descriptor();
static const ROIPoolingParameter& default_instance();
static const ROIPoolingParameter* internal_default_instance();
void Swap(ROIPoolingParameter* other);
// implements Message ----------------------------------------------
// New() without an arena forwards to New(NULL).
inline ROIPoolingParameter* New() const { return New(NULL); }
ROIPoolingParameter* New(::google::protobuf::Arena* arena) const;
void CopyFrom(const ::google::protobuf::Message& from);
void MergeFrom(const ::google::protobuf::Message& from);
void CopyFrom(const ROIPoolingParameter& from);
void MergeFrom(const ROIPoolingParameter& from);
void Clear();
bool IsInitialized() const;
size_t ByteSizeLong() const;
bool MergePartialFromCodedStream(
::google::protobuf::io::CodedInputStream* input);
void SerializeWithCachedSizes(
::google::protobuf::io::CodedOutputStream* output) const;
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
bool deterministic, ::google::protobuf::uint8* output) const;
// Convenience overload: serializes with deterministic == false.
::google::protobuf::uint8* SerializeWithCachedSizesToArray(::google::protobuf::uint8* output) const {
return InternalSerializeWithCachedSizesToArray(false, output);
}
// Returns the value stored in _cached_size_.
int GetCachedSize() const { return _cached_size_; }
private:
void SharedCtor();
void SharedDtor();
void SetCachedSize(int size) const;
void InternalSwap(ROIPoolingParameter* other);
void UnsafeMergeFrom(const ROIPoolingParameter& from);
private:
// Arena accessors; both simply forward to _internal_metadata_.
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
return _internal_metadata_.arena();
}
inline void* MaybeArenaPtr() const {
return _internal_metadata_.raw_arena_ptr();
}
public:
::google::protobuf::Metadata GetMetadata() const;
// nested types ----------------------------------------------------
// accessors -------------------------------------------------------
// optional uint32 pooled_h = 1 [default = 0];
bool has_pooled_h() const;
void clear_pooled_h();
static const int kPooledHFieldNumber = 1;
::google::protobuf::uint32 pooled_h() const;
void set_pooled_h(::google::protobuf::uint32 value);
// optional uint32 pooled_w = 2 [default = 0];
bool has_pooled_w() const;
void clear_pooled_w();
static const int kPooledWFieldNumber = 2;
::google::protobuf::uint32 pooled_w() const;
void set_pooled_w(::google::protobuf::uint32 value);
// optional float spatial_scale = 3 [default = 1];
bool has_spatial_scale() const;
void clear_spatial_scale();
static const int kSpatialScaleFieldNumber = 3;
float spatial_scale() const;
void set_spatial_scale(float value);
// @@protoc_insertion_point(class_scope:opencv_caffe.ROIPoolingParameter)
private:
// Per-field presence-bit helpers; they operate on _has_bits_.
inline void set_has_pooled_h();
inline void clear_has_pooled_h();
inline void set_has_pooled_w();
inline void clear_has_pooled_w();
inline void set_has_spatial_scale();
inline void clear_has_spatial_scale();
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
// One presence bit per optional field.
::google::protobuf::internal::HasBits<1> _has_bits_;
// mutable so that the const SetCachedSize() declared above can update it
// (presumably; its definition is not visible here).
mutable int _cached_size_;
::google::protobuf::uint32 pooled_h_;
::google::protobuf::uint32 pooled_w_;
float spatial_scale_;
// File-level generated init/shutdown routines need access to the private
// members.
friend void protobuf_InitDefaults_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AddDesc_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AssignDesc_opencv_2dcaffe_2eproto();
friend void protobuf_ShutdownFile_opencv_2dcaffe_2eproto();
void InitAsDefaultInstance();
};
extern ::google::protobuf::internal::ExplicitlyConstructed<ROIPoolingParameter> ROIPoolingParameter_default_instance_;
// ===================================================================
@ -19015,15 +19146,60 @@ inline void LayerParameter::set_allocated_reshape_param(::opencv_caffe::ReshapeP
// @@protoc_insertion_point(field_set_allocated:opencv_caffe.LayerParameter.reshape_param)
}
// optional .opencv_caffe.ROIPoolingParameter roi_pooling_param = 8266711;
// Presence-bit helpers for LayerParameter.roi_pooling_param.
// The field is tracked by bit 0x00200000u of _has_bits_[1].
inline bool LayerParameter::has_roi_pooling_param() const {
  if (_has_bits_[1] & 0x00200000u) {
    return true;
  }
  return false;
}
inline void LayerParameter::set_has_roi_pooling_param() {
  _has_bits_[1] = _has_bits_[1] | 0x00200000u;
}
inline void LayerParameter::clear_has_roi_pooling_param() {
  _has_bits_[1] = _has_bits_[1] & ~0x00200000u;
}
// Resets the sub-message in place (when one has been allocated) and marks
// the field as unset. The sub-message object itself is kept for reuse.
inline void LayerParameter::clear_roi_pooling_param() {
  if (NULL != roi_pooling_param_) {
    roi_pooling_param_->::opencv_caffe::ROIPoolingParameter::Clear();
  }
  clear_has_roi_pooling_param();
}
// Read-only accessor: returns the stored sub-message, or the shared default
// instance when none has been allocated.
inline const ::opencv_caffe::ROIPoolingParameter& LayerParameter::roi_pooling_param() const {
  // @@protoc_insertion_point(field_get:opencv_caffe.LayerParameter.roi_pooling_param)
  if (roi_pooling_param_ == NULL) {
    return *::opencv_caffe::ROIPoolingParameter::internal_default_instance();
  }
  return *roi_pooling_param_;
}
// Mutable accessor: marks the field as present and lazily allocates the
// sub-message on first use.
inline ::opencv_caffe::ROIPoolingParameter* LayerParameter::mutable_roi_pooling_param() {
  // The presence bit is set before the allocation, as in the generated code.
  set_has_roi_pooling_param();
  if (NULL == roi_pooling_param_)
    roi_pooling_param_ = new ::opencv_caffe::ROIPoolingParameter;
  // @@protoc_insertion_point(field_mutable:opencv_caffe.LayerParameter.roi_pooling_param)
  return roi_pooling_param_;
}
// Hands the stored sub-message (possibly NULL) over to the caller and leaves
// this message with the field unset; the caller owns the returned pointer.
inline ::opencv_caffe::ROIPoolingParameter* LayerParameter::release_roi_pooling_param() {
  // @@protoc_insertion_point(field_release:opencv_caffe.LayerParameter.roi_pooling_param)
  ::opencv_caffe::ROIPoolingParameter* const released = roi_pooling_param_;
  roi_pooling_param_ = NULL;
  clear_has_roi_pooling_param();
  return released;
}
// Takes ownership of `roi_pooling_param` (may be NULL), deleting whatever
// sub-message was stored before. The presence bit follows the new pointer.
// NOTE(review): the old pointer is deleted unconditionally, so passing the
// currently stored pointer would leave a dangling member -- confirm callers
// never do that.
inline void LayerParameter::set_allocated_roi_pooling_param(::opencv_caffe::ROIPoolingParameter* roi_pooling_param) {
  delete roi_pooling_param_;
  roi_pooling_param_ = roi_pooling_param;
  if (roi_pooling_param == NULL) {
    clear_has_roi_pooling_param();
  } else {
    set_has_roi_pooling_param();
  }
  // @@protoc_insertion_point(field_set_allocated:opencv_caffe.LayerParameter.roi_pooling_param)
}
// optional .opencv_caffe.ScaleParameter scale_param = 142;
// Presence-bit helpers for LayerParameter.scale_param.
// The field is tracked by bit 0x00400000u of _has_bits_[1]. The stale mask
// 0x00200000u is dropped: that bit belongs to roi_pooling_param, so using it
// here made has_scale_param() report the wrong field and made the
// set/clear helpers disturb roi_pooling_param's presence bit.
inline bool LayerParameter::has_scale_param() const {
  return (_has_bits_[1] & 0x00400000u) != 0;
}
inline void LayerParameter::set_has_scale_param() {
  _has_bits_[1] |= 0x00400000u;
}
inline void LayerParameter::clear_has_scale_param() {
  _has_bits_[1] &= ~0x00400000u;
}
inline void LayerParameter::clear_scale_param() {
if (scale_param_ != NULL) scale_param_->::opencv_caffe::ScaleParameter::Clear();
@ -19062,13 +19238,13 @@ inline void LayerParameter::set_allocated_scale_param(::opencv_caffe::ScaleParam
// optional .opencv_caffe.SigmoidParameter sigmoid_param = 124;
// Presence-bit helpers for LayerParameter.sigmoid_param.
// The field is tracked by bit 0x00800000u of _has_bits_[1]. The stale mask
// 0x00400000u is dropped: that bit is now scale_param's, so keeping it made
// has_sigmoid_param() test the wrong field and made set/clear touch two
// fields at once.
inline bool LayerParameter::has_sigmoid_param() const {
  return (_has_bits_[1] & 0x00800000u) != 0;
}
inline void LayerParameter::set_has_sigmoid_param() {
  _has_bits_[1] |= 0x00800000u;
}
inline void LayerParameter::clear_has_sigmoid_param() {
  _has_bits_[1] &= ~0x00800000u;
}
inline void LayerParameter::clear_sigmoid_param() {
if (sigmoid_param_ != NULL) sigmoid_param_->::opencv_caffe::SigmoidParameter::Clear();
@ -19107,13 +19283,13 @@ inline void LayerParameter::set_allocated_sigmoid_param(::opencv_caffe::SigmoidP
// optional .opencv_caffe.SoftmaxParameter softmax_param = 125;
// Presence-bit helpers for LayerParameter.softmax_param.
// The field is tracked by bit 0x01000000u of _has_bits_[1]. The stale mask
// 0x00800000u is dropped: that bit is now sigmoid_param's, so keeping it made
// has_softmax_param() test the wrong field and made set/clear touch two
// fields at once.
inline bool LayerParameter::has_softmax_param() const {
  return (_has_bits_[1] & 0x01000000u) != 0;
}
inline void LayerParameter::set_has_softmax_param() {
  _has_bits_[1] |= 0x01000000u;
}
inline void LayerParameter::clear_has_softmax_param() {
  _has_bits_[1] &= ~0x01000000u;
}
inline void LayerParameter::clear_softmax_param() {
if (softmax_param_ != NULL) softmax_param_->::opencv_caffe::SoftmaxParameter::Clear();
@ -19152,13 +19328,13 @@ inline void LayerParameter::set_allocated_softmax_param(::opencv_caffe::SoftmaxP
// optional .opencv_caffe.SPPParameter spp_param = 132;
// Presence-bit helpers for LayerParameter.spp_param.
// The field is tracked by bit 0x02000000u of _has_bits_[1]. The stale mask
// 0x01000000u is dropped: that bit is now softmax_param's, so keeping it made
// has_spp_param() test the wrong field and made set/clear touch two fields
// at once.
inline bool LayerParameter::has_spp_param() const {
  return (_has_bits_[1] & 0x02000000u) != 0;
}
inline void LayerParameter::set_has_spp_param() {
  _has_bits_[1] |= 0x02000000u;
}
inline void LayerParameter::clear_has_spp_param() {
  _has_bits_[1] &= ~0x02000000u;
}
inline void LayerParameter::clear_spp_param() {
if (spp_param_ != NULL) spp_param_->::opencv_caffe::SPPParameter::Clear();
@ -19197,13 +19373,13 @@ inline void LayerParameter::set_allocated_spp_param(::opencv_caffe::SPPParameter
// optional .opencv_caffe.SliceParameter slice_param = 126;
// Presence-bit helpers for LayerParameter.slice_param.
// The field is tracked by bit 0x04000000u of _has_bits_[1]. The stale mask
// 0x02000000u is dropped: that bit is now spp_param's, so keeping it made
// has_slice_param() test the wrong field and made set/clear touch two fields
// at once.
inline bool LayerParameter::has_slice_param() const {
  return (_has_bits_[1] & 0x04000000u) != 0;
}
inline void LayerParameter::set_has_slice_param() {
  _has_bits_[1] |= 0x04000000u;
}
inline void LayerParameter::clear_has_slice_param() {
  _has_bits_[1] &= ~0x04000000u;
}
inline void LayerParameter::clear_slice_param() {
if (slice_param_ != NULL) slice_param_->::opencv_caffe::SliceParameter::Clear();
@ -19242,13 +19418,13 @@ inline void LayerParameter::set_allocated_slice_param(::opencv_caffe::SliceParam
// optional .opencv_caffe.TanHParameter tanh_param = 127;
// Presence-bit helpers for LayerParameter.tanh_param.
// The field is tracked by bit 0x08000000u of _has_bits_[1]. The stale mask
// 0x04000000u is dropped: that bit is now slice_param's, so keeping it made
// has_tanh_param() test the wrong field and made set/clear touch two fields
// at once.
inline bool LayerParameter::has_tanh_param() const {
  return (_has_bits_[1] & 0x08000000u) != 0;
}
inline void LayerParameter::set_has_tanh_param() {
  _has_bits_[1] |= 0x08000000u;
}
inline void LayerParameter::clear_has_tanh_param() {
  _has_bits_[1] &= ~0x08000000u;
}
inline void LayerParameter::clear_tanh_param() {
if (tanh_param_ != NULL) tanh_param_->::opencv_caffe::TanHParameter::Clear();
@ -19287,13 +19463,13 @@ inline void LayerParameter::set_allocated_tanh_param(::opencv_caffe::TanHParamet
// optional .opencv_caffe.ThresholdParameter threshold_param = 128;
// Presence-bit helpers for LayerParameter.threshold_param.
// The field is tracked by bit 0x10000000u of _has_bits_[1]. The stale mask
// 0x08000000u is dropped: that bit is now tanh_param's, so keeping it made
// has_threshold_param() test the wrong field and made set/clear touch two
// fields at once.
inline bool LayerParameter::has_threshold_param() const {
  return (_has_bits_[1] & 0x10000000u) != 0;
}
inline void LayerParameter::set_has_threshold_param() {
  _has_bits_[1] |= 0x10000000u;
}
inline void LayerParameter::clear_has_threshold_param() {
  _has_bits_[1] &= ~0x10000000u;
}
inline void LayerParameter::clear_threshold_param() {
if (threshold_param_ != NULL) threshold_param_->::opencv_caffe::ThresholdParameter::Clear();
@ -19332,13 +19508,13 @@ inline void LayerParameter::set_allocated_threshold_param(::opencv_caffe::Thresh
// optional .opencv_caffe.TileParameter tile_param = 138;
// Presence-bit helpers for LayerParameter.tile_param.
// The field is tracked by bit 0x20000000u of _has_bits_[1]. The stale mask
// 0x10000000u is dropped: that bit is now threshold_param's, so keeping it
// made has_tile_param() test the wrong field and made set/clear touch two
// fields at once.
inline bool LayerParameter::has_tile_param() const {
  return (_has_bits_[1] & 0x20000000u) != 0;
}
inline void LayerParameter::set_has_tile_param() {
  _has_bits_[1] |= 0x20000000u;
}
inline void LayerParameter::clear_has_tile_param() {
  _has_bits_[1] &= ~0x20000000u;
}
inline void LayerParameter::clear_tile_param() {
if (tile_param_ != NULL) tile_param_->::opencv_caffe::TileParameter::Clear();
@ -19377,13 +19553,13 @@ inline void LayerParameter::set_allocated_tile_param(::opencv_caffe::TileParamet
// optional .opencv_caffe.WindowDataParameter window_data_param = 129;
// Presence-bit helpers for LayerParameter.window_data_param.
// The field is tracked by bit 0x40000000u of _has_bits_[1]. The stale mask
// 0x20000000u is dropped: that bit is now tile_param's, so keeping it made
// has_window_data_param() test the wrong field and made set/clear touch two
// fields at once.
inline bool LayerParameter::has_window_data_param() const {
  return (_has_bits_[1] & 0x40000000u) != 0;
}
inline void LayerParameter::set_has_window_data_param() {
  _has_bits_[1] |= 0x40000000u;
}
inline void LayerParameter::clear_has_window_data_param() {
  _has_bits_[1] &= ~0x40000000u;
}
inline void LayerParameter::clear_window_data_param() {
if (window_data_param_ != NULL) window_data_param_->::opencv_caffe::WindowDataParameter::Clear();
@ -28660,6 +28836,85 @@ inline void NormalizedBBox::set_size(float value) {
inline const NormalizedBBox* NormalizedBBox::internal_default_instance() {
return &NormalizedBBox_default_instance_.get();
}
// -------------------------------------------------------------------
// ROIPoolingParameter
// optional uint32 pooled_h = 1 [default = 0];
// Accessors for ROIPoolingParameter.pooled_h.
// Presence is tracked by bit 0x00000001u of _has_bits_[0].
inline bool ROIPoolingParameter::has_pooled_h() const {
  if (_has_bits_[0] & 0x00000001u) {
    return true;
  }
  return false;
}
inline void ROIPoolingParameter::set_has_pooled_h() {
  _has_bits_[0] = _has_bits_[0] | 0x00000001u;
}
inline void ROIPoolingParameter::clear_has_pooled_h() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000001u;
}
// Marks the field as unset and restores its default value (0).
inline void ROIPoolingParameter::clear_pooled_h() {
  clear_has_pooled_h();
  pooled_h_ = 0u;
}
inline ::google::protobuf::uint32 ROIPoolingParameter::pooled_h() const {
  // @@protoc_insertion_point(field_get:opencv_caffe.ROIPoolingParameter.pooled_h)
  return this->pooled_h_;
}
// Stores `value` and marks the field as present.
inline void ROIPoolingParameter::set_pooled_h(::google::protobuf::uint32 value) {
  pooled_h_ = value;
  set_has_pooled_h();
  // @@protoc_insertion_point(field_set:opencv_caffe.ROIPoolingParameter.pooled_h)
}
// optional uint32 pooled_w = 2 [default = 0];
// Accessors for ROIPoolingParameter.pooled_w.
// Presence is tracked by bit 0x00000002u of _has_bits_[0].
inline bool ROIPoolingParameter::has_pooled_w() const {
  if (_has_bits_[0] & 0x00000002u) {
    return true;
  }
  return false;
}
inline void ROIPoolingParameter::set_has_pooled_w() {
  _has_bits_[0] = _has_bits_[0] | 0x00000002u;
}
inline void ROIPoolingParameter::clear_has_pooled_w() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000002u;
}
// Marks the field as unset and restores its default value (0).
inline void ROIPoolingParameter::clear_pooled_w() {
  clear_has_pooled_w();
  pooled_w_ = 0u;
}
inline ::google::protobuf::uint32 ROIPoolingParameter::pooled_w() const {
  // @@protoc_insertion_point(field_get:opencv_caffe.ROIPoolingParameter.pooled_w)
  return this->pooled_w_;
}
// Stores `value` and marks the field as present.
inline void ROIPoolingParameter::set_pooled_w(::google::protobuf::uint32 value) {
  pooled_w_ = value;
  set_has_pooled_w();
  // @@protoc_insertion_point(field_set:opencv_caffe.ROIPoolingParameter.pooled_w)
}
// optional float spatial_scale = 3 [default = 1];
inline bool ROIPoolingParameter::has_spatial_scale() const {
return (_has_bits_[0] & 0x00000004u) != 0;
}
inline void ROIPoolingParameter::set_has_spatial_scale() {
_has_bits_[0] |= 0x00000004u;
}
inline void ROIPoolingParameter::clear_has_spatial_scale() {
_has_bits_[0] &= ~0x00000004u;
}
inline void ROIPoolingParameter::clear_spatial_scale() {
spatial_scale_ = 1;
clear_has_spatial_scale();
}
inline float ROIPoolingParameter::spatial_scale() const {
// @@protoc_insertion_point(field_get:opencv_caffe.ROIPoolingParameter.spatial_scale)
return spatial_scale_;
}
inline void ROIPoolingParameter::set_spatial_scale(float value) {
set_has_spatial_scale();
spatial_scale_ = value;
// @@protoc_insertion_point(field_set:opencv_caffe.ROIPoolingParameter.spatial_scale)
}
// Returns the address of the object held by
// ROIPoolingParameter_default_instance_.
inline const ROIPoolingParameter* ROIPoolingParameter::internal_default_instance() {
  const ROIPoolingParameter& defaults = ROIPoolingParameter_default_instance_.get();
  return &defaults;
}
#endif // !PROTOBUF_INLINE_NOT_IN_HEADERS
// -------------------------------------------------------------------
@ -28795,6 +29050,8 @@ inline const NormalizedBBox* NormalizedBBox::internal_default_instance() {
// -------------------------------------------------------------------
// -------------------------------------------------------------------
// @@protoc_insertion_point(namespace_scope)

@ -552,6 +552,7 @@ message LayerParameter {
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional ROIPoolingParameter roi_pooling_param = 8266711; // https://github.com/rbgirshick/caffe-fast-rcnn/tree/fast-rcnn
optional ScaleParameter scale_param = 142;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
@ -1605,3 +1606,15 @@ message NormalizedBBox {
optional float score = 7;
optional float size = 8;
}
// origin: https://github.com/rbgirshick/caffe-fast-rcnn/tree/fast-rcnn
// Message that stores parameters used by ROIPoolingLayer
message ROIPoolingParameter {
// Size of the pooled output. Height and width are given as two separate
// fields. (This message declares no pad, kernel size, or stride fields; the
// previous comment mentioning them did not match it.)
optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
// Multiplicative spatial scale factor to translate ROI coords from their
// input scale to the scale used when pooling
optional float spatial_scale = 3 [default = 1];
}

@ -88,6 +88,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer);
CV_DNN_REGISTER_LAYER_CLASS(Deconvolution, DeconvolutionLayer);
CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(ROIPooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer);
CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer);
CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer);

@ -65,6 +65,7 @@ public:
{
type = PoolingLayer::MAX;
computeMaxIdx = true;
globalPooling = false;
if (params.has("pool"))
{
@ -77,12 +78,18 @@ public:
type = PoolingLayer::STOCHASTIC;
else
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
pad.height, pad.width, stride.height, stride.width, padMode);
}
else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale"))
{
type = PoolingLayer::ROI;
}
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
pad.height, pad.width, stride.height, stride.width, padMode);
setParamsFrom(params);
ceilMode = params.get<bool>("ceil_mode", true);
pooledSize.width = params.get<uint32_t>("pooled_w", 1);
pooledSize.height = params.get<uint32_t>("pooled_h", 1);
spatialScale = params.get<float>("spatial_scale", 1);
}
#ifdef HAVE_OPENCL
@ -91,7 +98,7 @@ public:
void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
{
CV_Assert(inputs.size() == 1);
CV_Assert(!inputs.empty());
cv::Size inp(inputs[0]->size[3], inputs[0]->size[2]),
out(outputs[0].size[3], outputs[0].size[2]);
@ -171,20 +178,23 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
for (size_t ii = 0; ii < inputs.size(); ii++)
switch (type)
{
switch (type)
{
case MAX:
maxPooling(*inputs[ii], outputs[2 * ii], outputs[2 * ii + 1]);
break;
case AVE:
avePooling(*inputs[ii], outputs[ii]);
break;
default:
CV_Error(Error::StsNotImplemented, "Not implemented");
break;
}
case MAX:
CV_Assert(inputs.size() == 1, outputs.size() == 2);
maxPooling(*inputs[0], outputs[0], outputs[1]);
break;
case AVE:
CV_Assert(inputs.size() == 1, outputs.size() == 1);
avePooling(*inputs[0], outputs[0]);
break;
case ROI:
CV_Assert(inputs.size() == 2, outputs.size() == 1);
roiPooling(*inputs[0], *inputs[1], outputs[0]);
break;
default:
CV_Error(Error::StsNotImplemented, "Not implemented");
break;
}
}
@ -201,29 +211,33 @@ public:
class PoolingInvoker : public ParallelLoopBody
{
public:
const Mat* src;
const Mat* src, *rois;
Mat *dst, *mask;
Size kernel, stride, pad;
int nstripes;
bool computeMaxIdx;
std::vector<int> ofsbuf;
int poolingType;
float spatialScale;
PoolingInvoker() : src(0), dst(0), mask(0), nstripes(0), computeMaxIdx(0), poolingType(PoolingLayer::MAX) {}
PoolingInvoker() : src(0), rois(0), dst(0), mask(0), nstripes(0),
computeMaxIdx(0), poolingType(PoolingLayer::MAX), spatialScale(0) {}
static void run(const Mat& src, Mat& dst, Mat& mask, Size kernel,
Size stride, Size pad, int poolingType,
static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
Size stride, Size pad, int poolingType, float spatialScale,
bool computeMaxIdx, int nstripes)
{
CV_Assert(src.isContinuous() && dst.isContinuous() &&
src.type() == CV_32F && src.type() == dst.type() &&
src.dims == 4 && dst.dims == 4 &&
src.size[0] == dst.size[0] && src.size[1] == dst.size[1] &&
(poolingType == ROI && dst.size[0] == rois.size[0] ||
src.size[0] == dst.size[0]) && src.size[1] == dst.size[1] &&
(mask.empty() || (mask.type() == src.type() && mask.size == dst.size)));
PoolingInvoker p;
p.src = &src;
p.rois = &rois;
p.dst = &dst;
p.mask = &mask;
p.kernel = kernel;
@ -232,6 +246,7 @@ public:
p.nstripes = nstripes;
p.computeMaxIdx = computeMaxIdx;
p.poolingType = poolingType;
p.spatialScale = spatialScale;
if( !computeMaxIdx )
{
@ -273,12 +288,39 @@ public:
ofs /= height;
int c = (int)(ofs % channels);
int n = (int)(ofs / channels);
int ystart = y0 * stride_h - pad_h;
int yend = min(ystart + kernel_h, inp_height + pad_h);
int ystart, yend;
const float *srcData;
int xstartROI = 0;
float roiRatio = 0;
if (poolingType == ROI)
{
const float *roisData = rois->ptr<float>(n);
int ystartROI = round(roisData[2] * spatialScale);
int yendROI = round(roisData[4] * spatialScale);
int roiHeight = std::max(yendROI - ystartROI + 1, 1);
roiRatio = (float)roiHeight / height;
ystart = ystartROI + y0 * roiRatio;
yend = ystartROI + std::ceil((y0 + 1) * roiRatio);
xstartROI = round(roisData[1] * spatialScale);
int xendROI = round(roisData[3] * spatialScale);
int roiWidth = std::max(xendROI - xstartROI + 1, 1);
roiRatio = (float)roiWidth / width;
CV_Assert(roisData[0] < src->size[0]);
srcData = src->ptr<float>(roisData[0], c);
}
else
{
ystart = y0 * stride_h - pad_h;
yend = min(ystart + kernel_h, inp_height + pad_h);
srcData = src->ptr<float>(n, c);
}
int ydelta = yend - ystart;
ystart = max(ystart, 0);
yend = min(yend, inp_height);
const float *srcData = src->ptr<float>(n, c);
float *dstData = dst->ptr<float>(n, c, y0);
float *dstMaskData = mask->data ? mask->ptr<float>(n, c, y0) : 0;
@ -286,13 +328,29 @@ public:
ofs0 += delta;
int x1 = x0 + delta;
if( poolingType == PoolingLayer::MAX )
if( poolingType == MAX || poolingType == ROI)
for( ; x0 < x1; x0++ )
{
int xstart = x0 * stride_w - pad_w;
int xend = min(xstart + kernel_w, inp_width);
int xstart, xend;
if (poolingType == ROI)
{
xstart = xstartROI + x0 * roiRatio;
xend = xstartROI + std::ceil((x0 + 1) * roiRatio);
}
else
{
xstart = x0 * stride_w - pad_w;
xend = xstart + kernel_w;
}
xstart = max(xstart, 0);
xend = min(xend, inp_width);
if (xstart >= xend || ystart >= yend)
{
dstData[x0] = 0;
if (compMaxIdx && dstMaskData)
dstMaskData[x0] = -1;
continue;
}
#if CV_SIMD128
if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
{
@ -489,14 +547,22 @@ public:
// Runs max pooling of `src` into `dst`, passing `mask` through to the
// shared PoolingInvoker.
// The stale call using the former 9-argument PoolingInvoker::run signature
// is removed: it no longer matches the invoker and would have pooled twice.
void maxPooling(Mat &src, Mat &dst, Mat &mask)
{
    const int nstripes = getNumThreads();
    // Max pooling has no ROIs; an empty Mat fills the invoker's `rois` slot.
    Mat rois;
    PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes);
}
// Runs average pooling of `src` into `dst` through the shared PoolingInvoker.
void avePooling(Mat &src, Mat &dst)
{
    // Neither ROIs nor a mask are supplied here; empty Mats fill both slots.
    Mat noRois;
    Mat noMask;
    const int stripeCount = getNumThreads();
    PoolingInvoker::run(src, noRois, dst, noMask,
                        kernel, stride, pad,
                        type, spatialScale, computeMaxIdx, stripeCount);
}
// Runs ROI pooling: pools `src` over the regions described by `rois` and
// writes the result to `dst`.
// The stale call using the former 9-argument PoolingInvoker::run signature
// is removed: it ignored `rois`, no longer matches the invoker, and would
// have pooled twice.
void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
{
    const int nstripes = getNumThreads();
    // No mask is supplied for ROI pooling; an empty Mat fills the slot.
    Mat mask;
    PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes);
}
virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
@ -632,6 +698,11 @@ public:
out.height = 1;
out.width = 1;
}
else if (type == PoolingLayer::ROI)
{
out.height = pooledSize.height;
out.width = pooledSize.width;
}
else if (padMode.empty())
{
float height = (float)(in.height + 2 * pad.height - kernel.height) / stride.height;
@ -656,17 +727,13 @@ public:
getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
}
outputs.resize(type == MAX ? 2 * inputs.size() : inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width};
if (type == ROI)
{
size_t index = type == MAX ? 2*i : i;
int dims[] = {inputs[i][0], inputs[i][1], out.height, out.width};
outputs[index] = shape(dims);
if (type == MAX)
outputs[index + 1] = shape(dims);
CV_Assert(inputs.size() == 2);
dims[0] = inputs[1][0]; // Number of proposals;
}
outputs.assign(type == MAX ? 2 : 1, shape(dims));
return false;
}

@ -560,4 +560,20 @@ TEST(Layer_Test_Reorg, Accuracy)
testLayerUsingDarknetModels("reorg", false, false);
}
// Loads a Caffe network from net_roi_pooling.prototxt, feeds it the stored
// "input" and "rois" blobs, and compares the forward result with the stored
// reference blob.
TEST(Layer_Test_ROIPooling, Accuracy)
{
    Net roiNet = readNetFromCaffe(_tf("net_roi_pooling.prototxt"));

    // Test blobs are read from .npy files.
    Mat inputBlob = blobFromNPY(_tf("net_roi_pooling.input.npy"));
    Mat roisBlob = blobFromNPY(_tf("net_roi_pooling.rois.npy"));
    Mat expected = blobFromNPY(_tf("net_roi_pooling.npy"));

    roiNet.setInput(inputBlob, "input");
    roiNet.setInput(roisBlob, "rois");

    Mat actual = roiNet.forward();
    normAssert(actual, expected);
}
}

Loading…
Cancel
Save