Merge pull request #10356 from dkurt:dnn_rfcn

pull/10369/head
Vadim Pisarevsky 7 years ago
commit b8a24b36ce
  1. 2
      modules/dnn/include/opencv2/dnn/all_layers.hpp
  2. 1625
      modules/dnn/misc/caffe/opencv-caffe.pb.cc
  3. 390
      modules/dnn/misc/caffe/opencv-caffe.pb.h
  4. 10
      modules/dnn/src/caffe/opencv-caffe.proto
  5. 1
      modules/dnn/src/init.cpp
  6. 96
      modules/dnn/src/layers/pooling_layer.cpp
  7. 67
      samples/dnn/faster_rcnn.cpp

@ -242,6 +242,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
// ROIPooling parameters.
Size pooledSize;
float spatialScale;
// PSROIPooling parameters.
int psRoiOutChannels;
static Ptr<PoolingLayer> create(const LayerParams& params);
};

File diff suppressed because it is too large Load Diff

@ -80,6 +80,7 @@ class NonMaximumSuppressionParameter;
class NormalizeBBoxParameter;
class NormalizedBBox;
class PReLUParameter;
class PSROIPoolingParameter;
class ParamSpec;
class ParameterParameter;
class PermuteParameter;
@ -1781,6 +1782,13 @@ class DetectionOutputParameter : public ::google::protobuf::Message /* @@protoc_
float confidence_threshold() const;
void set_confidence_threshold(float value);
// optional bool normalized_bbox = 10 [default = true];
bool has_normalized_bbox() const;
void clear_normalized_bbox();
static const int kNormalizedBboxFieldNumber = 10;
bool normalized_bbox() const;
void set_normalized_bbox(bool value);
// @@protoc_insertion_point(class_scope:opencv_caffe.DetectionOutputParameter)
private:
inline void set_has_num_classes();
@ -1801,6 +1809,8 @@ class DetectionOutputParameter : public ::google::protobuf::Message /* @@protoc_
inline void clear_has_keep_top_k();
inline void set_has_confidence_threshold();
inline void clear_has_confidence_threshold();
inline void set_has_normalized_bbox();
inline void clear_has_normalized_bbox();
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
::google::protobuf::internal::HasBits<1> _has_bits_;
@ -1812,8 +1822,9 @@ class DetectionOutputParameter : public ::google::protobuf::Message /* @@protoc_
bool variance_encoded_in_target_;
float confidence_threshold_;
::google::protobuf::int32 keep_top_k_;
bool share_location_;
int code_type_;
bool share_location_;
bool normalized_bbox_;
friend void protobuf_InitDefaults_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AddDesc_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AssignDesc_opencv_2dcaffe_2eproto();
@ -4148,6 +4159,15 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
::opencv_caffe::ProposalParameter* release_proposal_param();
void set_allocated_proposal_param(::opencv_caffe::ProposalParameter* proposal_param);
// optional .opencv_caffe.PSROIPoolingParameter psroi_pooling_param = 10001;
bool has_psroi_pooling_param() const;
void clear_psroi_pooling_param();
static const int kPsroiPoolingParamFieldNumber = 10001;
const ::opencv_caffe::PSROIPoolingParameter& psroi_pooling_param() const;
::opencv_caffe::PSROIPoolingParameter* mutable_psroi_pooling_param();
::opencv_caffe::PSROIPoolingParameter* release_psroi_pooling_param();
void set_allocated_psroi_pooling_param(::opencv_caffe::PSROIPoolingParameter* psroi_pooling_param);
// optional .opencv_caffe.PythonParameter python_param = 130;
bool has_python_param() const;
void clear_python_param();
@ -4367,6 +4387,8 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
inline void clear_has_prior_box_param();
inline void set_has_proposal_param();
inline void clear_has_proposal_param();
inline void set_has_psroi_pooling_param();
inline void clear_has_psroi_pooling_param();
inline void set_has_python_param();
inline void clear_has_python_param();
inline void set_has_recurrent_param();
@ -4399,7 +4421,8 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
inline void clear_has_window_data_param();
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
::google::protobuf::internal::HasBits<2> _has_bits_;
::google::protobuf::internal::HasBits<3> _has_bits_;
mutable int _cached_size_;
::google::protobuf::RepeatedPtrField< ::std::string> bottom_;
::google::protobuf::RepeatedPtrField< ::std::string> top_;
::google::protobuf::RepeatedField< float > loss_weight_;
@ -4448,6 +4471,7 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
::opencv_caffe::PReLUParameter* prelu_param_;
::opencv_caffe::PriorBoxParameter* prior_box_param_;
::opencv_caffe::ProposalParameter* proposal_param_;
::opencv_caffe::PSROIPoolingParameter* psroi_pooling_param_;
::opencv_caffe::PythonParameter* python_param_;
::opencv_caffe::RecurrentParameter* recurrent_param_;
::opencv_caffe::ReductionParameter* reduction_param_;
@ -4464,7 +4488,6 @@ class LayerParameter : public ::google::protobuf::Message /* @@protoc_insertion_
::opencv_caffe::TileParameter* tile_param_;
::opencv_caffe::WindowDataParameter* window_data_param_;
int phase_;
mutable int _cached_size_;
friend void protobuf_InitDefaults_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AddDesc_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AssignDesc_opencv_2dcaffe_2eproto();
@ -13111,6 +13134,127 @@ class ProposalParameter : public ::google::protobuf::Message /* @@protoc_inserti
};
extern ::google::protobuf::internal::ExplicitlyConstructed<ProposalParameter> ProposalParameter_default_instance_;
// -------------------------------------------------------------------
// Message class for opencv_caffe.PSROIPoolingParameter.
// It carries three required fields: spatial_scale (float, field 1),
// output_dim (int32, field 2) and group_size (int32, field 3).
// NOTE(review): the @@protoc_insertion_point markers suggest this class is
// protoc-generated; presumably it should be regenerated from the .proto
// rather than edited by hand — confirm.
class PSROIPoolingParameter : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_caffe.PSROIPoolingParameter) */ {
public:
PSROIPoolingParameter();
virtual ~PSROIPoolingParameter();
PSROIPoolingParameter(const PSROIPoolingParameter& from);
// Copy-assignment is implemented in terms of CopyFrom().
inline PSROIPoolingParameter& operator=(const PSROIPoolingParameter& from) {
CopyFrom(from);
return *this;
}
// Read-only / mutable access to the unknown-field set kept in _internal_metadata_.
inline const ::google::protobuf::UnknownFieldSet& unknown_fields() const {
return _internal_metadata_.unknown_fields();
}
inline ::google::protobuf::UnknownFieldSet* mutable_unknown_fields() {
return _internal_metadata_.mutable_unknown_fields();
}
static const ::google::protobuf::Descriptor* descriptor();
static const PSROIPoolingParameter& default_instance();
static const PSROIPoolingParameter* internal_default_instance();
void Swap(PSROIPoolingParameter* other);
// implements Message ----------------------------------------------
// New() without an arena forwards to New(NULL).
inline PSROIPoolingParameter* New() const { return New(NULL); }
PSROIPoolingParameter* New(::google::protobuf::Arena* arena) const;
void CopyFrom(const ::google::protobuf::Message& from);
void MergeFrom(const ::google::protobuf::Message& from);
void CopyFrom(const PSROIPoolingParameter& from);
void MergeFrom(const PSROIPoolingParameter& from);
void Clear();
bool IsInitialized() const;
size_t ByteSizeLong() const;
bool MergePartialFromCodedStream(
::google::protobuf::io::CodedInputStream* input);
void SerializeWithCachedSizes(
::google::protobuf::io::CodedOutputStream* output) const;
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
bool deterministic, ::google::protobuf::uint8* output) const;
// Convenience overload: serializes with deterministic == false.
::google::protobuf::uint8* SerializeWithCachedSizesToArray(::google::protobuf::uint8* output) const {
return InternalSerializeWithCachedSizesToArray(false, output);
}
// Returns the value currently stored in _cached_size_.
int GetCachedSize() const { return _cached_size_; }
private:
void SharedCtor();
void SharedDtor();
void SetCachedSize(int size) const;
void InternalSwap(PSROIPoolingParameter* other);
void UnsafeMergeFrom(const PSROIPoolingParameter& from);
private:
// Arena accessors; both read from _internal_metadata_.
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
return _internal_metadata_.arena();
}
inline void* MaybeArenaPtr() const {
return _internal_metadata_.raw_arena_ptr();
}
public:
::google::protobuf::Metadata GetMetadata() const;
// nested types ----------------------------------------------------
// accessors -------------------------------------------------------
// Each field exposes the same five public members: has_*, clear_*,
// the k*FieldNumber constant, a getter and a setter.
// required float spatial_scale = 1;
bool has_spatial_scale() const;
void clear_spatial_scale();
static const int kSpatialScaleFieldNumber = 1;
float spatial_scale() const;
void set_spatial_scale(float value);
// required int32 output_dim = 2;
bool has_output_dim() const;
void clear_output_dim();
static const int kOutputDimFieldNumber = 2;
::google::protobuf::int32 output_dim() const;
void set_output_dim(::google::protobuf::int32 value);
// required int32 group_size = 3;
bool has_group_size() const;
void clear_group_size();
static const int kGroupSizeFieldNumber = 3;
::google::protobuf::int32 group_size() const;
void set_group_size(::google::protobuf::int32 value);
// @@protoc_insertion_point(class_scope:opencv_caffe.PSROIPoolingParameter)
private:
// Private presence-bit helpers, one set/clear pair per field.
inline void set_has_spatial_scale();
inline void clear_has_spatial_scale();
inline void set_has_output_dim();
inline void clear_has_output_dim();
inline void set_has_group_size();
inline void clear_has_group_size();
// helper for ByteSizeLong()
size_t RequiredFieldsByteSizeFallback() const;
// Data members. NOTE(review): declaration order presumably matters to the
// generated implementation in the .pb.cc — do not reorder without confirming.
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
::google::protobuf::internal::HasBits<1> _has_bits_;
mutable int _cached_size_;
float spatial_scale_;
::google::protobuf::int32 output_dim_;
::google::protobuf::int32 group_size_;
// File-level init/shutdown routines are friends of the class.
friend void protobuf_InitDefaults_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AddDesc_opencv_2dcaffe_2eproto_impl();
friend void protobuf_AssignDesc_opencv_2dcaffe_2eproto();
friend void protobuf_ShutdownFile_opencv_2dcaffe_2eproto();
void InitAsDefaultInstance();
};
extern ::google::protobuf::internal::ExplicitlyConstructed<PSROIPoolingParameter> PSROIPoolingParameter_default_instance_;
// ===================================================================
@ -14411,6 +14555,30 @@ inline void DetectionOutputParameter::set_confidence_threshold(float value) {
// @@protoc_insertion_point(field_set:opencv_caffe.DetectionOutputParameter.confidence_threshold)
}
// optional bool normalized_bbox = 10 [default = true];
inline bool DetectionOutputParameter::has_normalized_bbox() const {
return (_has_bits_[0] & 0x00000200u) != 0;
}
inline void DetectionOutputParameter::set_has_normalized_bbox() {
_has_bits_[0] |= 0x00000200u;
}
inline void DetectionOutputParameter::clear_has_normalized_bbox() {
_has_bits_[0] &= ~0x00000200u;
}
inline void DetectionOutputParameter::clear_normalized_bbox() {
normalized_bbox_ = true;
clear_has_normalized_bbox();
}
inline bool DetectionOutputParameter::normalized_bbox() const {
// @@protoc_insertion_point(field_get:opencv_caffe.DetectionOutputParameter.normalized_bbox)
return normalized_bbox_;
}
inline void DetectionOutputParameter::set_normalized_bbox(bool value) {
set_has_normalized_bbox();
normalized_bbox_ = value;
// @@protoc_insertion_point(field_set:opencv_caffe.DetectionOutputParameter.normalized_bbox)
}
// Returns the address of the object held by DetectionOutputParameter_default_instance_.
inline const DetectionOutputParameter* DetectionOutputParameter::internal_default_instance() {
  const DetectionOutputParameter& instance = DetectionOutputParameter_default_instance_.get();
  return &instance;
}
@ -19163,15 +19331,60 @@ inline void LayerParameter::set_allocated_proposal_param(::opencv_caffe::Proposa
// @@protoc_insertion_point(field_set_allocated:opencv_caffe.LayerParameter.proposal_param)
}
// optional .opencv_caffe.PSROIPoolingParameter psroi_pooling_param = 10001;
// Presence-bit helpers for LayerParameter.psroi_pooling_param:
// bit 0x00020000u of _has_bits_[1].
inline bool LayerParameter::has_psroi_pooling_param() const {
  return 0 != (_has_bits_[1] & 0x00020000u);
}
inline void LayerParameter::set_has_psroi_pooling_param() {
  _has_bits_[1] = _has_bits_[1] | 0x00020000u;
}
inline void LayerParameter::clear_has_psroi_pooling_param() {
  _has_bits_[1] = _has_bits_[1] & ~0x00020000u;
}
inline void LayerParameter::clear_psroi_pooling_param() {
if (psroi_pooling_param_ != NULL) psroi_pooling_param_->::opencv_caffe::PSROIPoolingParameter::Clear();
clear_has_psroi_pooling_param();
}
// Returns the stored sub-message, or the type's default instance when none
// has been allocated.
inline const ::opencv_caffe::PSROIPoolingParameter& LayerParameter::psroi_pooling_param() const {
  // @@protoc_insertion_point(field_get:opencv_caffe.LayerParameter.psroi_pooling_param)
  if (psroi_pooling_param_ != NULL) {
    return *psroi_pooling_param_;
  }
  return *::opencv_caffe::PSROIPoolingParameter::internal_default_instance();
}
// Marks the field as present, allocating the sub-message on first use, and
// returns a mutable pointer to it.
inline ::opencv_caffe::PSROIPoolingParameter* LayerParameter::mutable_psroi_pooling_param() {
  set_has_psroi_pooling_param();
  if (NULL == psroi_pooling_param_)
    psroi_pooling_param_ = new ::opencv_caffe::PSROIPoolingParameter;
  // @@protoc_insertion_point(field_mutable:opencv_caffe.LayerParameter.psroi_pooling_param)
  return psroi_pooling_param_;
}
// Hands the sub-message pointer to the caller: the member is reset to NULL
// and the field is marked absent. May return NULL if nothing was allocated.
inline ::opencv_caffe::PSROIPoolingParameter* LayerParameter::release_psroi_pooling_param() {
  // @@protoc_insertion_point(field_release:opencv_caffe.LayerParameter.psroi_pooling_param)
  ::opencv_caffe::PSROIPoolingParameter* const released = psroi_pooling_param_;
  psroi_pooling_param_ = NULL;
  clear_has_psroi_pooling_param();
  return released;
}
// Deletes the currently held sub-message and stores the given pointer in its
// place. A NULL argument leaves the field absent.
inline void LayerParameter::set_allocated_psroi_pooling_param(::opencv_caffe::PSROIPoolingParameter* psroi_pooling_param) {
  delete psroi_pooling_param_;
  psroi_pooling_param_ = psroi_pooling_param;
  if (psroi_pooling_param == NULL) {
    clear_has_psroi_pooling_param();
  } else {
    set_has_psroi_pooling_param();
  }
  // @@protoc_insertion_point(field_set_allocated:opencv_caffe.LayerParameter.psroi_pooling_param)
}
// optional .opencv_caffe.PythonParameter python_param = 130;
// Presence-bit helpers for LayerParameter.python_param:
// bit 0x00040000u of _has_bits_[1].
// The superseded 0x00020000u statements are dropped; that bit is the one
// used by has_psroi_pooling_param().
inline bool LayerParameter::has_python_param() const {
  return (_has_bits_[1] & 0x00040000u) != 0;
}
inline void LayerParameter::set_has_python_param() {
  _has_bits_[1] |= 0x00040000u;
}
inline void LayerParameter::clear_has_python_param() {
  _has_bits_[1] &= ~0x00040000u;
}
inline void LayerParameter::clear_python_param() {
if (python_param_ != NULL) python_param_->::opencv_caffe::PythonParameter::Clear();
@ -19210,13 +19423,13 @@ inline void LayerParameter::set_allocated_python_param(::opencv_caffe::PythonPar
// optional .opencv_caffe.RecurrentParameter recurrent_param = 146;
// Presence-bit helpers for LayerParameter.recurrent_param:
// bit 0x00080000u of _has_bits_[1].
// The superseded 0x00040000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_recurrent_param() const {
  return (_has_bits_[1] & 0x00080000u) != 0;
}
inline void LayerParameter::set_has_recurrent_param() {
  _has_bits_[1] |= 0x00080000u;
}
inline void LayerParameter::clear_has_recurrent_param() {
  _has_bits_[1] &= ~0x00080000u;
}
inline void LayerParameter::clear_recurrent_param() {
if (recurrent_param_ != NULL) recurrent_param_->::opencv_caffe::RecurrentParameter::Clear();
@ -19255,13 +19468,13 @@ inline void LayerParameter::set_allocated_recurrent_param(::opencv_caffe::Recurr
// optional .opencv_caffe.ReductionParameter reduction_param = 136;
// Presence-bit helpers for LayerParameter.reduction_param:
// bit 0x00100000u of _has_bits_[1].
// The superseded 0x00080000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_reduction_param() const {
  return (_has_bits_[1] & 0x00100000u) != 0;
}
inline void LayerParameter::set_has_reduction_param() {
  _has_bits_[1] |= 0x00100000u;
}
inline void LayerParameter::clear_has_reduction_param() {
  _has_bits_[1] &= ~0x00100000u;
}
inline void LayerParameter::clear_reduction_param() {
if (reduction_param_ != NULL) reduction_param_->::opencv_caffe::ReductionParameter::Clear();
@ -19300,13 +19513,13 @@ inline void LayerParameter::set_allocated_reduction_param(::opencv_caffe::Reduct
// optional .opencv_caffe.ReLUParameter relu_param = 123;
// Presence-bit helpers for LayerParameter.relu_param:
// bit 0x00200000u of _has_bits_[1].
// The superseded 0x00100000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_relu_param() const {
  return (_has_bits_[1] & 0x00200000u) != 0;
}
inline void LayerParameter::set_has_relu_param() {
  _has_bits_[1] |= 0x00200000u;
}
inline void LayerParameter::clear_has_relu_param() {
  _has_bits_[1] &= ~0x00200000u;
}
inline void LayerParameter::clear_relu_param() {
if (relu_param_ != NULL) relu_param_->::opencv_caffe::ReLUParameter::Clear();
@ -19345,13 +19558,13 @@ inline void LayerParameter::set_allocated_relu_param(::opencv_caffe::ReLUParamet
// optional .opencv_caffe.ReshapeParameter reshape_param = 133;
// Presence-bit helpers for LayerParameter.reshape_param:
// bit 0x00400000u of _has_bits_[1].
// The superseded 0x00200000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_reshape_param() const {
  return (_has_bits_[1] & 0x00400000u) != 0;
}
inline void LayerParameter::set_has_reshape_param() {
  _has_bits_[1] |= 0x00400000u;
}
inline void LayerParameter::clear_has_reshape_param() {
  _has_bits_[1] &= ~0x00400000u;
}
inline void LayerParameter::clear_reshape_param() {
if (reshape_param_ != NULL) reshape_param_->::opencv_caffe::ReshapeParameter::Clear();
@ -19390,13 +19603,13 @@ inline void LayerParameter::set_allocated_reshape_param(::opencv_caffe::ReshapeP
// optional .opencv_caffe.ROIPoolingParameter roi_pooling_param = 8266711;
// Presence-bit helpers for LayerParameter.roi_pooling_param:
// bit 0x00800000u of _has_bits_[1].
// The superseded 0x00400000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_roi_pooling_param() const {
  return (_has_bits_[1] & 0x00800000u) != 0;
}
inline void LayerParameter::set_has_roi_pooling_param() {
  _has_bits_[1] |= 0x00800000u;
}
inline void LayerParameter::clear_has_roi_pooling_param() {
  _has_bits_[1] &= ~0x00800000u;
}
inline void LayerParameter::clear_roi_pooling_param() {
if (roi_pooling_param_ != NULL) roi_pooling_param_->::opencv_caffe::ROIPoolingParameter::Clear();
@ -19435,13 +19648,13 @@ inline void LayerParameter::set_allocated_roi_pooling_param(::opencv_caffe::ROIP
// optional .opencv_caffe.ScaleParameter scale_param = 142;
// Presence-bit helpers for LayerParameter.scale_param:
// bit 0x01000000u of _has_bits_[1].
// The superseded 0x00800000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_scale_param() const {
  return (_has_bits_[1] & 0x01000000u) != 0;
}
inline void LayerParameter::set_has_scale_param() {
  _has_bits_[1] |= 0x01000000u;
}
inline void LayerParameter::clear_has_scale_param() {
  _has_bits_[1] &= ~0x01000000u;
}
inline void LayerParameter::clear_scale_param() {
if (scale_param_ != NULL) scale_param_->::opencv_caffe::ScaleParameter::Clear();
@ -19480,13 +19693,13 @@ inline void LayerParameter::set_allocated_scale_param(::opencv_caffe::ScaleParam
// optional .opencv_caffe.SigmoidParameter sigmoid_param = 124;
// Presence-bit helpers for LayerParameter.sigmoid_param:
// bit 0x02000000u of _has_bits_[1].
// The superseded 0x01000000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_sigmoid_param() const {
  return (_has_bits_[1] & 0x02000000u) != 0;
}
inline void LayerParameter::set_has_sigmoid_param() {
  _has_bits_[1] |= 0x02000000u;
}
inline void LayerParameter::clear_has_sigmoid_param() {
  _has_bits_[1] &= ~0x02000000u;
}
inline void LayerParameter::clear_sigmoid_param() {
if (sigmoid_param_ != NULL) sigmoid_param_->::opencv_caffe::SigmoidParameter::Clear();
@ -19525,13 +19738,13 @@ inline void LayerParameter::set_allocated_sigmoid_param(::opencv_caffe::SigmoidP
// optional .opencv_caffe.SoftmaxParameter softmax_param = 125;
// Presence-bit helpers for LayerParameter.softmax_param:
// bit 0x04000000u of _has_bits_[1].
// The superseded 0x02000000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_softmax_param() const {
  return (_has_bits_[1] & 0x04000000u) != 0;
}
inline void LayerParameter::set_has_softmax_param() {
  _has_bits_[1] |= 0x04000000u;
}
inline void LayerParameter::clear_has_softmax_param() {
  _has_bits_[1] &= ~0x04000000u;
}
inline void LayerParameter::clear_softmax_param() {
if (softmax_param_ != NULL) softmax_param_->::opencv_caffe::SoftmaxParameter::Clear();
@ -19570,13 +19783,13 @@ inline void LayerParameter::set_allocated_softmax_param(::opencv_caffe::SoftmaxP
// optional .opencv_caffe.SPPParameter spp_param = 132;
// Presence-bit helpers for LayerParameter.spp_param:
// bit 0x08000000u of _has_bits_[1].
// The superseded 0x04000000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_spp_param() const {
  return (_has_bits_[1] & 0x08000000u) != 0;
}
inline void LayerParameter::set_has_spp_param() {
  _has_bits_[1] |= 0x08000000u;
}
inline void LayerParameter::clear_has_spp_param() {
  _has_bits_[1] &= ~0x08000000u;
}
inline void LayerParameter::clear_spp_param() {
if (spp_param_ != NULL) spp_param_->::opencv_caffe::SPPParameter::Clear();
@ -19615,13 +19828,13 @@ inline void LayerParameter::set_allocated_spp_param(::opencv_caffe::SPPParameter
// optional .opencv_caffe.SliceParameter slice_param = 126;
// Presence-bit helpers for LayerParameter.slice_param:
// bit 0x10000000u of _has_bits_[1].
// The superseded 0x08000000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_slice_param() const {
  return (_has_bits_[1] & 0x10000000u) != 0;
}
inline void LayerParameter::set_has_slice_param() {
  _has_bits_[1] |= 0x10000000u;
}
inline void LayerParameter::clear_has_slice_param() {
  _has_bits_[1] &= ~0x10000000u;
}
inline void LayerParameter::clear_slice_param() {
if (slice_param_ != NULL) slice_param_->::opencv_caffe::SliceParameter::Clear();
@ -19660,13 +19873,13 @@ inline void LayerParameter::set_allocated_slice_param(::opencv_caffe::SliceParam
// optional .opencv_caffe.TanHParameter tanh_param = 127;
// Presence-bit helpers for LayerParameter.tanh_param:
// bit 0x20000000u of _has_bits_[1].
// The superseded 0x10000000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_tanh_param() const {
  return (_has_bits_[1] & 0x20000000u) != 0;
}
inline void LayerParameter::set_has_tanh_param() {
  _has_bits_[1] |= 0x20000000u;
}
inline void LayerParameter::clear_has_tanh_param() {
  _has_bits_[1] &= ~0x20000000u;
}
inline void LayerParameter::clear_tanh_param() {
if (tanh_param_ != NULL) tanh_param_->::opencv_caffe::TanHParameter::Clear();
@ -19705,13 +19918,13 @@ inline void LayerParameter::set_allocated_tanh_param(::opencv_caffe::TanHParamet
// optional .opencv_caffe.ThresholdParameter threshold_param = 128;
// Presence-bit helpers for LayerParameter.threshold_param:
// bit 0x40000000u of _has_bits_[1].
// The superseded 0x20000000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_threshold_param() const {
  return (_has_bits_[1] & 0x40000000u) != 0;
}
inline void LayerParameter::set_has_threshold_param() {
  _has_bits_[1] |= 0x40000000u;
}
inline void LayerParameter::clear_has_threshold_param() {
  _has_bits_[1] &= ~0x40000000u;
}
inline void LayerParameter::clear_threshold_param() {
if (threshold_param_ != NULL) threshold_param_->::opencv_caffe::ThresholdParameter::Clear();
@ -19750,13 +19963,13 @@ inline void LayerParameter::set_allocated_threshold_param(::opencv_caffe::Thresh
// optional .opencv_caffe.TileParameter tile_param = 138;
// Presence-bit helpers for LayerParameter.tile_param:
// bit 0x80000000u of _has_bits_[1].
// The superseded 0x40000000u statements are dropped so that each helper
// touches exactly one bit.
inline bool LayerParameter::has_tile_param() const {
  return (_has_bits_[1] & 0x80000000u) != 0;
}
inline void LayerParameter::set_has_tile_param() {
  _has_bits_[1] |= 0x80000000u;
}
inline void LayerParameter::clear_has_tile_param() {
  _has_bits_[1] &= ~0x80000000u;
}
inline void LayerParameter::clear_tile_param() {
if (tile_param_ != NULL) tile_param_->::opencv_caffe::TileParameter::Clear();
@ -19795,13 +20008,13 @@ inline void LayerParameter::set_allocated_tile_param(::opencv_caffe::TileParamet
// optional .opencv_caffe.WindowDataParameter window_data_param = 129;
// Presence-bit helpers for LayerParameter.window_data_param:
// bit 0x00000001u of _has_bits_[2].
// The superseded statements on _has_bits_[1] bit 0x80000000u are dropped so
// that each helper touches exactly one bit.
inline bool LayerParameter::has_window_data_param() const {
  return (_has_bits_[2] & 0x00000001u) != 0;
}
inline void LayerParameter::set_has_window_data_param() {
  _has_bits_[2] |= 0x00000001u;
}
inline void LayerParameter::clear_has_window_data_param() {
  _has_bits_[2] &= ~0x00000001u;
}
inline void LayerParameter::clear_window_data_param() {
if (window_data_param_ != NULL) window_data_param_->::opencv_caffe::WindowDataParameter::Clear();
@ -29392,6 +29605,85 @@ inline void ProposalParameter::set_nms_thresh(float value) {
inline const ProposalParameter* ProposalParameter::internal_default_instance() {
return &ProposalParameter_default_instance_.get();
}
// -------------------------------------------------------------------
// PSROIPoolingParameter
// required float spatial_scale = 1;
inline bool PSROIPoolingParameter::has_spatial_scale() const {
return (_has_bits_[0] & 0x00000001u) != 0;
}
inline void PSROIPoolingParameter::set_has_spatial_scale() {
_has_bits_[0] |= 0x00000001u;
}
inline void PSROIPoolingParameter::clear_has_spatial_scale() {
_has_bits_[0] &= ~0x00000001u;
}
inline void PSROIPoolingParameter::clear_spatial_scale() {
spatial_scale_ = 0;
clear_has_spatial_scale();
}
inline float PSROIPoolingParameter::spatial_scale() const {
// @@protoc_insertion_point(field_get:opencv_caffe.PSROIPoolingParameter.spatial_scale)
return spatial_scale_;
}
inline void PSROIPoolingParameter::set_spatial_scale(float value) {
set_has_spatial_scale();
spatial_scale_ = value;
// @@protoc_insertion_point(field_set:opencv_caffe.PSROIPoolingParameter.spatial_scale)
}
// required int32 output_dim = 2;
// Accessors for PSROIPoolingParameter.output_dim (required int32, field 2).
// Presence is recorded in bit 0x00000002u of _has_bits_[0].
inline bool PSROIPoolingParameter::has_output_dim() const {
  return 0 != (_has_bits_[0] & 0x00000002u);
}
inline void PSROIPoolingParameter::set_has_output_dim() {
  _has_bits_[0] = _has_bits_[0] | 0x00000002u;
}
inline void PSROIPoolingParameter::clear_has_output_dim() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000002u;
}
// Marks the field as absent and resets the stored value to 0.
inline void PSROIPoolingParameter::clear_output_dim() {
  clear_has_output_dim();
  output_dim_ = 0;
}
inline ::google::protobuf::int32 PSROIPoolingParameter::output_dim() const {
  // @@protoc_insertion_point(field_get:opencv_caffe.PSROIPoolingParameter.output_dim)
  return this->output_dim_;
}
// Stores the value and marks the field as present.
inline void PSROIPoolingParameter::set_output_dim(::google::protobuf::int32 value) {
  output_dim_ = value;
  set_has_output_dim();
  // @@protoc_insertion_point(field_set:opencv_caffe.PSROIPoolingParameter.output_dim)
}
// required int32 group_size = 3;
// Accessors for PSROIPoolingParameter.group_size (required int32, field 3).
// Presence is recorded in bit 0x00000004u of _has_bits_[0].
inline bool PSROIPoolingParameter::has_group_size() const {
  return 0 != (_has_bits_[0] & 0x00000004u);
}
inline void PSROIPoolingParameter::set_has_group_size() {
  _has_bits_[0] = _has_bits_[0] | 0x00000004u;
}
inline void PSROIPoolingParameter::clear_has_group_size() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000004u;
}
// Marks the field as absent and resets the stored value to 0.
inline void PSROIPoolingParameter::clear_group_size() {
  clear_has_group_size();
  group_size_ = 0;
}
inline ::google::protobuf::int32 PSROIPoolingParameter::group_size() const {
  // @@protoc_insertion_point(field_get:opencv_caffe.PSROIPoolingParameter.group_size)
  return this->group_size_;
}
// Stores the value and marks the field as present.
inline void PSROIPoolingParameter::set_group_size(::google::protobuf::int32 value) {
  group_size_ = value;
  set_has_group_size();
  // @@protoc_insertion_point(field_set:opencv_caffe.PSROIPoolingParameter.group_size)
}
// Returns the address of the object held by PSROIPoolingParameter_default_instance_.
inline const PSROIPoolingParameter* PSROIPoolingParameter::internal_default_instance() {
  const PSROIPoolingParameter& instance = PSROIPoolingParameter_default_instance_.get();
  return &instance;
}
#endif // !PROTOBUF_INLINE_NOT_IN_HEADERS
// -------------------------------------------------------------------
@ -29531,6 +29823,8 @@ inline const ProposalParameter* ProposalParameter::internal_default_instance() {
// -------------------------------------------------------------------
// -------------------------------------------------------------------
// @@protoc_insertion_point(namespace_scope)

@ -179,6 +179,8 @@ message DetectionOutputParameter {
// Only consider detections whose confidences are larger than a threshold.
// If not provided, consider all boxes.
optional float confidence_threshold = 9;
// Whether prior boxes are normalized to [0, 1].
optional bool normalized_bbox = 10 [default = true];
}
message Datum {
@ -548,6 +550,7 @@ message LayerParameter {
optional PReLUParameter prelu_param = 131;
optional PriorBoxParameter prior_box_param = 150;
optional ProposalParameter proposal_param = 201;
optional PSROIPoolingParameter psroi_pooling_param = 10001; // https://github.com/daijifeng001/caffe-rfcn
optional PythonParameter python_param = 130;
optional RecurrentParameter recurrent_param = 146;
optional ReductionParameter reduction_param = 136;
@ -1633,3 +1636,10 @@ message ProposalParameter {
optional uint32 post_nms_topn = 7 [default = 300];
optional float nms_thresh = 8 [default = 0.7];
}
// origin: https://github.com/daijifeng001/caffe-rfcn
// Parameters of the position-sensitive RoI pooling (PSROIPooling) layer.
// All three fields are required.
message PSROIPoolingParameter {
// NOTE(review): presumably the factor that maps RoI coordinates onto the
// input feature map — confirm against the layer implementation.
required float spatial_scale = 1;
required int32 output_dim = 2; // output channel number
required int32 group_size = 3; // equal to pooled_size
}

@ -89,6 +89,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Deconvolution, DeconvolutionLayer);
CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(ROIPooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(PSROIPooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer);
CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer);
CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer);

@ -57,9 +57,9 @@ namespace cv
{
namespace dnn
{
// Rounds v to the nearest integer, with halves rounded away from zero:
// 0.5 is added for non-negative v and subtracted for negative v before the
// truncating conversion to int.
static inline int roundRoiSize(float v)
{
    return static_cast<int>(v + (v >= 0.f ? 0.5f : -0.5f));
}

// Scales f by `scale`, then rounds to int. The rounding direction is chosen
// from the sign of the unscaled f. Kept as its own well-formed helper so
// call sites that still use the two-argument form continue to resolve.
static inline int scaleAndRoundRoi(float f, float scale)
{
    return static_cast<int>(f * scale + (f >= 0.f ? 0.5f : -0.5f));
}
class PoolingLayerImpl : public PoolingLayer
@ -86,17 +86,24 @@ public:
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
pad.height, pad.width, stride.height, stride.width, padMode);
}
else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale"))
else if (params.has("pooled_w") || params.has("pooled_h"))
{
type = ROI;
computeMaxIdx = false;
pooledSize.width = params.get<uint32_t>("pooled_w", 1);
pooledSize.height = params.get<uint32_t>("pooled_h", 1);
}
else if (params.has("output_dim") && params.has("group_size"))
{
type = PSROI;
pooledSize.width = params.get<int>("group_size");
pooledSize.height = pooledSize.width;
psRoiOutChannels = params.get<int>("output_dim");
}
else
CV_Error(Error::StsBadArg, "Cannot determine pooling type");
setParamsFrom(params);
ceilMode = params.get<bool>("ceil_mode", true);
pooledSize.width = params.get<uint32_t>("pooled_w", 1);
pooledSize.height = params.get<uint32_t>("pooled_h", 1);
spatialScale = params.get<float>("spatial_scale", 1);
}
@ -195,7 +202,7 @@ public:
CV_Assert(inputs.size() == 1, outputs.size() == 1);
avePooling(*inputs[0], outputs[0]);
break;
case ROI:
case ROI: case PSROI:
CV_Assert(inputs.size() == 2, outputs.size() == 1);
roiPooling(*inputs[0], *inputs[1], outputs[0]);
break;
@ -234,11 +241,11 @@ public:
Size stride, Size pad, int poolingType, float spatialScale,
bool computeMaxIdx, int nstripes)
{
CV_Assert(src.isContinuous() && dst.isContinuous() &&
src.type() == CV_32F && src.type() == dst.type() &&
src.dims == 4 && dst.dims == 4 &&
(poolingType == ROI && dst.size[0] == rois.size[0] ||
src.size[0] == dst.size[0]) && src.size[1] == dst.size[1] &&
CV_Assert(src.isContinuous(), dst.isContinuous(),
src.type() == CV_32F, src.type() == dst.type(),
src.dims == 4, dst.dims == 4,
((poolingType == ROI || poolingType == PSROI) && dst.size[0] ==rois.size[0] || src.size[0] == dst.size[0]),
poolingType == PSROI || src.size[1] == dst.size[1],
(mask.empty() || (mask.type() == src.type() && mask.size == dst.size)));
PoolingInvoker p;
@ -297,12 +304,12 @@ public:
int n = (int)(ofs / channels);
int ystart, yend;
const float *srcData;
const float *srcData = 0;
if (poolingType == ROI)
{
const float *roisData = rois->ptr<float>(n);
int ystartROI = scaleAndRoundRoi(roisData[2], spatialScale);
int yendROI = scaleAndRoundRoi(roisData[4], spatialScale);
int ystartROI = roundRoiSize(roisData[2] * spatialScale);
int yendROI = roundRoiSize(roisData[4] * spatialScale);
int roiHeight = std::max(yendROI - ystartROI + 1, 1);
float roiRatio = (float)roiHeight / height;
@ -312,6 +319,17 @@ public:
CV_Assert(roisData[0] < src->size[0]);
srcData = src->ptr<float>(roisData[0], c);
}
else if (poolingType == PSROI)
{
const float *roisData = rois->ptr<float>(n);
float ystartROI = roundRoiSize(roisData[2]) * spatialScale;
float yendROI = roundRoiSize(roisData[4] + 1) * spatialScale;
float roiHeight = std::max(yendROI - ystartROI, 0.1f);
float roiRatio = roiHeight / height;
ystart = (int)std::floor(ystartROI + y0 * roiRatio);
yend = (int)std::ceil(ystartROI + (y0 + 1) * roiRatio);
}
else
{
ystart = y0 * stride_h - pad_h;
@ -530,11 +548,11 @@ public:
}
}
}
else // ROI
else if (poolingType == ROI)
{
const float *roisData = rois->ptr<float>(n);
int xstartROI = scaleAndRoundRoi(roisData[1], spatialScale);
int xendROI = scaleAndRoundRoi(roisData[3], spatialScale);
int xstartROI = roundRoiSize(roisData[1] * spatialScale);
int xendROI = roundRoiSize(roisData[3] * spatialScale);
int roiWidth = std::max(xendROI - xstartROI + 1, 1);
float roiRatio = (float)roiWidth / width;
for( ; x0 < x1; x0++ )
@ -561,6 +579,38 @@ public:
dstData[x0] = max_val;
}
}
else // PSROI
{
const float *roisData = rois->ptr<float>(n);
CV_Assert(roisData[0] < src->size[0]);
float xstartROI = roundRoiSize(roisData[1]) * spatialScale;
float xendROI = roundRoiSize(roisData[3] + 1) * spatialScale;
float roiWidth = std::max(xendROI - xstartROI, 0.1f);
float roiRatio = roiWidth / width;
for( ; x0 < x1; x0++ )
{
int xstart = (int)std::floor(xstartROI + x0 * roiRatio);
int xend = (int)std::ceil(xstartROI + (x0 + 1) * roiRatio);
xstart = max(xstart, 0);
xend = min(xend, inp_width);
if (xstart >= xend || ystart >= yend)
{
dstData[x0] = 0;
continue;
}
srcData = src->ptr<float>(roisData[0], (c * height + y0) * width + x0);
float sum_val = 0.f;
for (int y = ystart; y < yend; ++y)
for (int x = xstart; x < xend; ++x)
{
const int index = y * inp_width + x;
float val = srcData[index];
sum_val += val;
}
dstData[x0] = sum_val / ((yend - ystart) * (xend - xstart));
}
}
}
}
};
@ -719,7 +769,7 @@ public:
out.height = 1;
out.width = 1;
}
else if (type == ROI)
else if (type == ROI || type == PSROI)
{
out.height = pooledSize.height;
out.width = pooledSize.width;
@ -754,6 +804,13 @@ public:
CV_Assert(inputs.size() == 2);
dims[0] = inputs[1][0]; // Number of proposals;
}
else if (type == PSROI)
{
CV_Assert(inputs.size() == 2);
CV_Assert(psRoiOutChannels * pooledSize.width * pooledSize.height == inputs[0][1]);
dims[0] = inputs[1][0]; // Number of proposals;
dims[1] = psRoiOutChannels;
}
outputs.assign(type == MAX ? 2 : 1, shape(dims));
return false;
}
@ -784,7 +841,8 @@ private:
MAX,
AVE,
STOCHASTIC,
ROI
ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf
PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf
};
};

@ -1,24 +1,3 @@
// Faster-RCNN models use a custom layer called 'Proposal' that is written in Python.
// To map it onto OpenCV's layer, replace the layer node of [type: 'Python'] with the
// following definition:
// layer {
// name: 'proposal'
// type: 'Proposal'
// bottom: 'rpn_cls_prob_reshape'
// bottom: 'rpn_bbox_pred'
// bottom: 'im_info'
// top: 'rois'
// proposal_param {
// ratio: 0.5
// ratio: 1.0
// ratio: 2.0
// scale: 8
// scale: 16
// scale: 32
// }
// }
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/imgproc.hpp>
@ -50,9 +29,11 @@ int main(int argc, char** argv)
{
// Parse command line arguments.
CommandLineParser parser(argc, argv, keys);
parser.about( "This sample is used to run Faster-RCNN object detection with OpenCV.\n"
"You can get required models from https://github.com/rbgirshick/py-faster-rcnn" );
parser.about("This sample is used to run Faster-RCNN and R-FCN object detection "
"models with OpenCV. You can get required models from "
"https://github.com/rbgirshick/py-faster-rcnn (Faster-RCNN) and from "
"https://github.com/YuwenXiong/py-R-FCN (R-FCN). Corresponding .prototxt "
"files may be found at https://github.com/opencv/opencv_extra/tree/master/testdata/dnn.");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
@ -68,19 +49,6 @@ int main(int argc, char** argv)
// Load a model.
Net net = readNetFromCaffe(protoPath, modelPath);
// Create a postprocessing layer that produces the final bounding boxes by applying
// the predicted deltas to the proposed object locations and running non-maximum suppression on them.
LayerParams lp;
lp.set("code_type", "CENTER_SIZE"); // Every bounding box is [xmin, ymin, xmax, ymax]
lp.set("num_classes", 21);
lp.set("share_location", (int)false); // Separate predictions for different classes.
lp.set("background_label_id", 0);
lp.set("variance_encoded_in_target", (int)true);
lp.set("keep_top_k", 100);
lp.set("nms_threshold", 0.3);
lp.set("normalized_bbox", (int)false);
Ptr<Layer> detectionOutputLayer = DetectionOutputLayer::create(lp);
Mat img = imread(imagePath);
resize(img, img, Size(kInpWidth, kInpHeight));
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false);
@ -89,31 +57,8 @@ int main(int argc, char** argv)
net.setInput(blob, "data");
net.setInput(imInfo, "im_info");
std::vector<Mat> outs;
std::vector<String> outNames(3);
outNames[0] = "proposal";
outNames[1] = "bbox_pred";
outNames[2] = "cls_prob";
net.forward(outs, outNames);
Mat proposals = outs[0].colRange(1, 5).clone(); // Only last 4 columns.
Mat& deltas = outs[1];
Mat& scores = outs[2];
// Reshape proposals from Nx4 to 1x1xN*4
std::vector<int> shape(3, 1);
shape[2] = (int)proposals.total();
proposals = proposals.reshape(1, shape);
// Run postprocessing layer.
std::vector<Mat> layerInputs(3), layerOutputs(1), layerInternals;
layerInputs[0] = deltas.reshape(1, 1);
layerInputs[1] = scores.reshape(1, 1);
layerInputs[2] = proposals;
detectionOutputLayer->forward(layerInputs, layerOutputs, layerInternals);
// Draw detections.
Mat detections = layerOutputs[0];
Mat detections = net.forward();
const float* data = (float*)detections.data;
for (size_t i = 0; i < detections.total(); i += 7)
{

Loading…
Cancel
Save