add scale_x_y parameter to region

5 years ago · 3c35b563d7
parent d981d04c76
commit 3c35b563d7
4 changed files with 21 additions and 15 deletions
--- a/modules/dnn/src/cuda/region.cu
+++ b/modules/dnn/src/cuda/region.cu
@ -29,7 +29,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
        __global__ void region_box(
            Span<T> output, View<T> input, View<T> bias,
            size_type boxes_per_cell, size_type box_size,
-            size_type rows, size_type cols,
+            size_type rows, size_type cols, T scale_x_y,
            size_type height_norm, size_type width_norm,
            T object_prob_cutoff)
        {
@ -48,8 +48,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
                const auto x = (box_index % row_inner_size) / col_inner_size;

                using device::fast_sigmoid;
-                output[box_offset + 0] = (T(x) + fast_sigmoid(input[box_offset + 0])) / T(cols);
-                output[box_offset + 1] = (T(y) + fast_sigmoid(input[box_offset + 1])) / T(rows);
+                const auto tmp_x = (fast_sigmoid(input[box_offset + 0]) - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);
+                const auto tmp_y = (fast_sigmoid(input[box_offset + 1]) - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);
+                output[box_offset + 0] = (T(x) + tmp_x) / T(cols);
+                output[box_offset + 1] = (T(y) + tmp_y) / T(rows);

                vector2_type bias_xy;
                v_load(bias_xy, bias_vPtr[box_of_the_cell]);
@ -143,7 +145,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    void region(const Stream& stream, Span<T> output, View<T> input, View<T> bias,
        T object_prob_cutoff, T class_prob_cutoff,
        std::size_t boxes_per_cell, std::size_t box_size,
-        std::size_t rows, std::size_t cols,
+        std::size_t rows, std::size_t cols, T scale_x_y,
        std::size_t height_norm, std::size_t width_norm,
        bool if_true_sigmoid_else_softmax /* true = sigmoid, false = softmax */)
    {
@ -155,7 +157,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
        auto box_policy = make_policy(box_kernel, output.size() / box_size, 0, stream);
        launch_kernel(box_kernel, box_policy,
            output, input, bias, boxes_per_cell, box_size,
-            rows, cols, height_norm, width_norm,
+            rows, cols, scale_x_y, height_norm, width_norm,
            object_prob_cutoff);

        if (if_true_sigmoid_else_softmax) {
@ -171,10 +173,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {

 #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
    template void region(const Stream&, Span<__half>, View<__half>, View<__half>,
-        __half, __half, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool);
+        __half, __half, std::size_t, std::size_t, std::size_t, std::size_t, __half, std::size_t, std::size_t, bool);
 #endif

    template void region(const Stream&, Span<float>, View<float>, View<float>,
-        float, float, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool);
+        float, float, std::size_t, std::size_t, std::size_t, std::size_t, float, std::size_t, std::size_t, bool);

 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
--- a/modules/dnn/src/cuda4dnn/kernels/region.hpp
+++ b/modules/dnn/src/cuda4dnn/kernels/region.hpp
@ -16,7 +16,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    void region(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<T> bias,
        T object_prob_cutoff, T class_prob_cutoff,
        std::size_t boxes_per_cell, std::size_t box_size,
-        std::size_t rows, std::size_t cols,
+        std::size_t rows, std::size_t cols, T scale_x_y,
        std::size_t height_norm, std::size_t width_norm,
        bool if_true_sigmoid_else_softmax);

--- a/modules/dnn/src/cuda4dnn/primitives/region.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/region.hpp
@ -48,14 +48,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         *
         * actual class probability = conditional_class_prob * object_prob
         */
+        std::size_t classes, boxes_per_cell;
+        std::size_t width_norm, height_norm;
+        T scale_x_y;

        /* method for reducing class scores to probabilities */
        SquashMethod squash_method;

-        std::size_t classes, boxes_per_cell;
-
-        std::size_t width_norm, height_norm;
-
        /* prob cutoffs below which the prediction is nulled */
        T object_prob_cutoff;
        T class_prob_cutoff;
@ -81,8 +80,9 @@ namespace cv { namespace dnn { namespace cuda4dnn {
            width_norm = config.width_norm;
            height_norm = config.height_norm;

-            squash_type = config.squash_method;
+            scale_x_y = config.scale_x_y;

+            squash_type = config.squash_method;
            object_prob_cutoff = config.object_prob_cutoff;
            class_prob_cutoff = config.class_prob_cutoff;

@ -113,7 +113,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
            kernels::region<T>(stream, output, input, biasTensor,
                object_prob_cutoff, class_prob_cutoff,
                boxes_per_cell, cell_box_size,
-                rows, cols,
+                rows, cols, scale_x_y,
                height_norm, width_norm,
                if_true_sigmoid_else_softmax
            );
@ -170,9 +170,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
        csl::Tensor<T> biasTensor;
        std::size_t classes, boxes_per_cell;
        std::size_t width_norm, height_norm;
-        SquashMethod squash_type;
+        T scale_x_y;

+        SquashMethod squash_type;
        T object_prob_cutoff, class_prob_cutoff;
+
        T nms_iou_threshold;
    };

--- a/modules/dnn/src/layers/region_layer.cpp
+++ b/modules/dnn/src/layers/region_layer.cpp
@ -405,6 +405,8 @@ public:
        config.height_norm = height_norm;
        config.width_norm = width_norm;

+        config.scale_x_y = scale_x_y;
+
        config.object_prob_cutoff = (classfix == -1) ? 0.5 : 0.0;
        config.class_prob_cutoff = thresh;