commit
560f85f8e5
40 changed files with 3101 additions and 708 deletions
@@ -1,25 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
#define OPENCV_IMGPROC_SUM_PIXELS_HPP

namespace cv
{

namespace opt_AVX512_SKX
{
#if CV_TRY_AVX512_SKX
void calculate_integral_avx512(
        const uchar *src, size_t _srcstep,
        double *sum, size_t _sumstep,
        double *sqsum, size_t _sqsumstep,
        int width, int height, int cn);

#endif
} // end namespace opt_AVX512_SKX
} // end namespace cv

#endif
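For reference, the integral-image semantics behind this interface: sum(Y, X) holds the sum of src(y, x) over all y < Y, x < X, so sum has one extra zero row and column. A minimal scalar sketch for one channel (hypothetical reference code, not the AVX-512 implementation; steps are in bytes, as in the declaration above):

// Hypothetical scalar reference for a 1-channel integral image:
// sum has (height + 1) x (width + 1) entries.
static void integral_scalar_ref(const unsigned char* src, size_t srcstep,
                                double* sum, size_t sumstep, // steps in bytes
                                int width, int height)
{
    for (int x = 0; x <= width; x++)
        sum[x] = 0;                                   // zero first row
    for (int y = 0; y < height; y++)
    {
        const unsigned char* s = src + y * srcstep;
        double* prev = (double*)((char*)sum + y * sumstep);
        double* cur  = (double*)((char*)sum + (y + 1) * sumstep);
        cur[0] = 0;                                   // zero first column
        double rowsum = 0;
        for (int x = 0; x < width; x++)
        {
            rowsum += s[x];
            cur[x + 1] = prev[x + 1] + rowsum;        // row prefix + column carry
        }
    }
}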
@@ -0,0 +1,288 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2020 Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "opencv2/core/hal/intrin.hpp"

#if CV_AVX512_SKX
#include "sumpixels.avx512_skx.hpp"
#endif

namespace cv { namespace hal {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN

// forward declarations
bool integral_SIMD(
        int depth, int sdepth, int sqdepth,
        const uchar* src, size_t srcstep,
        uchar* sum, size_t sumstep,
        uchar* sqsum, size_t sqsumstep,
        uchar* tilted, size_t tstep,
        int width, int height, int cn);

#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
namespace {

// Generic fallback: no vectorized path for this type combination.
template <typename T, typename ST, typename QT>
struct Integral_SIMD
{
    bool operator()(const T *, size_t,
                    ST *, size_t,
                    QT *, size_t,
                    ST *, size_t,
                    int, int, int) const
    {
        return false;
    }
};

#if CV_AVX512_SKX
template <>
struct Integral_SIMD<uchar, double, double>
{
    Integral_SIMD() {}

    bool operator()(const uchar *src, size_t _srcstep,
                    double *sum, size_t _sumstep,
                    double *sqsum, size_t _sqsumstep,
                    double *tilted, size_t _tiltedstep,
                    int width, int height, int cn) const
    {
        CV_UNUSED(_tiltedstep);
        // TODO: Add support for 1 channel input (WIP)
        if (!tilted && (cn <= 4))
        {
            calculate_integral_avx512(src, _srcstep, sum, _sumstep,
                                      sqsum, _sqsumstep, width, height, cn);
            return true;
        }
        return false;
    }
};
#endif

#if CV_SIMD && CV_SIMD_WIDTH <= 64

template <>
struct Integral_SIMD<uchar, int, double>
{
    Integral_SIMD() {}

    bool operator()(const uchar * src, size_t _srcstep,
                    int * sum, size_t _sumstep,
                    double * sqsum, size_t,
                    int * tilted, size_t,
                    int width, int height, int cn) const
    {
        if (sqsum || tilted || cn != 1)
            return false;

        // the first iteration: zero the extra top row
        memset(sum, 0, (width + 1) * sizeof(int));

        // the other rows
        for (int i = 0; i < height; ++i)
        {
            const uchar * src_row = src + _srcstep * i;
            int * prev_sum_row = (int *)((uchar *)sum + _sumstep * i) + 1;
            int * sum_row = (int *)((uchar *)sum + _sumstep * (i + 1)) + 1;

            sum_row[-1] = 0;

            v_int32 prev = vx_setzero_s32();
            int j = 0;
            for ( ; j + v_uint16::nlanes <= width; j += v_uint16::nlanes)
            {
                v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j));
                v_int32 el4l, el4h;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
                __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
                __m256i shmask = _mm256_set1_epi32(7);
                el4l.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_low(vsum)), prev.val);
                el4h.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_high(vsum)), _mm256_permutevar8x32_epi32(el4l.val, shmask));
                prev.val = _mm256_permutevar8x32_epi32(el4h.val, shmask);
#else
                // log-step prefix scan over the 16-bit lanes; the carry into
                // the high half is completed after expansion to 32 bits
                el8 += v_rotate_left<1>(el8);
                el8 += v_rotate_left<2>(el8);
#if CV_SIMD_WIDTH >= 32
                el8 += v_rotate_left<4>(el8);
#if CV_SIMD_WIDTH == 64
                el8 += v_rotate_left<8>(el8);
#endif
#endif
                v_expand(el8, el4l, el4h);
                el4l += prev;
                el4h += el4l;

                // carry the running row total into the next block
                prev = v_broadcast_element<v_int32::nlanes - 1>(el4h);
#endif
                v_store(sum_row + j                  , el4l + vx_load(prev_sum_row + j                  ));
                v_store(sum_row + j + v_int32::nlanes, el4h + vx_load(prev_sum_row + j + v_int32::nlanes));
            }

            // scalar tail: seed the running sum from the last vectorized column
            for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
                sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
        }
        return true;
    }
};
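The v_rotate_left chain above is a log-step (Hillis-Steele) inclusive scan: each shift-and-add doubles the window of lanes summed so far, and prev carries the row total between blocks. A scalar model of that scan (hypothetical plain C++, no OpenCV types):

#include <cstdio>

// Minimal scalar model of the log-step scan used above: shifting the lanes
// left by k and adding doubles the summed window each step, so log2(N)
// steps produce inclusive prefix sums of N lanes.
int main()
{
    int lanes[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    for (int shift = 1; shift < 8; shift <<= 1)        // mirrors v_rotate_left<1,2,4>
    {
        int next[8];
        for (int i = 0; i < 8; i++)
            next[i] = lanes[i] + (i >= shift ? lanes[i - shift] : 0);
        for (int i = 0; i < 8; i++) lanes[i] = next[i];
    }
    for (int i = 0; i < 8; i++)
        printf("%d ", lanes[i]);                       // prints: 1 3 6 10 15 21 28 36
    printf("\n");
    return 0;
}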

template <>
struct Integral_SIMD<uchar, float, double>
{
    Integral_SIMD() {}

    bool operator()(const uchar * src, size_t _srcstep,
                    float * sum, size_t _sumstep,
                    double * sqsum, size_t,
                    float * tilted, size_t,
                    int width, int height, int cn) const
    {
        if (sqsum || tilted || cn != 1)
            return false;

        // the first iteration: zero the extra top row
        memset(sum, 0, (width + 1) * sizeof(float));

        // the other rows
        for (int i = 0; i < height; ++i)
        {
            const uchar * src_row = src + _srcstep * i;
            float * prev_sum_row = (float *)((uchar *)sum + _sumstep * i) + 1;
            float * sum_row = (float *)((uchar *)sum + _sumstep * (i + 1)) + 1;

            sum_row[-1] = 0;

            v_float32 prev = vx_setzero_f32();
            int j = 0;
            for (; j + v_uint16::nlanes <= width; j += v_uint16::nlanes)
            {
                v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j));
                v_float32 el4l, el4h;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
                __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
                __m256i shmask = _mm256_set1_epi32(7);
                el4l.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_low(vsum))), prev.val);
                el4h.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_high(vsum))), _mm256_permutevar8x32_ps(el4l.val, shmask));
                prev.val = _mm256_permutevar8x32_ps(el4h.val, shmask);
#else
                // same log-step prefix scan as in the integer specialization
                el8 += v_rotate_left<1>(el8);
                el8 += v_rotate_left<2>(el8);
#if CV_SIMD_WIDTH >= 32
                el8 += v_rotate_left<4>(el8);
#if CV_SIMD_WIDTH == 64
                el8 += v_rotate_left<8>(el8);
#endif
#endif
                v_int32 el4li, el4hi;
                v_expand(el8, el4li, el4hi);
                el4l = v_cvt_f32(el4li) + prev;
                el4h = v_cvt_f32(el4hi) + el4l;

                prev = v_broadcast_element<v_float32::nlanes - 1>(el4h);
#endif
                v_store(sum_row + j                    , el4l + vx_load(prev_sum_row + j                    ));
                v_store(sum_row + j + v_float32::nlanes, el4h + vx_load(prev_sum_row + j + v_float32::nlanes));
            }

            // scalar tail: seed the running sum from the last vectorized column
            for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
                sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
        }
        return true;
    }
};

#endif

} // namespace anon

bool integral_SIMD(
        int depth, int sdepth, int sqdepth,
        const uchar* src, size_t srcstep,
        uchar* sum, size_t sumstep,
        uchar* sqsum, size_t sqsumstep,
        uchar* tilted, size_t tstep,
        int width, int height, int cn)
{
    CV_INSTRUMENT_REGION();

#define ONE_CALL(T, ST, QT) \
    return Integral_SIMD<T, ST, QT>()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn)

    if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
        ONE_CALL(uchar, int, double);
    else if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32F )
        ONE_CALL(uchar, int, float);
    else if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S )
        ONE_CALL(uchar, int, int);
    else if( depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F )
        ONE_CALL(uchar, float, double);
    else if( depth == CV_8U && sdepth == CV_32F && sqdepth == CV_32F )
        ONE_CALL(uchar, float, float);
    else if( depth == CV_8U && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(uchar, double, double);
    else if( depth == CV_16U && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(ushort, double, double);
    else if( depth == CV_16S && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(short, double, double);
    else if( depth == CV_32F && sdepth == CV_32F && sqdepth == CV_64F )
        ONE_CALL(float, float, double);
    else if( depth == CV_32F && sdepth == CV_32F && sqdepth == CV_32F )
        ONE_CALL(float, float, float);
    else if( depth == CV_32F && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(float, double, double);
    else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(double, double, double);
    else
        return false;

#undef ONE_CALL
}

#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
}} // cv::hal::
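Callers reach this dispatcher through the public cv::integral API; the source depth together with the requested sum and squared-sum depths selects a specialization above. A minimal usage sketch (the image path is a placeholder):

#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    // "input.png" is a placeholder path.
    cv::Mat img = cv::imread("input.png", cv::IMREAD_GRAYSCALE);
    if (img.empty())
        return 1;

    cv::Mat sum;
    // 8-bit source with a CV_32S sum and no sqsum requested can take the
    // vectorized Integral_SIMD<uchar, int, double> path above.
    cv::integral(img, sum, CV_32S);

    cv::Mat sum2, sqsum;
    // Requesting double sums maps to Integral_SIMD<uchar, double, double>,
    // i.e. the AVX-512 routine when the build and CPU support it.
    cv::integral(img, sum2, sqsum, CV_64F, CV_64F);
    return 0;
}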
File diff suppressed because it is too large
@@ -0,0 +1,74 @@
/*
This tutorial demonstrates how to correct skew in text.
The program takes a skewed source image as input and displays the deskewed text.
*/

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

#include <iostream>
#include <iomanip>
#include <string>

using namespace cv;
using namespace std;


int main( int argc, char** argv )
{
    CommandLineParser parser(argc, argv, "{@input | imageTextR.png | input image}");

    // Load image from the disk
    Mat image = imread( samples::findFile( parser.get<String>("@input") ), IMREAD_COLOR);
    if (image.empty())
    {
        cout << "Cannot load the image " + parser.get<String>("@input") << endl;
        return -1;
    }

    Mat gray;
    cvtColor(image, gray, COLOR_BGR2GRAY);

    // Threshold the image, setting all foreground pixels to 255 and all background pixels to 0
    Mat thresh;
    threshold(gray, thresh, 0, 255, THRESH_BINARY_INV | THRESH_OTSU);

    // Apply an erode filter to remove random noise
    int erosion_size = 1;
    Mat element = getStructuringElement( MORPH_RECT, Size(2*erosion_size+1, 2*erosion_size+1), Point(erosion_size, erosion_size) );
    erode(thresh, thresh, element);

    Mat coords;
    findNonZero(thresh, coords);

    RotatedRect box = minAreaRect(coords);
    float angle = box.angle;

    // The cv::minAreaRect function returns values in the range [-90, 0);
    // if the angle is less than -45 we need to add 90 to it
    if (angle < -45.0f)
    {
        angle = (90.0f + angle);
    }

    // Obtain the rotation matrix
    Point2f center((image.cols) / 2.0f, (image.rows) / 2.0f);
    Mat M = getRotationMatrix2D(center, angle, 1.0f);
    Mat rotated;

    // Rotate the image by the required angle
    stringstream angle_to_str;
    angle_to_str << fixed << setprecision(2) << angle;
    warpAffine(image, rotated, M, image.size(), INTER_CUBIC, BORDER_REPLICATE);
    putText(rotated, "Angle " + angle_to_str.str() + " degrees", Point(10, 30), FONT_HERSHEY_SIMPLEX, 0.7, Scalar(0, 0, 255), 2);
    cout << "[INFO] angle: " << angle_to_str.str() << endl;

    // Show the images
    imshow("Input", image);
    imshow("Rotated", rotated);
    waitKey(0);
    return 0;
}
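The angle normalization used here (and in the Python sample later in this diff) can be checked in isolation; a small sketch with hypothetical values:

#include <cassert>

// A cv::minAreaRect-style angle lies in [-90, 0); below -45 degrees the
// box is described along its other side, so adding 90 recovers the skew.
static float normalize_deskew_angle(float angle)
{
    return (angle < -45.0f) ? 90.0f + angle : angle;
}

int main()
{
    assert(normalize_deskew_angle(-88.0f) == 2.0f);   // nearly upright box
    assert(normalize_deskew_angle(-30.0f) == -30.0f); // kept as-is
    return 0;
}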
@@ -0,0 +1,178 @@
#!/usr/bin/env python
'''
You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
or convert the model yourself.

Follow these steps if you want to convert the original model yourself:
To get the original .meta pre-trained model, download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
To convert .meta to .pb correctly, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
Modify the script evaluate_parsing_JPPNet-s2.py for human parsing as follows:
1. Remove the preprocessing that creates image_batch_origin:
    with tf.name_scope("create_inputs"):
    ...
   Add
    image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')

2. Create the input
    image = cv2.imread(path/to/image)
    image_rev = np.flip(image, axis=1)
    input = np.stack([image, image_rev], axis=0)

3. Hardcode image_h and image_w shapes to determine output shapes.
   We use the default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py.
    parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
                                            tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
                                            tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
   Do the same with parsing_out2 and parsing_out3.
4. Remove the postprocessing. The last net operation:
    raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
   Change the evaluation call to:
    parsing_ = sess.run(raw_output, feed_dict={'input:0': input})

5. To save the model after sess.run(...), add:
    input_graph_def = tf.get_default_graph().as_graph_def()
    output_node = "Mean_3"
    output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)

    output_graph = "LIP_JPPNet.pb"
    with tf.gfile.GFile(output_graph, "wb") as f:
        f.write(output_graph_def.SerializeToString())
'''

import argparse

import numpy as np
import cv2 as cv


backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)


def preprocess(image_path):
    """
    Create a 4-dimensional blob from the image and its horizontally flipped copy
    :param image_path: path to input image
    """
    image = cv.imread(image_path)
    image_rev = np.flip(image, axis=1)
    input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434))
    return input


def run_net(input, model_path, backend, target):
    """
    Read the network and run inference
    :param input: blob with the original and flipped images
    :param model_path: path to JPPNet model
    :param backend: computation backend
    :param target: computation device
    """
    net = cv.dnn.readNet(model_path)
    net.setPreferableBackend(backend)
    net.setPreferableTarget(target)
    net.setInput(input)
    out = net.forward()
    return out


def postprocess(out, input_shape):
    """
    Create a grayscale human segmentation map
    :param out: network output
    :param input_shape: input image width and height
    """
    # LIP classes
    # 0 Background
    # 1 Hat
    # 2 Hair
    # 3 Glove
    # 4 Sunglasses
    # 5 UpperClothes
    # 6 Dress
    # 7 Coat
    # 8 Socks
    # 9 Pants
    # 10 Jumpsuits
    # 11 Scarf
    # 12 Skirt
    # 13 Face
    # 14 LeftArm
    # 15 RightArm
    # 16 LeftLeg
    # 17 RightLeg
    # 18 LeftShoe
    # 19 RightShoe
    head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0)
    head_output = head_output.squeeze(0)
    tail_output = tail_output.squeeze(0)

    head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output[:, ...]])
    tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output[:, ...]])

    # the second output came from the flipped image: swap the left/right
    # paired classes and flip it back before averaging with the first output
    tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0)
    tail_list = [arr.squeeze(0) for arr in tail_list]
    tail_list_rev = [tail_list[i] for i in range(14)]
    tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]])
    tail_output_rev = np.stack(tail_list_rev, axis=0)
    tail_output_rev = np.flip(tail_output_rev, axis=2)
    raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True)
    raw_output_all = np.argmax(raw_output_all, axis=1)
    raw_output_all = raw_output_all.transpose(1, 2, 0)
    return raw_output_all


def decode_labels(gray_image):
    """
    Colorize image according to labels
    :param gray_image: grayscale human segmentation result
    """
    height, width, _ = gray_image.shape
    colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0),
              (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128),
              (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255), (85, 255, 170),
              (170, 255, 85), (255, 255, 0), (255, 170, 0)]

    segm = np.stack([colors[idx] for idx in gray_image.flatten()])
    segm = segm.reshape(height, width, 3).astype(np.uint8)
    segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB)
    return segm


def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU):
    """
    Prepare the input for execution, run the net and postprocess the output to parse a human.
    :param image_path: path to input image
    :param model_path: path to JPPNet model
    :param backend: name of computation backend
    :param target: name of computation target
    """
    input = preprocess(image_path)
    input_h, input_w = input.shape[2:]
    output = run_net(input, model_path, backend, target)
    grayscale_out = postprocess(output, (input_w, input_h))
    segmentation = decode_labels(grayscale_out)
    return segmentation


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input', '-i', required=True, help='Path to input image.')
    parser.add_argument('--model', '-m', required=True, help='Path to pb model.')
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help="Choose one of computation backends: "
                             "%d: automatically (by default), "
                             "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                             "%d: OpenCV implementation" % backends)
    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help='Choose one of target computation devices: '
                             '%d: CPU target (by default), '
                             '%d: OpenCL, '
                             '%d: OpenCL fp16 (half-float precision), '
                             '%d: VPU' % targets)
    args, _ = parser.parse_known_args()

    output = parse_human(args.input, args.model, args.backend, args.target)
    winName = 'Deep learning human parsing in OpenCV'
    cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
    cv.imshow(winName, output)
    cv.waitKey()
@@ -0,0 +1,58 @@
'''
Text skewness correction
This tutorial demonstrates how to correct skew in text.
The program takes a skewed source image as input and displays the deskewed text.

Usage:
python text_skewness_correction.py --image "Image path"
'''

import numpy as np
import cv2 as cv
import sys
import argparse


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--image", required=True, help="path to input image file")
    args = vars(parser.parse_args())

    # load the image from disk
    image = cv.imread(cv.samples.findFile(args["image"]))
    if image is None:
        print("can't read image " + args["image"])
        sys.exit(-1)
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    # threshold the image, setting all foreground pixels to
    # 255 and all background pixels to 0
    thresh = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)[1]

    # apply an erode filter to remove random noise
    erosion_size = 1
    element = cv.getStructuringElement(cv.MORPH_RECT, (2 * erosion_size + 1, 2 * erosion_size + 1), (erosion_size, erosion_size))
    thresh = cv.erode(thresh, element)

    coords = cv.findNonZero(thresh)
    angle = cv.minAreaRect(coords)[-1]
    # the `cv.minAreaRect` function returns values in the
    # range [-90, 0); if the angle is less than -45 we need to add 90 to it
    if angle < -45:
        angle = (90 + angle)

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv.warpAffine(image, M, (w, h), flags=cv.INTER_CUBIC, borderMode=cv.BORDER_REPLICATE)
    cv.putText(rotated, "Angle: {:.2f} degrees".format(angle), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # show the output image
    print("[INFO] angle: {:.2f}".format(angle))
    cv.imshow("Input", image)
    cv.imshow("Rotated", rotated)
    cv.waitKey(0)


if __name__ == "__main__":
    main()