@@ -86,6 +86,7 @@
 #include "precomp.hpp"
+#include "opencv2/core/hal/intrin.hpp"

 #include <limits>
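
Note: "opencv2/core/hal/intrin.hpp" brings in OpenCV's universal intrinsics
(v_uint8x16, v_uint16x8, v_load, v_zip, ...), which compile to SSE2, NEON or
other vector ISAs from one source; that is what lets the hunks below collapse
the per-ISA interpolators into a single implementation. A minimal sketch of
the style, not part of the patch (the helper name is made up):

    #include "opencv2/core/hal/intrin.hpp"

    // Rounding average of two rows of eight small 16-bit values; the same
    // source lowers to SSE2 on x86 and to NEON on ARM.
    static inline cv::v_uint16x8 row_avg(const ushort* a, const ushort* b)
    {
        cv::v_uint16x8 va = cv::v_load(a), vb = cv::v_load(b);
        return (va + vb + cv::v_setall_u16(1)) >> 1;
    }
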
@@ -111,7 +112,7 @@ public:
         return 0;
     }

-    int bayer2RGBA(const T*, int, T*, int, int) const
+    int bayer2RGBA(const T*, int, T*, int, int, const T) const
     {
         return 0;
     }
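
Note: the new trailing parameter is the alpha value to be written into the
fourth channel when dcn == 4; the NEON body below switches from the
hard-coded vdupq_n_u8(255) to vdupq_n_u8(alpha), and the call site in the
last hunk passes alpha through.
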
@@ -122,279 +123,14 @@ public:
     }
 };

-#if CV_SSE2
+#if CV_SIMD128
-class SIMDBayerInterpolator_8u
-{
-public:
-    SIMDBayerInterpolator_8u()
-    {
-        use_simd = checkHardwareSupport(CV_CPU_SSE2);
-    }
-
-    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
-                   int width, int bcoeff, int gcoeff, int rcoeff) const
-    {
-        if( !use_simd )
-            return 0;
-
-        __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
-        __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
-        __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
-        const uchar* bayer_end = bayer + width;
-
-        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
-        {
-            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
-            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
-            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
-
-            __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
-                                       _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
-            __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
-            b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);
-
-            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
-            __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
-            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
-            g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);
-
-            r0 = _mm_srli_epi16(r1, 8);
-            r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
-            r0 = _mm_slli_epi16(r0, 3);
-
-            g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
-            g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
-            g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
-            g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
-            g0 = _mm_srli_epi16(g0, 2);
-            g1 = _mm_srli_epi16(g1, 2);
-            g0 = _mm_packus_epi16(g0, g0);
-            g1 = _mm_packus_epi16(g1, g1);
-            g0 = _mm_unpacklo_epi8(g0, g1);
-            _mm_storeu_si128((__m128i*)dst, g0);
-        }
-
-        return (int)(bayer - (bayer_end - width));
-    }
-
-    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
-    {
-        if( !use_simd )
-            return 0;
-        /*
-         B G B G | B G B G | B G B G | B G B G
-         G R G R | G R G R | G R G R | G R G R
-         B G B G | B G B G | B G B G | B G B G
-         */
-
-        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
-        __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
-        __m128i masklo = _mm_set1_epi16(0x00ff);
-        const uchar* bayer_end = bayer + width;
-
-        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
-        {
-            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
-            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
-            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
-
-            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
-            __m128i nextb1 = _mm_srli_si128(b1, 2);
-            __m128i b0 = _mm_add_epi16(b1, nextb1);
-            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
-            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
-            // b0 b2 ... b14 b1 b3 ... b15
-            b0 = _mm_packus_epi16(b0, b1);
-
-            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
-            __m128i g1 = _mm_and_si128(r1, masklo);
-            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
-            g1 = _mm_srli_si128(g1, 2);
-            g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
-            // g0 g2 ... g14 g1 g3 ... g15
-            g0 = _mm_packus_epi16(g0, g1);
-
-            r0 = _mm_srli_epi16(r1, 8);
-            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
-            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
-            // r0 r2 ... r14 r1 r3 ... r15
-            r0 = _mm_packus_epi16(r0, r1);
-
-            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
-            b0 = _mm_xor_si128(b0, b1);
-            r0 = _mm_xor_si128(r0, b1);
-
-            // b1 g1 b3 g3 b5 g5...
-            b1 = _mm_unpackhi_epi8(b0, g0);
-            // b0 g0 b2 g2 b4 g4 ....
-            b0 = _mm_unpacklo_epi8(b0, g0);
-
-            // r1 0 r3 0 r5 0 ...
-            r1 = _mm_unpackhi_epi8(r0, z);
-            // r0 0 r2 0 r4 0 ...
-            r0 = _mm_unpacklo_epi8(r0, z);
-
-            // 0 b0 g0 r0 0 b2 g2 r2 ...
-            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
-            // 0 b8 g8 r8 0 b10 g10 r10 ...
-            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
-
-            // b1 g1 r1 0 b3 g3 r3 0 ...
-            r0 = _mm_unpacklo_epi16(b1, r1);
-            // b9 g9 r9 0 b11 g11 r11 0 ...
-            r1 = _mm_unpackhi_epi16(b1, r1);
-
-            // 0 b0 g0 r0 b1 g1 r1 0 ...
-            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
-            // 0 b4 g4 r4 b5 g5 r5 0 ...
-            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
-
-            _mm_storel_epi64((__m128i*)(dst-1+0), b0);
-            _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
-            _mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
-            _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));
-
-            // 0 b8 g8 r8 b9 g9 r9 0 ...
-            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
-            // 0 b12 g12 r12 b13 g13 r13 0 ...
-            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
-
-            _mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
-            _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));
-
-            _mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
-        }
-
-        return (int)(bayer - (bayer_end - width));
-    }
-
-    int bayer2RGBA(const uchar*, int, uchar*, int, int) const
-    {
-        return 0;
-    }
-
-    int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
-    {
-        if (!use_simd)
-            return 0;
-
-        const uchar* bayer_end = bayer + width;
-        __m128i masklow = _mm_set1_epi16(0x00ff);
-        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
-        __m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();
-        __m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);
-
-        for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
-        {
-            /*
-             B G B G | B G B G | B G B G | B G B G
-             G R G R | G R G R | G R G R | G R G R
-             B G B G | B G B G | B G B G | B G B G
-             */
-
-            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
-            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
-            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
-
-            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));
-            __m128i nextb1 = _mm_srli_si128(b1, 2);
-            __m128i b0 = _mm_add_epi16(b1, nextb1);
-            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
-            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
-            // b0 b2 ... b14 b1 b3 ... b15
-            b0 = _mm_packus_epi16(b0, b1);
-
-            // vertical sum
-            __m128i r0g = _mm_srli_epi16(r0, 8);
-            __m128i r2g = _mm_srli_epi16(r2, 8);
-            __m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);
-            // gorizontal sum
-            __m128i g1 = _mm_and_si128(masklow, r1);
-            __m128i nextg1 = _mm_srli_si128(g1, 2);
-            __m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);
-
-            // gradients
-            __m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));
-            __m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));
-            __m128i gmask = _mm_cmpgt_epi16(gradg, gradv);
-
-            __m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));
-            // g0 g2 ... g14 g1 g3 ...
-            g0 = _mm_packus_epi16(g0, nextg1);
-
-            r0 = _mm_srli_epi16(r1, 8);
-            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
-            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
-            // r0 r2 ... r14 r1 r3 ... r15
-            r0 = _mm_packus_epi16(r0, r1);
-
-            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
-            b0 = _mm_xor_si128(b0, b1);
-            r0 = _mm_xor_si128(r0, b1);
-
-            // b1 g1 b3 g3 b5 g5...
-            b1 = _mm_unpackhi_epi8(b0, g0);
-            // b0 g0 b2 g2 b4 g4 ....
-            b0 = _mm_unpacklo_epi8(b0, g0);
-
-            // r1 0 r3 0 r5 0 ...
-            r1 = _mm_unpackhi_epi8(r0, z);
-            // r0 0 r2 0 r4 0 ...
-            r0 = _mm_unpacklo_epi8(r0, z);
-
-            // 0 b0 g0 r0 0 b2 g2 r2 ...
-            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
-            // 0 b8 g8 r8 0 b10 g10 r10 ...
-            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
-
-            // b1 g1 r1 0 b3 g3 r3 0 ...
-            r0 = _mm_unpacklo_epi16(b1, r1);
-            // b9 g9 r9 0 b11 g11 r11 0 ...
-            r1 = _mm_unpackhi_epi16(b1, r1);
-
-            // 0 b0 g0 r0 b1 g1 r1 0 ...
-            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
-            // 0 b4 g4 r4 b5 g5 r5 0 ...
-            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
-
-            _mm_storel_epi64((__m128i*)(dst+0), b0);
-            _mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8));
-            _mm_storel_epi64((__m128i*)(dst+6*2), b1);
-            _mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8));
-
-            // 0 b8 g8 r8 b9 g9 r9 0 ...
-            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
-            // 0 b12 g12 r12 b13 g13 r13 0 ...
-            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
-
-            _mm_storel_epi64((__m128i*)(dst+6*4), g0);
-            _mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8));
-
-            _mm_storel_epi64((__m128i*)(dst+6*6), g1);
-        }
-
-        return int(bayer - (bayer_end - width));
-    }
-
-    bool use_simd;
-};
-#elif CV_NEON
 class SIMDBayerInterpolator_8u
 {
 public:
     SIMDBayerInterpolator_8u()
     {
     }

     int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
                    int width, int bcoeff, int gcoeff, int rcoeff) const
     {
         /*
          B G B G | B G B G | B G B G | B G B G
          G R G R | G R G R | G R G R | G R G R
          B G B G | B G B G | B G B G | B G B G
          */

+#if CV_NEON
         uint16x8_t masklo = vdupq_n_u16(255);
         const uchar* bayer_end = bayer + width;
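
Note: the net effect of this hunk is to delete the SSE2-only interpolator,
together with its use_simd flag and checkHardwareSupport(CV_CPU_SSE2)
runtime check, and to keep a single SIMDBayerInterpolator_8u guarded by
CV_SIMD128. The hand-tuned NEON bodies are preserved behind #if CV_NEON,
and the following hunks add universal-intrinsics bodies in the matching
#else branches, so SSE2 (and any other backend of the universal intrinsics)
goes through the portable code path.
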
@@ -440,6 +176,40 @@ public:
             vst1_u8(dst, p.val[0]);
             vst1_u8(dst + 8, p.val[1]);
         }
+#else
+        v_uint16x8 _b2y = v_setall_u16((ushort)(rcoeff*2));
+        v_uint16x8 _g2y = v_setall_u16((ushort)(gcoeff*2));
+        v_uint16x8 _r2y = v_setall_u16((ushort)(bcoeff*2));
+        const uchar* bayer_end = bayer + width;
+
+        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
+        {
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+
+            v_uint16x8 b1 = ((r0 << 8) >> 7) + ((r2 << 8) >> 7);
+            v_uint16x8 b0 = v_rotate_right<1>(b1) + b1;
+            b1 = v_rotate_right<1>(b1) << 1;
+
+            v_uint16x8 g0 = (r0 >> 7) + (r2 >> 7);
+            v_uint16x8 g1 = (r1 << 8) >> 7;
+            g0 += v_rotate_right<1>(g1) + g1;
+            g1 = v_rotate_right<1>(g1) << 2;
+
+            r0 = r1 >> 8;
+            r1 = (v_rotate_right<1>(r0) + r0) << 2;
+            r0 = r0 << 3;
+
+            g0 = (v_mul_hi(b0, _b2y) + v_mul_hi(g0, _g2y) + v_mul_hi(r0, _r2y)) >> 2;
+            g1 = (v_mul_hi(b1, _b2y) + v_mul_hi(g1, _g2y) + v_mul_hi(r1, _r2y)) >> 2;
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g0)),
+                  v_pack_u(v_reinterpret_as_s16(g1), v_reinterpret_as_s16(g1)),
+                  pack_lo, pack_hi);
+            v_store(dst, pack_lo);
+        }
+#endif

         return (int)(bayer - (bayer_end - width));
     }
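
Note: the universal bayer2Gray keeps the fixed-point scheme of the deleted
SSE2 code: bcoeff/gcoeff/rcoeff are Q14 weights (at the call site they sum
to 1 << 14), every lane is pre-scaled to hold 8*average for its channel,
and v_mul_hi returns the high 16 bits of the 16x16 product, avoiding any
widening. A scalar sketch of the same arithmetic (names are mine, not from
the patch):

    // mul_hi(x, y) == (x*y) >> 16 on 16-bit lanes. With x = 8*avg and
    // y = 2*coeff this yields (avg*coeff) >> 12, and the final >> 2
    // completes the Q14 normalization.
    static inline unsigned char gray_q14(int avg_b, int avg_g, int avg_r,
                                         int bcoeff, int gcoeff, int rcoeff)
    {
        int t = ((8*avg_b * (2*bcoeff)) >> 16)
              + ((8*avg_g * (2*gcoeff)) >> 16)
              + ((8*avg_r * (2*rcoeff)) >> 16);
        return (unsigned char)(t >> 2);
    }
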
@@ -451,6 +221,8 @@ public:
          G R G R | G R G R | G R G R | G R G R
          B G B G | B G B G | B G B G | B G B G
          */

+#if CV_NEON
         uint16x8_t masklo = vdupq_n_u16(255);
         uint8x16x3_t pix;
         const uchar* bayer_end = bayer + width;
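
Note: same structure as in bayer2Gray: the existing NEON body (ending in
vst3q_u8 below) stays verbatim under #if CV_NEON, and the next hunk supplies
a universal-intrinsics #else body, so non-NEON targets now get a vectorized
bayer2RGB as well instead of falling back to scalar code.
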
@@ -484,21 +256,109 @@ public:

             vst3q_u8(dst-1, pix);
         }
+#else
+        v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
+        v_uint16x8 mask = v_setall_u16(blue < 0 ? (ushort)(-1) : 0);
+        v_uint16x8 masklo = v_setall_u16(0x00ff);
+        v_uint8x16 z = v_setzero_u8();
+        const uchar* bayer_end = bayer + width;
+
+        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
+        {
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+
+            v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
+            v_uint16x8 nextb1 = v_rotate_right<1>(b1);
+            v_uint16x8 b0 = b1 + nextb1;
+            b1 = (nextb1 + delta1) >> 1;
+            b0 = (b0 + delta2) >> 2;
+            // b0 b2 ... b14 b1 b3 ... b15
+            b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));
+
+            v_uint16x8 g0 = (r0 >> 8) + (r2 >> 8);
+            v_uint16x8 g1 = r1 & masklo;
+            g0 += v_rotate_right<1>(g1) + g1;
+            g1 = v_rotate_right<1>(g1);
+            g0 = (g0 + delta2) >> 2;
+            // g0 g2 ... g14 g1 g3 ... g15
+            g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g1)));
+
+            r0 = r1 >> 8;
+            r1 = v_rotate_right<1>(r0) + r0;
+            r1 = (r1 + delta1) >> 1;
+            // r0 r2 ... r14 r1 r3 ... r15
+            r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));
+
+            b1 = (b0 ^ r0) & mask;
+            b0 = b0 ^ b1;
+            r0 = r0 ^ b1;
+
+            // b1 g1 b3 g3 b5 g5...
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
+            b1 = v_reinterpret_as_u16(pack_hi);
+            // b0 g0 b2 g2 b4 g4 ....
+            b0 = v_reinterpret_as_u16(pack_lo);
+
+            // r1 0 r3 0 r5 0 ...
+            v_zip(v_reinterpret_as_u8(r0), z, pack_lo, pack_hi);
+            r1 = v_reinterpret_as_u16(pack_hi);
+            // r0 0 r2 0 r4 0 ...
+            r0 = v_reinterpret_as_u16(pack_lo);
+
+            // 0 b0 g0 r0 0 b2 g2 r2 ...
+            v_zip(b0, r0, g0, g1);
+            g0 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g0)));
+            // 0 b8 g8 r8 0 b10 g10 r10 ...
+            g1 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g1)));
+
+            // b1 g1 r1 0 b3 g3 r3 0 ...
+            v_zip(b1, r1, r0, r1);
+            // b9 g9 r9 0 b11 g11 r11 0 ...
+
+            // 0 b0 g0 r0 b1 g1 r1 0 ...
+            v_uint32x4 pack32_lo, pack32_hi;
+            v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
+            b0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b4 g4 r4 b5 g5 r5 0 ...
+            b1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst-1+0, v_reinterpret_as_u8(b0));
+            v_store_high(dst-1+6*1, v_reinterpret_as_u8(b0));
+            v_store_low(dst-1+6*2, v_reinterpret_as_u8(b1));
+            v_store_high(dst-1+6*3, v_reinterpret_as_u8(b1));
+
+            // 0 b8 g8 r8 b9 g9 r9 0 ...
+            v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
+            g0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b12 g12 r12 b13 g13 r13 0 ...
+            g1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst-1+6*4, v_reinterpret_as_u8(g0));
+            v_store_high(dst-1+6*5, v_reinterpret_as_u8(g0));
+
+            v_store_low(dst-1+6*6, v_reinterpret_as_u8(g1));
+        }
+#endif

         return (int)(bayer - (bayer_end - width));
     }

-    int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
+    int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue, const uchar alpha) const
     {
         /*
          B G B G | B G B G | B G B G | B G B G
          G R G R | G R G R | G R G R | G R G R
          B G B G | B G B G | B G B G | B G B G
          */

+#if CV_NEON
         uint16x8_t masklo = vdupq_n_u16(255);
         uint8x16x4_t pix;
         const uchar* bayer_end = bayer + width;
-        pix.val[3] = vdupq_n_u8(255);
+        pix.val[3] = vdupq_n_u8(alpha);

         for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
         {
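
Note: two idioms in the universal bayer2RGB body above are worth calling
out. First, the sequence b1 = (b0 ^ r0) & mask; b0 = b0 ^ b1; r0 = r0 ^ b1;
is a branchless conditional swap: mask is all ones exactly when blue < 0,
so the blue and red planes are exchanged without a per-pixel branch. Second,
the stores write 8-byte halves at 6-byte steps from dst-1: each half carries
two 3-byte BGR pixels plus one junk byte at either end, and the overlapping
writes let consecutive stores overwrite the junk bytes. A scalar sketch of
the swap (my naming):

    // mask is 0 or ~0u depending on the Bayer variant.
    static inline void cond_swap(unsigned& b, unsigned& r, unsigned mask)
    {
        unsigned t = (b ^ r) & mask;
        b ^= t;
        r ^= t;
    }
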
@@ -529,13 +389,198 @@ public:

             vst4q_u8(dst-1, pix);
         }
+#else
+        v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
+        v_uint16x8 mask = v_setall_u16(blue < 0 ? (ushort)(-1) : 0);
+        v_uint16x8 masklo = v_setall_u16(0x00ff);
+        v_uint8x16 a = v_setall_u8(alpha);
+        const uchar* bayer_end = bayer + width;
+
+        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
+        {
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+
+            v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
+            v_uint16x8 nextb1 = v_rotate_right<1>(b1);
+            v_uint16x8 b0 = b1 + nextb1;
+            b1 = (nextb1 + delta1) >> 1;
+            b0 = (b0 + delta2) >> 2;
+            // b0 b2 ... b14 b1 b3 ... b15
+            b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));
+
+            v_uint16x8 g0 = (r0 >> 8) + (r2 >> 8);
+            v_uint16x8 g1 = r1 & masklo;
+            g0 += v_rotate_right<1>(g1) + g1;
+            g1 = v_rotate_right<1>(g1);
+            g0 = (g0 + delta2) >> 2;
+            // g0 g2 ... g14 g1 g3 ... g15
+            g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g1)));
+
+            r0 = r1 >> 8;
+            r1 = v_rotate_right<1>(r0) + r0;
+            r1 = (r1 + delta1) >> 1;
+            // r0 r2 ... r14 r1 r3 ... r15
+            r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));
+
+            b1 = (b0 ^ r0) & mask;
+            b0 = b0 ^ b1;
+            r0 = r0 ^ b1;
+
+            // b1 g1 b3 g3 b5 g5...
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
+            b1 = v_reinterpret_as_u16(pack_hi);
+            // b0 g0 b2 g2 b4 g4 ....
+            b0 = v_reinterpret_as_u16(pack_lo);
+
+            // r1 a r3 a r5 a ...
+            v_zip(v_reinterpret_as_u8(r0), a, pack_lo, pack_hi);
+            r1 = v_reinterpret_as_u16(pack_hi);
+            // r0 a r2 a r4 a ...
+            r0 = v_reinterpret_as_u16(pack_lo);
+
+            // a b0 g0 r0 a b2 g2 r2 ...
+            v_zip(b0, r0, g0, g1);
+            // a b8 g8 r8 a b10 g10 r10 ...
+
+            // b1 g1 r1 a b3 g3 r3 a ...
+            v_zip(b1, r1, r0, r1);
+            // b9 g9 r9 a b11 g11 r11 a ...
+
+            // a b0 g0 r0 b1 g1 r1 a ...
+            v_uint32x4 pack32_lo, pack32_hi;
+            v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
+            b0 = v_reinterpret_as_u16(pack32_lo);
+            // a b4 g4 r4 b5 g5 r5 a ...
+            b1 = v_reinterpret_as_u16(pack32_hi);
+
+            v_store_low(dst-1+0, v_reinterpret_as_u8(b0));
+            v_store_high(dst-1+8*1, v_reinterpret_as_u8(b0));
+            v_store_low(dst-1+8*2, v_reinterpret_as_u8(b1));
+            v_store_high(dst-1+8*3, v_reinterpret_as_u8(b1));
+
+            // a b8 g8 r8 b9 g9 r9 a ...
+            v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
+            g0 = v_reinterpret_as_u16(pack32_lo);
+            // a b12 g12 r12 b13 g13 r13 a ...
+            g1 = v_reinterpret_as_u16(pack32_hi);
+
+            v_store_low(dst-1+8*4, v_reinterpret_as_u8(g0));
+            v_store_high(dst-1+8*5, v_reinterpret_as_u8(g0));
+
+            v_store_low(dst-1+8*6, v_reinterpret_as_u8(g1));
+        }
+#endif

         return (int)(bayer - (bayer_end - width));
     }

-    int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const
+    int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
     {
-        return 0;
+        const uchar* bayer_end = bayer + width;
+        v_uint16x8 masklow = v_setall_u16(0x00ff);
+        v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
+        v_uint16x8 full = v_setall_u16((ushort)(-1));
+        v_uint8x16 z = v_setzero_u8();
+        v_uint16x8 mask = v_setall_u16(blue > 0 ? (ushort)(-1) : 0);
+
+        for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
+        {
+            /*
+             B G B G | B G B G | B G B G | B G B G
+             G R G R | G R G R | G R G R | G R G R
+             B G B G | B G B G | B G B G | B G B G
+             */
+
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+
+            v_uint16x8 b1 = (r0 & masklow) + (r2 & masklow);
+            v_uint16x8 nextb1 = v_rotate_right<1>(b1);
+            v_uint16x8 b0 = b1 + nextb1;
+            b1 = (nextb1 + delta1) >> 1;
+            b0 = (b0 + delta2) >> 2;
+            // b0 b2 ... b14 b1 b3 ... b15
+            b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));
+
+            // vertical sum
+            v_uint16x8 r0g = r0 >> 8;
+            v_uint16x8 r2g = r2 >> 8;
+            v_uint16x8 sumv = ((r0g + r2g) + delta1) >> 1;
+            // horizontal sum
+            v_uint16x8 g1 = r1 & masklow;
+            v_uint16x8 nextg1 = v_rotate_right<1>(g1);
+            v_uint16x8 sumg = (g1 + nextg1 + delta1) >> 1;
+
+            // gradients
+            v_uint16x8 gradv = (r0g - r2g) + (r2g - r0g);
+            v_uint16x8 gradg = (nextg1 - g1) + (g1 - nextg1);
+            v_uint16x8 gmask = gradg > gradv;
+            v_uint16x8 g0 = (gmask & sumv) + (sumg & (gmask ^ full));
+            // g0 g2 ... g14 g1 g3 ...
+            g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(nextg1)));
+
+            r0 = r1 >> 8;
+            r1 = v_rotate_right<1>(r0) + r0;
+            r1 = (r1 + delta1) >> 1;
+            // r0 r2 ... r14 r1 r3 ... r15
+            r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));
+
+            b1 = (b0 ^ r0) & mask;
+            b0 = b0 ^ b1;
+            r0 = r0 ^ b1;
+
+            // b1 g1 b3 g3 b5 g5...
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
+            b1 = v_reinterpret_as_u16(pack_hi);
+            // b0 g0 b2 g2 b4 g4 ....
+            b0 = v_reinterpret_as_u16(pack_lo);
+
+            // r1 0 r3 0 r5 0 ...
+            v_zip(v_reinterpret_as_u8(r0), z, pack_lo, pack_hi);
+            r1 = v_reinterpret_as_u16(pack_hi);
+            // r0 0 r2 0 r4 0 ...
+            r0 = v_reinterpret_as_u16(pack_lo);
+
+            // 0 b0 g0 r0 0 b2 g2 r2 ...
+            v_zip(b0, r0, g0, g1);
+            g0 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g0)));
+            // 0 b8 g8 r8 0 b10 g10 r10 ...
+            g1 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g1)));
+
+            // b1 g1 r1 0 b3 g3 r3 0 ...
+            v_zip(b1, r1, r0, r1);
+            // b9 g9 r9 0 b11 g11 r11 0 ...
+
+            // 0 b0 g0 r0 b1 g1 r1 0 ...
+            v_uint32x4 pack32_lo, pack32_hi;
+            v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
+            b0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b4 g4 r4 b5 g5 r5 0 ...
+            b1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst+0, v_reinterpret_as_u8(b0));
+            v_store_high(dst+6*1, v_reinterpret_as_u8(b0));
+            v_store_low(dst+6*2, v_reinterpret_as_u8(b1));
+            v_store_high(dst+6*3, v_reinterpret_as_u8(b1));
+
+            // 0 b8 g8 r8 b9 g9 r9 0 ...
+            v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
+            g0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b12 g12 r12 b13 g13 r13 0 ...
+            g1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst+6*4, v_reinterpret_as_u8(g0));
+            v_store_high(dst+6*5, v_reinterpret_as_u8(g0));
+
+            v_store_low(dst+6*6, v_reinterpret_as_u8(g1));
+        }
+
+        return int(bayer - (bayer_end - width));
+    }
 };
 #else
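
Note: bayer2RGB_EA (edge-aware demosaicing) previously returned 0 from this
class, i.e. it had no NEON path and fell back to scalar code; the new body
ports the SSE2 implementation removed above to universal intrinsics. For
green it interpolates along the axis with the smaller absolute gradient,
and because operator- on v_uint16x8 saturates at zero, (a - b) + (b - a)
evaluates to |a - b|. A scalar sketch of the selection logic (my naming,
not from the patch):

    #include <cstdlib>

    // Interpolate green along the axis with the smaller gradient; this
    // mirrors the gmask selection above.
    static inline int green_edge_aware(int up, int down, int left, int right)
    {
        int gradv = std::abs(up - down);     // vertical green gradient
        int gradh = std::abs(left - right);  // horizontal green gradient
        int avgv  = (up + down + 1) >> 1;
        int avgh  = (left + right + 1) >> 1;
        return gradh > gradv ? avgv : avgh;
    }
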
@@ -775,7 +820,7 @@

         // simd optimization only for dcn == 3
         int delta = dcn == 4 ?
-            vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :
+            vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue, alpha) :
             vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
         bayer += delta;
         dst += delta*dcn;
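
Note: each vecOp.bayer2* routine returns how many pixels it actually
processed (bayer - (bayer_end - width)), or 0 when no SIMD path applies;
the caller advances bayer and dst by delta and lets the scalar loop finish
the remainder of the row, so the SIMD kernels never have to handle the
ragged tail.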