Drop best optimizations to reduce code size

Only keep the ICV_HLINE_X optimization to reduce code size.
pull/8182/head
chacha21 8 years ago
parent 8c7d29e526
commit fa4fd48072
  1. 669
      modules/imgproc/src/drawing.cpp

@ -1071,275 +1071,6 @@ EllipseEx( Mat& img, Point2l center, Size2l axes,
* Polygons filling *
\****************************************************************************************/
// Byte-order detection (originally adapted from SQLite).
//
// Defines:
//   OPENCV_BYTEORDER     1234 (little-endian), 4321 (big-endian) or
//                        0 when the order is not known at compile time
//   OPENCV_BIGENDIAN     non-zero on big-endian hosts
//   OPENCV_LITTLEENDIAN  non-zero on little-endian hosts
//
// The compiler-provided __BYTE_ORDER__ is consulted first: architecture names
// such as __arm__/__aarch64__ do not imply a byte order (those targets can be
// built big-endian), and some system headers define both LITTLE_ENDIAN and
// BIG_ENDIAN as constants, so a plain defined() test on them is unreliable.
// The original architecture list is kept as a fallback for other toolchains.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
# define OPENCV_BYTEORDER 1234
# define OPENCV_BIGENDIAN 0
# define OPENCV_LITTLEENDIAN 1
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
# define OPENCV_BYTEORDER 4321
# define OPENCV_BIGENDIAN 1
# define OPENCV_LITTLEENDIAN 0
#elif (defined(i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \
defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \
defined(__arm__) || defined(__aarch64__) || defined(_LITTLE_ENDIAN) || defined(LITTLE_ENDIAN))
# define OPENCV_BYTEORDER 1234
# define OPENCV_BIGENDIAN 0
# define OPENCV_LITTLEENDIAN 1
#elif (defined(sparc) || defined(__ppc__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN))
# define OPENCV_BYTEORDER 4321
# define OPENCV_BIGENDIAN 1
# define OPENCV_LITTLEENDIAN 0
#endif
#if !defined(OPENCV_BYTEORDER)
// Unknown at compile time: probe the first byte of an int at run time.
// NOTE: in this configuration OPENCV_BIGENDIAN / OPENCV_LITTLEENDIAN expand to
// run-time expressions and therefore must not be used in #if / #elif;
// preprocessor tests have to use OPENCV_BYTEORDER instead.
# define OPENCV_BYTEORDER 0
static const int opencvOne = 1;
# define OPENCV_BIGENDIAN (*((const char *)(&opencvOne))==0)
# define OPENCV_LITTLEENDIAN (*((const char *)(&opencvOne))==1)
#endif
// MSVC only (_MSC_VER >= 1400): make the byte-swap and barrier intrinsics
// available to this translation unit.
# if defined(_MSC_VER) && _MSC_VER>=1400
# if !defined(_WIN32_WCE)
// Non-Windows-CE builds: <intrin.h> declares the intrinsics, and each
// #pragma intrinsic marks the named function as an intrinsic for the compiler.
# include <intrin.h>
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
# pragma intrinsic(_byteswap_uint64)
# pragma intrinsic(_ReadWriteBarrier)
# else
// Windows CE builds (_WIN32_WCE) include <cmnintrin.h> instead.
# include <cmnintrin.h>
# endif
# endif
// Decodes the two bytes at p as a little-endian unsigned 16-bit integer and
// returns it as a host value: p[0] is the least significant byte, p[1] the
// most significant one. p does not need any particular alignment.
static inline uint16_t opencvLittleToHost16(const uchar* p){
#if OPENCV_BYTEORDER==1234
    // Little-endian host: a native load already yields the right value;
    // memcpy keeps the load safe for unaligned p.
    uint16_t x;
    memcpy(&x,p,sizeof(x));
    return x;
#else
    // Big-endian or unknown host: assemble the value explicitly. This form is
    // independent of the host byte order, so it is also correct when the
    // byte order could not be determined at compile time.
    return (uint16_t)(p[0] | (p[1]<<8));
#endif
}
/*
static inline uint16_t opencvLittleToHost16(uint16_t x){
#if OPENCV_LITTLEENDIAN
return x;
#else
return opencvLittleToHost16((const uchar*)&x);
#endif
}
*/
// Decodes the four bytes at p as a little-endian unsigned 32-bit integer and
// returns it as a host value (p[0] is the least significant byte).
// p does not need any particular alignment.
static inline uint32_t opencvLittleToHost32(const uchar* p){
#if OPENCV_BYTEORDER==1234
    // Little-endian host: a native load already yields the right value;
    // memcpy keeps the load safe for unaligned p.
    uint32_t x;
    memcpy(&x,p,sizeof(x));
    return x;
#else
    // Big-endian or unknown host: assemble the value explicitly, independent
    // of the host byte order. Each byte is widened before shifting so that
    // p[3]<<24 cannot overflow a signed int.
    return  (uint32_t)p[0]
         | ((uint32_t)p[1]<<8)
         | ((uint32_t)p[2]<<16)
         | ((uint32_t)p[3]<<24);
#endif
}
// Reinterprets the in-memory bytes of x as a little-endian 32-bit integer and
// returns the corresponding host value. On a little-endian host this is the
// identity.
static inline uint32_t opencvLittleToHost32(uint32_t x){
    // Test OPENCV_BYTEORDER rather than OPENCV_LITTLEENDIAN: the latter may be
    // a run-time expression, which is not valid inside #if.
#if OPENCV_BYTEORDER==1234
    return x;
#else
    return opencvLittleToHost32((const uchar*)&x);
#endif
}
// Decodes the eight bytes at p as a little-endian unsigned 64-bit integer and
// returns it as a host value (p[0] is the least significant byte).
// p does not need any particular alignment.
static inline uint64_t opencvLittleToHost64(const uchar* p){
#if OPENCV_BYTEORDER==1234
    // Little-endian host: a native load already yields the right value;
    // memcpy keeps the load safe for unaligned p.
    uint64_t x;
    memcpy(&x,p,sizeof(x));
    return x;
#else
    // Big-endian or unknown host: assemble the value explicitly, independent
    // of the host byte order. Each byte is widened to 64 bits before shifting;
    // shifting the promoted int by 32 or more would be undefined behaviour.
    return  (uint64_t)p[0]
         | ((uint64_t)p[1]<<8)
         | ((uint64_t)p[2]<<16)
         | ((uint64_t)p[3]<<24)
         | ((uint64_t)p[4]<<32)
         | ((uint64_t)p[5]<<40)
         | ((uint64_t)p[6]<<48)
         | ((uint64_t)p[7]<<56);
#endif
}
// Reinterprets the in-memory bytes of x as a little-endian 64-bit integer and
// returns the corresponding host value. On a little-endian host this is the
// identity.
static inline uint64_t opencvLittleToHost64(uint64_t x){
    // Test OPENCV_BYTEORDER rather than OPENCV_LITTLEENDIAN: the latter may be
    // a run-time expression, which is not valid inside #if.
#if OPENCV_BYTEORDER==1234
    return x;
#else
    return opencvLittleToHost64((const uchar*)&x);
#endif
}
/* helper macros: filling horizontal row */
// Evaluates to true when the address held by ADDR is an exact multiple of
// ALIGNMENT bytes. ADDR may be any object pointer; ALIGNMENT must be non-zero.
#define is_aligned(ADDR, ALIGNMENT) (0 == ((uintptr_t)(const void *)(ADDR)) % (ALIGNMENT))
/*#define ICV_HLINE( ptr, xl, xr, color, pix_size ) \
{ \
uchar* hline_ptr = (uchar*)(ptr) + (xl)*(pix_size); \
uchar* hline_max_ptr = (uchar*)(ptr) + (xr)*(pix_size); \
\
for( ; hline_ptr <= hline_max_ptr; hline_ptr += (pix_size))\
{ \
int hline_j; \
for( hline_j = 0; hline_j < (pix_size); hline_j++ ) \
{ \
hline_ptr[hline_j] = ((uchar*)color)[hline_j]; \
} \
} \
}*/
/*
template <unsigned pix_size_forced>
static inline void icv_hline_impl(uchar* ptr, size_t xl, size_t xr, const uchar* color, unsigned pix_size_)
{
const unsigned pix_size = pix_size_forced ? pix_size_forced : pix_size_;
uchar* hline_ptr = ptr + xl*pix_size;
uchar* hline_max_ptr = ptr + xr*pix_size;
for ( ; hline_ptr <= hline_max_ptr; hline_ptr += pix_size)
{
for (unsigned c = 0; c < pix_size; c++)
{
hline_ptr[c] = color[c];
}
}
}
#define ICV_HLINE( ptr, xl, xr, color, pix_size ) \
{ \
if (pix_size == 1) \
icv_hline_impl<1>((uchar*)ptr, (xl), (xr), (const uchar*)color,pix_size); \
else if (pix_size == 3) \
icv_hline_impl<3>((uchar*)ptr, (xl), (xr), (const uchar*)color, pix_size); \
else if (pix_size == 4) \
icv_hline_impl<4>((uchar*)ptr, (xl), (xr), (const uchar*)color, pix_size); \
else \
icv_hline_impl<0>((uchar*)ptr, (xl), (xr), (const uchar*)color, pix_size); \
}
*/
/*
#define ICV_HLINE( ptr, xl, xr, color, pix_size ) \
if((pix_size) == 1) \
{ \
uchar* hline_ptr = (uchar*)(ptr) + (xl); \
uchar* hline_max_ptr = (uchar*)(ptr) + (xr); \
uchar hline_c = *(const uchar*)(color); \
\
memset(hline_ptr, hline_c, (hline_max_ptr - hline_ptr) + 1); \
} \
else if((pix_size) == 3) \
{ \
uchar* hline_ptr = (uchar*)(ptr) + (xl)*3; \
uchar* hline_end = (uchar*)(ptr) + (xr+1)*3; \
uchar* hbody24_start = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_ptr)+23)/24))); \
uchar* hbody24_end = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_end))/24))); \
uchar* hbody12_start = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_ptr)+11)/12))); \
uchar* hbody12_end = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_end))/12))); \
if (hbody24_start < hbody24_end) \
{ \
int offset = ((uintptr_t)(hbody24_start-hline_ptr))%3; \
uint64_t c4[3]; \
uchar* ptrC4 = reinterpret_cast<uchar*>(&c4); \
ptrC4[0] = ((uchar*)(color))[(offset++)%3]; \
ptrC4[1] = ((uchar*)(color))[(offset++)%3]; \
ptrC4[2] = ((uchar*)(color))[(offset++)%3]; \
memcpy(&ptrC4[3], &ptrC4[0], 3); \
memcpy(&ptrC4[6], &ptrC4[0], 6); \
memcpy(&ptrC4[12], &ptrC4[0], 12); \
c4[0] = opencvLittleToHost64(c4[0]); \
c4[1] = opencvLittleToHost64(c4[1]); \
c4[2] = opencvLittleToHost64(c4[2]); \
for(offset = 0 ; hline_ptr < hbody24_start; offset = (offset+1)%3)\
*hline_ptr++ = ((uchar*)(color))[offset]; \
for(uint64_t* ptr64 = reinterpret_cast<uint64_t*>(hbody24_start), *ptr64End = reinterpret_cast<uint64_t*>(hbody24_end) ; ptr64<ptr64End ; ) \
{ \
*ptr64++ = c4[0]; \
*ptr64++ = c4[1]; \
*ptr64++ = c4[2]; \
} \
for(offset = ((uintptr_t)(hbody24_end-(uchar*)(ptr)))%3, hline_ptr = hbody24_end ; hline_ptr < hline_end ; offset = (offset+1)%3) \
*hline_ptr++ = ((uchar*)(color))[offset]; \
} \
else if (hbody12_start < hbody12_end) \
{ \
int offset = ((uintptr_t)(hbody12_start-hline_ptr))%3; \
uint32_t c4[3]; \
uchar* ptrC4 = reinterpret_cast<uchar*>(&c4); \
ptrC4[0] = ((uchar*)(color))[(offset++)%3]; \
ptrC4[1] = ((uchar*)(color))[(offset++)%3]; \
ptrC4[2] = ((uchar*)(color))[(offset++)%3]; \
memcpy(&ptrC4[3], &ptrC4[0], 3); \
memcpy(&ptrC4[6], &ptrC4[0], 6); \
c4[0] = opencvLittleToHost32(c4[0]); \
c4[1] = opencvLittleToHost32(c4[1]); \
c4[2] = opencvLittleToHost32(c4[2]); \
for(offset = 0 ; hline_ptr < hbody12_start; offset = (offset+1)%3)\
*hline_ptr++ = ((uchar*)(color))[offset]; \
for(uint32_t* ptr32 = reinterpret_cast<uint32_t*>(hbody12_start), *ptr32End = reinterpret_cast<uint32_t*>(hbody12_end) ; ptr32<ptr32End ; ) \
{ \
*ptr32++ = c4[0]; \
*ptr32++ = c4[1]; \
*ptr32++ = c4[2]; \
} \
for(offset = ((uintptr_t)(hbody12_end-(uchar*)(ptr)))%3, hline_ptr = hbody12_end ; hline_ptr < hline_end ; offset = (offset+1)%3) \
*hline_ptr++ = ((uchar*)(color))[offset]; \
} \
else \
{ \
for( ; hline_ptr < hline_end ; ) \
{ \
*hline_ptr++ = ((uchar*)(color))[0]; \
*hline_ptr++ = ((uchar*)(color))[1]; \
*hline_ptr++ = ((uchar*)(color))[2]; \
} \
} \
} \
else if(((pix_size) == 4) && is_aligned(((uchar*)(ptr) + (xl)*4), 0x4)) \
{ \
uint32_t c = opencvLittleToHost32((uchar*)(color)); \
uint32_t* hline_ptr = (uint32_t*)(ptr) + xl; \
uint32_t* hline_max_ptr = (uint32_t*)(ptr) + xr; \
for( ; hline_ptr <= hline_max_ptr; ) \
*hline_ptr++ = c; \
} \
else \
{ \
uchar* hline_ptr = (uchar*)(ptr) + (xl)*(pix_size); \
uchar* hline_max_ptr = (uchar*)(ptr) + (xr)*(pix_size); \
\
for( ; hline_ptr <= hline_max_ptr; hline_ptr += (pix_size))\
{ \
int hline_j; \
for( hline_j = 0; hline_j < (pix_size); hline_j++ ) \
{ \
hline_ptr[hline_j] = ((uchar*)color)[hline_j]; \
} \
} \
}
*/
static inline void ICV_HLINE_X(uchar* ptr, int xl, int xr, const uchar* color, int pix_size)
{
uchar* hline_min_ptr = (uchar*)(ptr) + (xl)*(pix_size);
@ -1360,407 +1091,9 @@ static inline void ICV_HLINE_X(uchar* ptr, int xl, int xr, const uchar* color, i
}
//end ICV_HLINE_X()
// Reference (unoptimised) row fill: writes `color` (pix_size bytes) into every
// pixel from column xl to column xr inclusive of the row starting at ptr.
// Nothing is written when xr < xl.
static inline void ICV_HLINE_0(uchar* ptr, int xl, int xr, const uchar* color, int pix_size)
{
    uchar* const last_pixel = ptr + xr * pix_size; // first byte of the last pixel
    uchar* pixel = ptr + xl * pix_size;            // first byte of the current pixel
    while (pixel <= last_pixel)
    {
        // Copy one pixel, byte by byte.
        for (int channel = 0; channel < pix_size; ++channel)
            pixel[channel] = color[channel];
        pixel += pix_size;
    }
}
//end ICV_HLINE_0()
// Fills columns xl..xr (inclusive) of a row of 1-byte pixels with the single
// byte at `color`.
//
// An empty span (xr < xl) writes nothing, matching ICV_HLINE_0. Without the
// guard a span with xr < xl - 1 would pass a negative length to memset, which
// converts to a huge size_t.
static inline void ICV_HLINE_1(uchar* ptr, int xl, int xr, const uchar* color)
{
    if (xr < xl)
        return;
    memset(ptr + xl, *color, (size_t)(xr - xl) + 1);
}
//end ICV_HLINE_1()
// Fills columns xl..xr (inclusive) of a row of 2-byte pixels with the two
// bytes at `color`.
//
// When the first pixel is 2-byte aligned the row is written with 16-bit
// stores; otherwise the generic ICV_HLINE_X is used.
static inline void ICV_HLINE_2(uchar* ptr, int xl, int xr, const uchar* color)
{
    if (is_aligned(((uchar*)(ptr) + (xl)*2), 0x2))
    {
        // Load the colour in native byte order: it is written back with native
        // stores, so the bytes in memory must stay exactly as given in `color`.
        // (Decoding it as little-endian first would reverse the bytes on a
        // big-endian host.)
        uint16_t c;
        memcpy(&c, color, sizeof(c));
        uint16_t* hline_ptr = (uint16_t*)(ptr) + xl;
        uint16_t* hline_max_ptr = (uint16_t*)(ptr) + xr;
        for( ; hline_ptr <= hline_max_ptr; )
            *hline_ptr++ = c;
    }
    else
    {
        ICV_HLINE_X(ptr, xl, xr, color, 2);
    }
}
//end ICV_HLINE_2()
// Fills columns xl..xr (inclusive) of a row of 3-byte pixels with the three
// bytes at `color`.
//
// The span is written as: a head filled byte by byte, a body filled with wide
// stores, and a tail filled byte by byte. The body uses 64-bit stores when the
// span contains a non-empty range whose bounds are addresses that are
// multiples of 24 (8 pixels = three 64-bit words); failing that, 32-bit stores
// over a range bounded by multiples of 12 (4 pixels = three 32-bit words).
// If neither range exists the generic ICV_HLINE_X is used.
static inline void ICV_HLINE_3(uchar* ptr, int xl, int xr, const uchar* color)
{
    uchar* hline_ptr = (uchar*)(ptr) + (xl)*3;
    uchar* hline_end = (uchar*)(ptr) + (xr+1)*3; // one past the last byte of the span
    // Span start rounded up / span end rounded down to multiples of 24 and 12,
    // each clamped to the end of the span.
    uchar* hbody24_start = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_ptr)+23)/24)));
    uchar* hbody24_end = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_end))/24)));
    uchar* hbody12_start = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_ptr)+11)/12)));
    uchar* hbody12_end = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_end))/12)));
    if (hbody24_start < hbody24_end)
    {
        // Which colour byte falls on the first body byte.
        int offset = ((uintptr_t)(hbody24_start-hline_ptr))%3;
        uint64_t c4[3];
        uchar* ptrC4 = reinterpret_cast<uchar*>(&c4);
        // Build 24 bytes of the repeating pattern directly in memory order.
        // No byte-order conversion is applied: the words are stored back with
        // native stores, so their in-memory bytes are already the final bytes.
        for (int i = 0; i < 24; i++)
            ptrC4[i] = color[(offset+i)%3];
        // Head: bytes before the first 24-byte boundary.
        for(offset = 0 ; hline_ptr < hbody24_start; offset = (offset+1)%3)
            *hline_ptr++ = color[offset];
        // Body: three 64-bit words per 24-byte block.
        for(uint64_t* ptr64 = reinterpret_cast<uint64_t*>(hbody24_start), *ptr64End = reinterpret_cast<uint64_t*>(hbody24_end) ; ptr64<ptr64End ; )
        {
            *ptr64++ = c4[0];
            *ptr64++ = c4[1];
            *ptr64++ = c4[2];
        }
        // Tail: the colour phase is taken relative to ptr, where pixels start.
        for(offset = ((uintptr_t)(hbody24_end-(uchar*)(ptr)))%3, hline_ptr = hbody24_end ; hline_ptr < hline_end ; offset = (offset+1)%3)
            *hline_ptr++ = color[offset];
    }
    else if (hbody12_start < hbody12_end)
    {
        // Same scheme with 12-byte blocks and 32-bit stores.
        int offset = ((uintptr_t)(hbody12_start-hline_ptr))%3;
        uint32_t c4[3];
        uchar* ptrC4 = reinterpret_cast<uchar*>(&c4);
        for (int i = 0; i < 12; i++)
            ptrC4[i] = color[(offset+i)%3];
        for(offset = 0 ; hline_ptr < hbody12_start; offset = (offset+1)%3)
            *hline_ptr++ = color[offset];
        for(uint32_t* ptr32 = reinterpret_cast<uint32_t*>(hbody12_start), *ptr32End = reinterpret_cast<uint32_t*>(hbody12_end) ; ptr32<ptr32End ; )
        {
            *ptr32++ = c4[0];
            *ptr32++ = c4[1];
            *ptr32++ = c4[2];
        }
        for(offset = ((uintptr_t)(hbody12_end-(uchar*)(ptr)))%3, hline_ptr = hbody12_end ; hline_ptr < hline_end ; offset = (offset+1)%3)
            *hline_ptr++ = color[offset];
    }
    else
    {
        ICV_HLINE_X(ptr, xl, xr, color, 3);
    }
}
//end ICV_HLINE_3()
// Fills columns xl..xr (inclusive) of a row of 4-byte pixels with the four
// bytes at `color`.
//
// When the first pixel is 4-byte aligned the row is written with 32-bit
// stores; otherwise the generic ICV_HLINE_X is used.
static inline void ICV_HLINE_4(uchar* ptr, int xl, int xr, const uchar* color)
{
    if (is_aligned(((uchar*)(ptr) + (xl)*4), 0x4))
    {
        // Load the colour in native byte order: it is written back with native
        // stores, so the bytes in memory must stay exactly as given in `color`.
        // (Decoding it as little-endian first would reverse the bytes on a
        // big-endian host.)
        uint32_t c;
        memcpy(&c, color, sizeof(c));
        uint32_t* hline_ptr = (uint32_t*)(ptr) + xl;
        uint32_t* hline_max_ptr = (uint32_t*)(ptr) + xr;
        for( ; hline_ptr <= hline_max_ptr; )
            *hline_ptr++ = c;
    }
    else
    {
        ICV_HLINE_X(ptr, xl, xr, color, 4);
    }
}
//end ICV_HLINE_4()
// Fills columns xl..xr (inclusive) of a row of 6-byte pixels with the six
// bytes at `color`.
//
// The span is written as: a head filled byte by byte, a body filled with wide
// stores, and a tail filled byte by byte. The body uses 64-bit stores when the
// span contains a non-empty range whose bounds are addresses that are
// multiples of 24 (4 pixels = three 64-bit words); failing that, 32-bit stores
// over a range bounded by multiples of 12 (2 pixels = three 32-bit words).
// If neither range exists the generic ICV_HLINE_X is used.
static inline void ICV_HLINE_6(uchar* ptr, int xl, int xr, const uchar* color)
{
    uchar* hline_ptr = (uchar*)(ptr) + (xl)*6;
    uchar* hline_end = (uchar*)(ptr) + (xr+1)*6; // one past the last byte of the span
    // Span start rounded up / span end rounded down to multiples of 24 and 12,
    // each clamped to the end of the span.
    uchar* hbody24_start = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_ptr)+23)/24)));
    uchar* hbody24_end = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_end))/24)));
    uchar* hbody12_start = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_ptr)+11)/12)));
    uchar* hbody12_end = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_end))/12)));
    if (hbody24_start < hbody24_end)
    {
        // Which colour byte falls on the first body byte.
        int offset = ((uintptr_t)(hbody24_start-hline_ptr))%6;
        uint64_t c4[3];
        uchar* ptrC4 = reinterpret_cast<uchar*>(&c4);
        // Build 24 bytes of the repeating pattern directly in memory order.
        // No byte-order conversion is applied: the words are stored back with
        // native stores, so their in-memory bytes are already the final bytes.
        for (int i = 0; i < 24; i++)
            ptrC4[i] = color[(offset+i)%6];
        // Head: bytes before the first 24-byte boundary.
        for(offset = 0 ; hline_ptr < hbody24_start; offset = (offset+1)%6)
            *hline_ptr++ = color[offset];
        // Body: three 64-bit words per 24-byte block.
        for(uint64_t* ptr64 = reinterpret_cast<uint64_t*>(hbody24_start), *ptr64End = reinterpret_cast<uint64_t*>(hbody24_end) ; ptr64<ptr64End ; )
        {
            *ptr64++ = c4[0];
            *ptr64++ = c4[1];
            *ptr64++ = c4[2];
        }
        // Tail: the colour phase is taken relative to ptr, where pixels start.
        for(offset = ((uintptr_t)(hbody24_end-(uchar*)(ptr)))%6, hline_ptr = hbody24_end ; hline_ptr < hline_end ; offset = (offset+1)%6)
            *hline_ptr++ = color[offset];
    }
    else if (hbody12_start < hbody12_end)
    {
        // Same scheme with 12-byte blocks and 32-bit stores.
        int offset = ((uintptr_t)(hbody12_start-hline_ptr))%6;
        uint32_t c4[3];
        uchar* ptrC4 = reinterpret_cast<uchar*>(&c4);
        for (int i = 0; i < 12; i++)
            ptrC4[i] = color[(offset+i)%6];
        for(offset = 0 ; hline_ptr < hbody12_start; offset = (offset+1)%6)
            *hline_ptr++ = color[offset];
        for(uint32_t* ptr32 = reinterpret_cast<uint32_t*>(hbody12_start), *ptr32End = reinterpret_cast<uint32_t*>(hbody12_end) ; ptr32<ptr32End ; )
        {
            *ptr32++ = c4[0];
            *ptr32++ = c4[1];
            *ptr32++ = c4[2];
        }
        // The pixel is 6 bytes wide, so the tail phase wraps modulo 6.
        // (Wrapping modulo 3 would repeat only the first three colour bytes.)
        for(offset = ((uintptr_t)(hbody12_end-(uchar*)(ptr)))%6, hline_ptr = hbody12_end ; hline_ptr < hline_end ; offset = (offset+1)%6)
            *hline_ptr++ = color[offset];
    }
    else
    {
        ICV_HLINE_X(ptr, xl, xr, color, 6);
    }
}
//end ICV_HLINE_6()
// Fills columns xl..xr (inclusive) of a row of 8-byte pixels with the eight
// bytes at `color`.
//
// Uses 64-bit stores when the first pixel is 8-byte aligned, pairs of 32-bit
// stores when it is only 4-byte aligned, and the generic ICV_HLINE_X
// otherwise.
static inline void ICV_HLINE_8(uchar* ptr, int xl, int xr, const uchar* color)
{
    if (is_aligned(((uchar*)(ptr) + (xl)*8), 0x8))
    {
        // Load the colour in native byte order: it is written back with native
        // stores, so the bytes in memory must stay exactly as given in `color`.
        // (Decoding it as little-endian first would reverse the bytes on a
        // big-endian host.)
        uint64_t c;
        memcpy(&c, color, sizeof(c));
        uint64_t* hline_ptr = (uint64_t*)((uchar*)(ptr) + (xl)*(8));
        uint64_t* hline_max_ptr = (uint64_t*)((uchar*)(ptr) + (xr)*(8));
        for( ; hline_ptr <= hline_max_ptr; )
            *hline_ptr++ = c;
    }
    else if (is_aligned(((uchar*)(ptr) + (xl)*8), 0x4))
    {
        // Same idea with two native 32-bit halves per pixel.
        uint32_t c[2];
        memcpy(c, color, sizeof(c));
        uint32_t* hline_ptr = (uint32_t*)((uchar*)(ptr) + (xl)*(8));
        uint32_t* hline_max_ptr = (uint32_t*)((uchar*)(ptr) + (xr)*(8));
        for( ; hline_ptr <= hline_max_ptr; )
        {
            *hline_ptr++ = c[0];
            *hline_ptr++ = c[1];
        }
    }
    else
    {
        ICV_HLINE_X(ptr, xl, xr, color, 8);
    }
}
//end ICV_HLINE_8()
/*
static inline void ICV_HLINE_12(uchar* ptr, int xl, int xr, const uchar* color)
{
if (is_aligned(((uchar*)(ptr) + (xl)*12), 0x4))
{
uint32_t c[3] = {opencvLittleToHost32(color+0x00),
opencvLittleToHost32(color+0x04),
opencvLittleToHost32(color+0x08)};
uint32_t* hline_ptr = (uint32_t*)((uchar*)(ptr) + (xl)*(12));
uint32_t* hline_max_ptr = (uint32_t*)((uchar*)(ptr) + (xr)*(12));
for( ; hline_ptr <= hline_max_ptr; )
{
*hline_ptr++ = c[0];
*hline_ptr++ = c[1];
*hline_ptr++ = c[2];
}
}
else
{
ICV_HLINE_X(ptr, xl, xr, color, 12);
}
}
//end ICV_HLINE_12()
static inline void ICV_HLINE_16(uchar* ptr, int xl, int xr, const uchar* color)
{
if (is_aligned(((uchar*)(ptr) + (xl)*16), 0x8))
{
uint64_t c[2] = {opencvLittleToHost64(color+0x00),
opencvLittleToHost64(color+0x08)};
uint64_t* hline_ptr = (uint64_t*)((uchar*)(ptr) + (xl)*(16));
uint64_t* hline_max_ptr = (uint64_t*)((uchar*)(ptr) + (xr)*(16));
for( ; hline_ptr <= hline_max_ptr; )
{
*hline_ptr++ = c[0];
*hline_ptr++ = c[1];
}
}
else if (is_aligned(((uchar*)(ptr) + (xl)*16), 0x4))
{
uint32_t c[4] = {opencvLittleToHost32(color+0x00),
opencvLittleToHost32(color+0x04),
opencvLittleToHost32(color+0x08),
opencvLittleToHost32(color+0x0C)};
uint32_t* hline_ptr = (uint32_t*)((uchar*)(ptr) + (xl)*(16));
uint32_t* hline_max_ptr = (uint32_t*)((uchar*)(ptr) + (xr)*(16));
for( ; hline_ptr <= hline_max_ptr; )
{
*hline_ptr++ = c[0];
*hline_ptr++ = c[1];
*hline_ptr++ = c[2];
*hline_ptr++ = c[3];
}
}
else
{
ICV_HLINE_X(ptr, xl, xr, color, 16);
}
}
//end ICV_HLINE_16()
static inline void ICV_HLINE_24(uchar* ptr, int xl, int xr, const uchar* color)
{
if (is_aligned(((uchar*)(ptr) + (xl)*24), 0x8))
{
uint64_t c[3] = {opencvLittleToHost64(color+0x00),
opencvLittleToHost64(color+0x08),
opencvLittleToHost64(color+0x10)};
uint64_t* hline_ptr = (uint64_t*)((uchar*)(ptr) + (xl)*(24));
uint64_t* hline_max_ptr = (uint64_t*)((uchar*)(ptr) + (xr)*(24));
for( ; hline_ptr <= hline_max_ptr; )
{
*hline_ptr++ = c[0];
*hline_ptr++ = c[1];
*hline_ptr++ = c[2];
}
}
else if (is_aligned(((uchar*)(ptr) + (xl)*24), 0x4))
{
uint32_t c[6] = {opencvLittleToHost32(color+0x00),
opencvLittleToHost32(color+0x04),
opencvLittleToHost32(color+0x08),
opencvLittleToHost32(color+0x0C),
opencvLittleToHost32(color+0x10),
opencvLittleToHost32(color+0x14)};
uint32_t* hline_ptr = (uint32_t*)((uchar*)(ptr) + (xl)*(24));
uint32_t* hline_max_ptr = (uint32_t*)((uchar*)(ptr) + (xr)*(24));
for( ; hline_ptr <= hline_max_ptr; )
{
*hline_ptr++ = c[0];
*hline_ptr++ = c[1];
*hline_ptr++ = c[2];
*hline_ptr++ = c[3];
*hline_ptr++ = c[4];
*hline_ptr++ = c[5];
}
}
else
{
ICV_HLINE_X(ptr, xl, xr, color, 24);
}
}
//end ICV_HLINE_24()
static inline void ICV_HLINE_32(uchar* ptr, int xl, int xr, const uchar* color)
{
if (is_aligned(((uchar*)(ptr) + (xl)*32), 0x8))
{
uint64_t c[4] = {opencvLittleToHost64(color+0x00),
opencvLittleToHost64(color+0x08),
opencvLittleToHost64(color+0x10),
opencvLittleToHost64(color+0x18)};
uint64_t* hline_ptr = (uint64_t*)((uchar*)(ptr) + (xl)*(32));
uint64_t* hline_max_ptr = (uint64_t*)((uchar*)(ptr) + (xr)*(32));
for( ; hline_ptr <= hline_max_ptr; )
{
*hline_ptr++ = c[0];
*hline_ptr++ = c[1];
*hline_ptr++ = c[2];
*hline_ptr++ = c[3];
}
}
else if (is_aligned(((uchar*)(ptr) + (xl)*32), 0x4))
{
uint32_t c[8] = {opencvLittleToHost32(color+0x00),
opencvLittleToHost32(color+0x04),
opencvLittleToHost32(color+0x08),
opencvLittleToHost32(color+0x0C),
opencvLittleToHost32(color+0x10),
opencvLittleToHost32(color+0x14),
opencvLittleToHost32(color+0x18),
opencvLittleToHost32(color+0x1C)};
uint32_t* hline_ptr = (uint32_t*)((uchar*)(ptr) + (xl)*(32));
uint32_t* hline_max_ptr = (uint32_t*)((uchar*)(ptr) + (xr)*(32));
for( ; hline_ptr <= hline_max_ptr; )
{
*hline_ptr++ = c[0];
*hline_ptr++ = c[1];
*hline_ptr++ = c[2];
*hline_ptr++ = c[3];
*hline_ptr++ = c[4];
*hline_ptr++ = c[5];
*hline_ptr++ = c[6];
*hline_ptr++ = c[7];
}
}
else
{
ICV_HLINE_X(ptr, xl, xr, color, 32);
}
}
//end ICV_HLINE_32()
*/
// Compile-time switch: when false, ICV_HLINE always uses the reference
// implementation ICV_HLINE_0 instead of the size-specialised fillers.
static const bool ICV_HLINE_OPTIMIZATION = true;

// Fills columns xl..xr (inclusive) of the row starting at ptr with the pixel
// value at `color`, where each pixel is pix_size bytes wide.
//
// Dispatches on pix_size to a specialised filler for sizes 1, 2, 3, 4, 6 and
// 8 and to the generic ICV_HLINE_X for every other size. Exactly one filler
// runs per call, so the row is written only once.
static inline void ICV_HLINE(uchar* ptr, int xl, int xr, const void* color, int pix_size)
{
    const uchar* color_bytes = reinterpret_cast<const uchar*>(color);
    if (!ICV_HLINE_OPTIMIZATION)
    {
        ICV_HLINE_0(ptr, xl, xr, color_bytes, pix_size);
        return;
    }
    switch (pix_size)
    {
    case 1: ICV_HLINE_1(ptr, xl, xr, color_bytes); break;
    case 2: ICV_HLINE_2(ptr, xl, xr, color_bytes); break;
    case 3: ICV_HLINE_3(ptr, xl, xr, color_bytes); break;
    case 4: ICV_HLINE_4(ptr, xl, xr, color_bytes); break;
    case 6: ICV_HLINE_6(ptr, xl, xr, color_bytes); break;
    case 8: ICV_HLINE_8(ptr, xl, xr, color_bytes); break;
    //timings do not show relevant improvement when element_size >= 12,
    //so sizes 12, 16, 24 and 32 go through the generic path as well
    default: ICV_HLINE_X(ptr, xl, xr, color_bytes, pix_size); break;
    }
}
//end ICV_HLINE()

Loading…
Cancel
Save