diff --git a/modules/imgproc/src/drawing.cpp b/modules/imgproc/src/drawing.cpp index 3307b77f6e..9345eb35cc 100644 --- a/modules/imgproc/src/drawing.cpp +++ b/modules/imgproc/src/drawing.cpp @@ -1104,33 +1104,33 @@ static const int opencvOne = 1; # endif # endif -/* -static inline uint32_t opencvBigToHost32(const uchar* p){ -#if OPENCV_BYTEORDER==4321 - uint32_t x; - memcpy(&x,p,4); +static inline uint16_t opencvLittleToHost16(const uchar* p){ +#if OPENCV_BYTEORDER==1234 + uint16_t x; + memcpy(&x,p,sizeof(x)); + return x; +#elif OPENCV_BYTEORDER==4321 && defined(__GNUC__) + uint16_t x; + memcpy(&x,p,sizeof(x)); + return (p[0]<<8) | (p[1]>>8); +#elif OPENCV_BYTEORDER==4321 && defined(_MSC_VER) && _MSC_VER>=1300 + uint16_t x; + memcpy(&x,p,sizeof(x)); + return _byteswap_ushort(x); +#elif OPENCV_LITTLEENDIAN return x; -#elif OPENCV_BYTEORDER==1234 && defined(__GNUC__) - uint32_t x; - memcpy(&x,p,4); - return __builtin_bswap32(x); -#elif OPENCV_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 - uint32_t x; - memcpy(&x,p,4); - return _byteswap_ulong(x); #else - return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; + return (p[0]<<8) | (p[1]>>8); #endif } -static inline uint32_t opencvBigToHost32(uint32_t x){ -#if OPENCV_BYTEORDER==4321 +static inline uint16_t opencvLittleToHost16(uint16_t x){ +#if OPENCV_LITTLEENDIAN return x; #else - return opencvBigToHost32((uchar*)&x); + return opencvLittleToHost16((uchar*)&x); #endif } -*/ static inline uint32_t opencvLittleToHost32(const uchar* p){ #if OPENCV_BYTEORDER==1234 @@ -1338,6 +1338,26 @@ else \ } */ +static inline void ICV_HLINE_X(uchar* ptr, int xl, int xr, const uchar* color, int pix_size) +{ + uchar* hline_min_ptr = (uchar*)(ptr) + (xl)*(pix_size); + uchar* hline_end_ptr = (uchar*)(ptr) + (xr+1)*(pix_size); + uchar* hline_ptr = hline_min_ptr; + if (hline_min_ptr < hline_end_ptr) + { + memcpy(hline_ptr, color, pix_size); + hline_ptr += pix_size; + }//end if (hline_min_ptr < hline_end_ptr) + size_t sizeToCopy = pix_size; + while(hline_ptr < hline_end_ptr) + { + memcpy(hline_ptr, hline_min_ptr, sizeToCopy); + hline_ptr += sizeToCopy; + sizeToCopy = std::min(2*sizeToCopy, static_cast(hline_end_ptr-hline_ptr)); + }//end while(hline_ptr < hline_end_ptr) +} +//end ICV_HLINE_X() + static inline void ICV_HLINE_0(uchar* ptr, int xl, int xr, const uchar* color, int pix_size) { uchar* hline_ptr = (uchar*)(ptr) + (xl)*(pix_size); @@ -1345,7 +1365,7 @@ static inline void ICV_HLINE_0(uchar* ptr, int xl, int xr, const uchar* color, i for( ; hline_ptr <= hline_max_ptr; hline_ptr += (pix_size)) { int hline_j; - for( hline_j = 0; hline_j < (4); hline_j++ ) + for( hline_j = 0; hline_j < (pix_size); hline_j++ ) { hline_ptr[hline_j] = ((uchar*)color)[hline_j]; } @@ -1360,6 +1380,24 @@ static inline void ICV_HLINE_1(uchar* ptr, int xl, int xr, const uchar* color) uchar hline_c = *(const uchar*)(color); memset(hline_ptr, hline_c, (hline_max_ptr - hline_ptr) + 1); } +//end ICV_HLINE_1() + +static inline void ICV_HLINE_2(uchar* ptr, int xl, int xr, const uchar* color) +{ + if (is_aligned(((uchar*)(ptr) + (xl)*2), 0x2)) + { + uint16_t c = opencvLittleToHost16((uchar*)(color)); + uint16_t* hline_ptr = (uint16_t*)(ptr) + xl; + uint16_t* hline_max_ptr = (uint16_t*)(ptr) + xr; + for( ; hline_ptr <= hline_max_ptr; ) + *hline_ptr++ = c; + } + else + { + ICV_HLINE_X(ptr, xl, xr, color, 2); + } +} +//end ICV_HLINE_2() static inline void ICV_HLINE_3(uchar* ptr, int xl, int xr, const uchar* color) { @@ -1420,12 +1458,7 @@ static inline void ICV_HLINE_3(uchar* ptr, int xl, int xr, const uchar* color) } else { - for( ; hline_ptr < hline_end ; ) - { - *hline_ptr++ = ((uchar*)(color))[0]; - *hline_ptr++ = ((uchar*)(color))[1]; - *hline_ptr++ = ((uchar*)(color))[2]; - } + ICV_HLINE_X(ptr, xl, xr, color, 3); } } //end ICV_HLINE_3() @@ -1442,30 +1475,288 @@ static inline void ICV_HLINE_4(uchar* ptr, int xl, int xr, const uchar* color) } else { - uchar* hline_ptr = (uchar*)(ptr) + (xl)*(4); - uchar* hline_max_ptr = (uchar*)(ptr) + (xr)*(4); - for( ; hline_ptr <= hline_max_ptr; hline_ptr += (4)) + ICV_HLINE_X(ptr, xl, xr, color, 4); + } +} +//end ICV_HLINE_4() + +static inline void ICV_HLINE_6(uchar* ptr, int xl, int xr, const uchar* color) +{ + uchar* hline_ptr = (uchar*)(ptr) + (xl)*6; + uchar* hline_end = (uchar*)(ptr) + (xr+1)*6; + uchar* hbody24_start = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_ptr)+23)/24))); + uchar* hbody24_end = std::min(hline_end, (uchar*)(24*(((uintptr_t)(hline_end))/24))); + uchar* hbody12_start = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_ptr)+11)/12))); + uchar* hbody12_end = std::min(hline_end, (uchar*)(12*(((uintptr_t)(hline_end))/12))); + if (hbody24_start < hbody24_end) + { + int offset = ((uintptr_t)(hbody24_start-hline_ptr))%6; + uint64_t c4[3]; + uchar* ptrC4 = reinterpret_cast(&c4); + ptrC4[0] = ((uchar*)(color))[(offset++)%6]; + ptrC4[1] = ((uchar*)(color))[(offset++)%6]; + ptrC4[2] = ((uchar*)(color))[(offset++)%6]; + ptrC4[3] = ((uchar*)(color))[(offset++)%6]; + ptrC4[4] = ((uchar*)(color))[(offset++)%6]; + ptrC4[5] = ((uchar*)(color))[(offset++)%6]; + memcpy(&ptrC4[6], &ptrC4[0], 6); + memcpy(&ptrC4[12], &ptrC4[0], 12); + c4[0] = opencvLittleToHost64(c4[0]); + c4[1] = opencvLittleToHost64(c4[1]); + c4[2] = opencvLittleToHost64(c4[2]); + for(offset = 0 ; hline_ptr < hbody24_start; offset = (offset+1)%6) + *hline_ptr++ = ((uchar*)(color))[offset]; + for(uint64_t* ptr64 = reinterpret_cast(hbody24_start), *ptr64End = reinterpret_cast(hbody24_end) ; ptr64(&c4); + ptrC4[0] = ((uchar*)(color))[(offset++)%6]; + ptrC4[1] = ((uchar*)(color))[(offset++)%6]; + ptrC4[2] = ((uchar*)(color))[(offset++)%6]; + ptrC4[3] = ((uchar*)(color))[(offset++)%6]; + ptrC4[4] = ((uchar*)(color))[(offset++)%6]; + ptrC4[5] = ((uchar*)(color))[(offset++)%6]; + memcpy(&ptrC4[6], &ptrC4[0], 6); + c4[0] = opencvLittleToHost32(c4[0]); + c4[1] = opencvLittleToHost32(c4[1]); + c4[2] = opencvLittleToHost32(c4[2]); + for(offset = 0 ; hline_ptr < hbody12_start; offset = (offset+1)%6) + *hline_ptr++ = ((uchar*)(color))[offset]; + for(uint32_t* ptr32 = reinterpret_cast(hbody12_start), *ptr32End = reinterpret_cast(hbody12_end) ; ptr32(color), pix_size); + else if (pix_size == 1) ICV_HLINE_1(ptr, xl, xr, reinterpret_cast(color)); + else if (pix_size == 2) + ICV_HLINE_2(ptr, xl, xr, reinterpret_cast(color)); else if (pix_size == 3) ICV_HLINE_3(ptr, xl, xr, reinterpret_cast(color)); else if (pix_size == 4) ICV_HLINE_4(ptr, xl, xr, reinterpret_cast(color)); + else if (pix_size == 6) + ICV_HLINE_6(ptr, xl, xr, reinterpret_cast(color)); + else if (pix_size == 8) + ICV_HLINE_8(ptr, xl, xr, reinterpret_cast(color)); + //timings do not show relevant improvement when element_size >= 12 + /*else if (pix_size == 12) + ICV_HLINE_12(ptr, xl, xr, reinterpret_cast(color)); + else if (pix_size == 16) + ICV_HLINE_16(ptr, xl, xr, reinterpret_cast(color)); + else if (pix_size == 24) + ICV_HLINE_24(ptr, xl, xr, reinterpret_cast(color)); + else if (pix_size == 32) + ICV_HLINE_32(ptr, xl, xr, reinterpret_cast(color));*/ else - ICV_HLINE_0(ptr, xl, xr, reinterpret_cast(color), pix_size); + ICV_HLINE_X(ptr, xl, xr, reinterpret_cast(color), pix_size); } //end ICV_HLINE()