diff --git a/modules/matlab/CMakeLists.txt b/modules/matlab/CMakeLists.txt
index f0258a10ad..ae35bcdd05 100644
--- a/modules/matlab/CMakeLists.txt
+++ b/modules/matlab/CMakeLists.txt
@@ -118,7 +118,7 @@ if (NOT MEX_WORKS)
             ${CMAKE_CURRENT_SOURCE_DIR}/test/test_compiler.cpp
         WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/junk
         ERROR_VARIABLE MEX_ERROR
-        #OUTPUT_QUIET
+        OUTPUT_QUIET
     )
 
     if (MEX_ERROR)
diff --git a/modules/matlab/include/mxarray.hpp b/modules/matlab/include/mxarray.hpp
index 00543486a0..f34c2f6d38 100644
--- a/modules/matlab/include/mxarray.hpp
+++ b/modules/matlab/include/mxarray.hpp
@@ -2,6 +2,7 @@
 #define OPENCV_MXARRAY_HPP_
 
 #include "mex.h"
+#include "transpose.hpp"
 #include
 #include
 #include
@@ -496,39 +497,6 @@ cv::Mat MxArray::toMat() const {
 // MATRIX TRANSPOSE
 // ----------------------------------------------------------------------------
 
-template <typename InputScalar, typename OutputScalar>
-void gemt(const char major, const size_t M, const size_t N, const InputScalar* a, size_t lda, OutputScalar* b, size_t ldb) {
-  switch (major) {
-    case 'R':
-      for (size_t m = 0; m < M; ++m) {
-        InputScalar const * arow = a + m*lda;
-        InputScalar const * const aend = arow + N;
-        OutputScalar * bcol = b + m;
-        while (arow < aend) {
-          *bcol = *arow;
-          arow++;
-          bcol+=ldb;
-        }
-      }
-      return;
-    case 'C':
-      for (size_t n = 0; n < N; ++n) {
-        InputScalar const * acol = a + n*lda;
-        InputScalar const * const aend = acol + M;
-        OutputScalar * brow = b + n;
-        while (acol < aend) {
-          *brow = *acol;
-          acol++;
-          brow+=ldb;
-        }
-      }
-      return;
-    default:
-      error(std::string("Unknown ordering given: ").append(std::string(1,major)));
-  }
-}
-
-
 template
 void deepCopyAndTranspose(const cv::Mat& in, MxArray& out) {
 
diff --git a/modules/matlab/include/transpose.hpp b/modules/matlab/include/transpose.hpp
new file mode 100644
index 0000000000..2a820f43f9
--- /dev/null
+++ b/modules/matlab/include/transpose.hpp
@@ -0,0 +1,112 @@
+#ifndef OPENCV_TRANSPOSE_HPP_
+#define OPENCV_TRANSPOSE_HPP_
+
+/*
+ * Transpose (and cast) a tile of at most 4x4 elements through a small cache.
+ * M is the extent along the contiguous source dimension and N the extent
+ * along the strided one; lda and ldb are the source and destination strides.
+ */
+template <typename InputScalar, typename OutputScalar>
+void transposeBlock(const size_t M, const size_t N, const InputScalar* src, size_t lda, OutputScalar* dst, size_t ldb) {
+  InputScalar cache[16];
+  // copy the source into the cache contiguously
+  for (size_t n = 0; n < N; ++n)
+    for (size_t m = 0; m < M; ++m)
+      cache[m+n*4] = src[m+n*lda];
+  // copy the destination out of the cache contiguously
+  for (size_t m = 0; m < M; ++m)
+    for (size_t n = 0; n < N; ++n)
+      dst[n+m*ldb] = cache[m+n*4];
+}
+
+/*
+ * Unrolled transpose (and cast) of a full 4x4 tile
+ */
+template <typename InputScalar, typename OutputScalar>
+void transpose4x4(const InputScalar* src, size_t lda, OutputScalar* dst, size_t ldb) {
+  InputScalar cache[16];
+  // copy the source into the cache contiguously
+  cache[0] = src[0]; cache[1] = src[1]; cache[2] = src[2]; cache[3] = src[3]; src+=lda;
+  cache[4] = src[0]; cache[5] = src[1]; cache[6] = src[2]; cache[7] = src[3]; src+=lda;
+  cache[8] = src[0]; cache[9] = src[1]; cache[10] = src[2]; cache[11] = src[3]; src+=lda;
+  cache[12] = src[0]; cache[13] = src[1]; cache[14] = src[2]; cache[15] = src[3]; src+=lda;
+  // copy the destination out of the cache contiguously
+  dst[0] = cache[0]; dst[1] = cache[4]; dst[2] = cache[8]; dst[3] = cache[12]; dst+=ldb;
+  dst[0] = cache[1]; dst[1] = cache[5]; dst[2] = cache[9]; dst[3] = cache[13]; dst+=ldb;
+  dst[0] = cache[2]; dst[1] = cache[6]; dst[2] = cache[10]; dst[3] = cache[14]; dst+=ldb;
+  dst[0] = cache[3]; dst[1] = cache[7]; dst[2] = cache[11]; dst[3] = cache[15]; dst+=ldb;
+}
+
+
+/*
+ * Vanilla copy, transpose and cast
+ *
+ * major == 'R' treats a as M rows of N contiguous elements; any other value
+ * treats it as N columns of M contiguous elements. lda and ldb are the
+ * strides between consecutive rows/columns of a and b respectively.
+ */
+template <typename InputScalar, typename OutputScalar>
+void gemt(const char major, const size_t M, const size_t N, const InputScalar* a, size_t lda, OutputScalar* b, size_t ldb) {
+  // 1x1 transpose is just copy
+  if (M == 1 && N == 1) { *b = *a; return; }
+
+  // get the interior 4x4 blocks, and the extra skirting
+  const size_t Fblock = (major == 'R') ? N/4 : M/4;
+  const size_t Frem   = (major == 'R') ? N%4 : M%4;
+  const size_t Sblock = (major == 'R') ? M/4 : N/4;
+  const size_t Srem   = (major == 'R') ? M%4 : N%4;
+
+  // if less than 4x4, invoke the block transpose immediately
+  if (M < 4 && N < 4) { transposeBlock(Frem, Srem, a, lda, b, ldb); return; }
+
+  // transpose 4x4 blocks
+  const InputScalar* aptr = a;
+  OutputScalar* bptr = b;
+  for (size_t second = 0; second < Sblock; ++second) {
+    // each pass covers four strided source lines, hence the factor of 4 on both offsets
+    aptr = a + 4*second*lda;
+    bptr = b + 4*second;
+    for (size_t first = 0; first < Fblock; ++first) {
+      transposeBlock(4, 4, aptr, lda, bptr, ldb);
+      //transpose4x4(aptr, lda, bptr, ldb);
+      aptr+=4;
+      bptr+=4*ldb;
+    }
+    // transpose trailing blocks on primary dimension
+    transposeBlock(Frem, 4, aptr, lda, bptr, ldb);
+  }
+  // transpose trailing blocks on secondary dimension
+  aptr = a + 4*Sblock*lda;
+  bptr = b + 4*Sblock;
+  for (size_t first = 0; first < Fblock; ++first) {
+    transposeBlock(4, Srem, aptr, lda, bptr, ldb);
+    aptr+=4;
+    bptr+=4*ldb;
+  }
+  // transpose bottom right-hand corner
+  transposeBlock(Frem, Srem, aptr, lda, bptr, ldb);
+}
+
+#ifdef __SSE2__
+/*
+ * SSE2 supported fast copy, transpose and cast
+ */
+#include <emmintrin.h>
+
+// inline: a full specialization is an ordinary function, so without it every translation unit including this header would define it
+template <>
+inline void transpose4x4(const float* src, size_t lda, float* dst, size_t ldb) {
+  __m128 row0, row1, row2, row3;
+  row0 = _mm_loadu_ps(src);
+  row1 = _mm_loadu_ps(src+lda);
+  row2 = _mm_loadu_ps(src+2*lda);
+  row3 = _mm_loadu_ps(src+3*lda);
+  _MM_TRANSPOSE4_PS(row0, row1, row2, row3);
+  _mm_storeu_ps(dst, row0);
+  _mm_storeu_ps(dst+ldb, row1);
+  _mm_storeu_ps(dst+2*ldb, row2);
+  _mm_storeu_ps(dst+3*ldb, row3);
+}
+
+#endif
+#endif
diff --git a/modules/matlab/io/FileBuffer.hpp b/modules/matlab/io/FileBuffer.hpp
new file mode 100644
index 0000000000..61acd48bdd
--- /dev/null
+++ b/modules/matlab/io/FileBuffer.hpp
@@ -0,0 +1,57 @@
+#ifndef OPENCV_FILEBUFFER_HPP_
+#define OPENCV_FILEBUFFER_HPP_
+
+#include <algorithm>
+#include <cstring>
+#include <streambuf>
+#include <vector>
+#include <unistd.h>
+
+/*
+ * Input stream buffer that refills itself from a file descriptor,
+ * carrying the last put_back characters over between refills
+ */
+class EndianFileBuffer : public std::streambuf {
+private:
+  const int fd_;
+  const size_t put_back_;
+  std::vector<char> buffer_;
+
+  // prevent copy construction
+  EndianFileBuffer(const EndianFileBuffer&);
+  EndianFileBuffer& operator=(const EndianFileBuffer&);
+
+public:
+  explicit EndianFileBuffer(int fd, size_t buffer_sz, size_t put_back) :
+      fd_(fd), put_back_(std::max(put_back, size_t(1))), buffer_(std::max(buffer_sz, put_back_) + put_back_) {
+    // start with an empty get area so that the first read calls underflow()
+    char *end = &buffer_.front() + buffer_.size();
+    setg(end, end, end);
+  }
+
+  std::streambuf::int_type underflow() {
+    if (gptr() < egptr()) // buffer not exhausted
+      return traits_type::to_int_type(*gptr());
+
+    char *base = &buffer_.front();
+    char *start = base;
+
+    if (eback() == base) { // true when this isn't the first fill
+      std::memmove(base, egptr() - put_back_, put_back_);
+      start += put_back_;
+    }
+
+    // start is now the start of the buffer
+    // refill from the file
+    const ssize_t n = ::read(fd_, start, buffer_.size() - (start - base));
+    // read() returns 0 at end of file and -1 on error; neither yields data
+    if (n <= 0) return traits_type::eof();
+
+    // set buffer pointers
+    setg(base, start, start + n);
+    return traits_type::to_int_type(*gptr());
+  }
+
+};
+
+#endif
diff --git a/modules/matlab/io/MatlabIO.cpp b/modules/matlab/io/MatlabIO.cpp
new file mode 100644
index 0000000000..3f61d940e3
--- /dev/null
+++ b/modules/matlab/io/MatlabIO.cpp
@@ -0,0 +1,42 @@
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include "MatlabIO.hpp"
+
+const char* day[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+const char* month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+const char* arch = "${MEX_ARCH}";
+
+// build a timestamp of the form "Thu Nov 13 10:10:27 1997" from the current local time
+std::string formatCurrentTime() {
+  std::ostringstream oss;
+  time_t rawtime;
+  struct tm* timeinfo;
+  int dom, hour, min, sec, year;
+  // compute the current time
+  time(&rawtime);
+  timeinfo = localtime(&rawtime);
+  // extract the components of interest
+  dom = timeinfo->tm_mday;
+  hour = timeinfo->tm_hour;
+  min = timeinfo->tm_min;
+  sec = timeinfo->tm_sec;
+  // tm_year counts years since 1900
+  year = timeinfo->tm_year + 1900;
+  oss << day[timeinfo->tm_wday] << " " << month[timeinfo->tm_mon] << " " << dom << " "
+      << std::setfill('0') << std::setw(2) << hour << ":" << std::setw(2) << min << ":"
+      << std::setw(2) << sec << " " << year;
+  return oss.str();
+}
+
+// print the banner and column headings of the variable listing
+void MatlabIO::whos() {
+  std::cout << "-------------------- whos --------------------" << std::endl;
+  std::cout << "Filename:  " << filename() << std::endl;
+  std::cout << "File size: " << filesize() << "MB" << std::endl << std::endl;
+  std::cout << "Name size bytes type" << std::endl;
+  std::cout << "----------------------------------------------" << std::endl;
+  // NOTE(review): no per-variable rows are printed yet, and filesize() is not declared in MatlabIO.hpp
+}
diff --git a/modules/matlab/io/MatlabIO.hpp b/modules/matlab/io/MatlabIO.hpp
new file mode 100644
index 0000000000..9723bfc86a
--- /dev/null
+++ b/modules/matlab/io/MatlabIO.hpp
@@ -0,0 +1,85 @@
+#ifndef MATLAB_IO_HPP_
+#define MATLAB_IO_HPP_
+
+#include <cstddef>
+#include <fstream>
+#include <string>
+#include <vector>
+#include "map.hpp"
+
+namespace Matlab {
+    namespace IO {
+        static const int VERSION_5 = 5;
+        static const int VERSION_73 = 73;
+    }
+} // namespace Matlab; the classes below are global, matching their definitions in MatlabIO.cpp
+
+class Index {
+private:
+  //! the name of the field (if associative container)
+  std::string name_;
+  //! beginning of the data field in the file
+  size_t begin_;
+  //! address after the last data field
+  size_t end_;
+  //! Matlab stored-type
+  int stored_type_;
+  //! Matlab actual type (sometimes compression is used)
+  int type_;
+  //! is the field compressed?
+  bool compressed_;
+  //! are the descendents associative (mappings)
+  bool associative_;
+  // the descendents of this node, held as two plain members: an anonymous union
+  // of types with non-trivial constructors would leave Index unconstructible
+  //! valid if the container is a sequence (list)
+  std::vector<Index> sequence_;
+  //! valid if the container is a mapping (associative)
+  Map<std::string, Index> mapping_;
+
+};
+
+class MatlabIONode {
+
+};
+
+class MatlabIO {
+private:
+  // member variables
+  static const int HEADER_LENGTH = 116;
+  static const int SUBSYS_LENGTH = 8;
+  static const int ENDIAN_LENGTH = 2;
+  char header_[HEADER_LENGTH+1];
+  char subsys_[SUBSYS_LENGTH+1];
+  char endian_[ENDIAN_LENGTH+1];
+  int version_;
+  bool byte_swap_;
+  std::string filename_;
+  // uses a custom stream buffer for fast memory-mapped access and endian swapping
+  std::fstream stream_;
+  //! the main file index. The top-level index must be associative
+  Index index_;
+
+  // internal methods
+  void getFileHeader();
+  void setFileHeader();
+
+  void getHeader();
+  void setHeader();
+
+public:
+  // construct/destruct
+  MatlabIO() {}
+  ~MatlabIO() {}
+
+  // global read and write routines
+  std::string filename(void);
+  bool open(const std::string& filename, const std::string& mode);
+
+  // index the contents of the file
+  void index();
+
+  // print all of the top-level variables in the file
+  void whos();
+};
+#endif