Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
171 lines
6.4 KiB
171 lines
6.4 KiB
/*M/////////////////////////////////////////////////////////////////////////////////////// |
|
// |
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
|
// |
|
// By downloading, copying, installing or using the software you agree to this license. |
|
// If you do not agree to this license, do not download, install, |
|
// copy or use the software. |
|
// |
|
// |
|
// License Agreement |
|
// For Open Source Computer Vision Library |
|
// |
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
|
// Third party copyrights are property of their respective owners. |
|
// |
|
// @Authors |
|
// Peng Xiao, pengxiao@multicorewareinc.com |
|
// |
|
// Redistribution and use in source and binary forms, with or without modification, |
|
// are permitted provided that the following conditions are met: |
|
// |
|
// * Redistribution's of source code must retain the above copyright notice, |
|
// this list of conditions and the following disclaimer. |
|
// |
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
|
// this list of conditions and the following disclaimer in the documentation |
|
// and/or other oclMaterials provided with the distribution. |
|
// |
|
// * The name of the copyright holders may not be used to endorse or promote products |
|
// derived from this software without specific prior written permission. |
|
// |
|
// This software is provided by the copyright holders and contributors as is and |
|
// any express or implied warranties, including, but not limited to, the implied |
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
|
// indirect, incidental, special, exemplary, or consequential damages |
|
// (including, but not limited to, procurement of substitute goods or services; |
|
// loss of use, data, or profits; or business interruption) however caused |
|
// and on any theory of liability, whether in contract, strict liability, |
|
// or tort (including negligence or otherwise) arising in any way out of |
|
// the use of this software, even if advised of the possibility of such damage. |
|
// |
|
//M*/ |
|
|
|
#include <iomanip> |
|
#include "precomp.hpp" |
|
|
|
#ifdef HAVE_CLAMDBLAS |
|
|
|
#include "clAmdBlas.h" |
|
|
|
#if !defined HAVE_OPENCL |
|
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, |
|
const oclMat &src3, double beta, oclMat &dst, int flags) |
|
{ |
|
throw_nogpu(); |
|
} |
|
#elif !defined HAVE_CLAMDBLAS |
|
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, |
|
const oclMat &src3, double beta, oclMat &dst, int flags) |
|
{ |
|
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); |
|
} |
|
#else |
|
|
|
using namespace cv; |
|
|
|
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, |
|
const oclMat &src3, double beta, oclMat &dst, int flags) |
|
{ |
|
CV_Assert(src1.cols == src2.rows && |
|
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols)); |
|
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported |
|
if(!src3.empty()) |
|
{ |
|
src3.copyTo(dst); |
|
} |
|
else |
|
{ |
|
dst.create(src1.rows, src2.cols, src1.type()); |
|
dst.setTo(Scalar::all(0)); |
|
} |
|
openCLSafeCall( clAmdBlasSetup() ); |
|
|
|
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; |
|
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; |
|
const clAmdBlasOrder order = clAmdBlasRowMajor; |
|
|
|
const int M = src1.rows; |
|
const int N = src2.cols; |
|
const int K = src1.cols; |
|
int lda = src1.step; |
|
int ldb = src2.step; |
|
int ldc = dst.step; |
|
int offa = src1.offset; |
|
int offb = src2.offset; |
|
int offc = dst.offset; |
|
|
|
|
|
switch(src1.type()) |
|
{ |
|
case CV_32FC1: |
|
lda /= sizeof(float); |
|
ldb /= sizeof(float); |
|
ldc /= sizeof(float); |
|
offa /= sizeof(float); |
|
offb /= sizeof(float); |
|
offc /= sizeof(float); |
|
openCLSafeCall |
|
( |
|
clAmdBlasSgemmEx(order, transA, transB, M, N, K, |
|
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, |
|
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) |
|
); |
|
break; |
|
case CV_64FC1: |
|
lda /= sizeof(double); |
|
ldb /= sizeof(double); |
|
ldc /= sizeof(double); |
|
offa /= sizeof(double); |
|
offb /= sizeof(double); |
|
offc /= sizeof(double); |
|
openCLSafeCall |
|
( |
|
clAmdBlasDgemmEx(order, transA, transB, M, N, K, |
|
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, |
|
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) |
|
); |
|
break; |
|
case CV_32FC2: |
|
{ |
|
lda /= sizeof(std::complex<float>); |
|
ldb /= sizeof(std::complex<float>); |
|
ldc /= sizeof(std::complex<float>); |
|
offa /= sizeof(std::complex<float>); |
|
offb /= sizeof(std::complex<float>); |
|
offc /= sizeof(std::complex<float>); |
|
cl_float2 alpha_2 = {{alpha, 0}}; |
|
cl_float2 beta_2 = {{beta, 0}}; |
|
openCLSafeCall |
|
( |
|
clAmdBlasCgemmEx(order, transA, transB, M, N, K, |
|
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, |
|
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) |
|
); |
|
} |
|
break; |
|
case CV_64FC2: |
|
{ |
|
lda /= sizeof(std::complex<double>); |
|
ldb /= sizeof(std::complex<double>); |
|
ldc /= sizeof(std::complex<double>); |
|
offa /= sizeof(std::complex<double>); |
|
offb /= sizeof(std::complex<double>); |
|
offc /= sizeof(std::complex<double>); |
|
cl_double2 alpha_2 = {{alpha, 0}}; |
|
cl_double2 beta_2 = {{beta, 0}}; |
|
openCLSafeCall |
|
( |
|
clAmdBlasZgemmEx(order, transA, transB, M, N, K, |
|
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, |
|
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) |
|
); |
|
} |
|
break; |
|
} |
|
clAmdBlasTeardown(); |
|
} |
|
#endif |
|
#endif
|
|
|