Power, Tanh and Channels ReLU layer ocl support

Signed-off-by: Li Peng <peng.li@intel.com>
pull/10602/head
Li Peng 7 years ago
parent 4189214d04
commit 7bc017601f
  1. 83
      modules/dnn/src/layers/elementwise_layers.cpp
  2. 9
      modules/dnn/src/opencl/activations.cl
  3. 6
      modules/dnn/test/test_layers.cpp

@@ -267,7 +267,6 @@ struct ReLUFunctor
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
std::vector<UMat> inputs;
std::vector<UMat> outputs;
@@ -287,7 +286,7 @@ struct ReLUFunctor
kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
size_t gSize = src.total();
CV_Assert(kernel.run(1, &gSize, &wgSize, false));
CV_Assert(kernel.run(1, &gSize, NULL, false));
}
return true;
@@ -395,8 +394,28 @@ struct TanHFunctor
#ifdef HAVE_OPENCL
// Runs the TanH activation on the OpenCL path.
// inps/outs: per-blob UMat lists (paired by index); internals: unused here.
// Returns true on success; kernel launch failure trips CV_Assert.
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);
    CV_Assert(inputs.size() == outputs.size());  // blobs are processed pairwise below

    // Build option defines the element type macro T for activations.cl.
    String buildopt = oclGetTMacro(inputs[0]);

    for (size_t i = 0; i < inputs.size(); i++)
    {
        UMat& src = inputs[i];
        UMat& dst = outputs[i];

        ocl::Kernel kernel("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
        kernel.set(0, (int)src.total());
        kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
        kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

        // One work-item per element; let the runtime pick the local size.
        size_t gSize = src.total();
        CV_Assert(kernel.run(1, &gSize, NULL, false));
    }
    return true;
}
#endif
@@ -594,8 +613,31 @@ struct PowerFunctor
#ifdef HAVE_OPENCL
// Runs the Power activation, out = (shift + scale * in) ^ power, on the OpenCL path.
// inps/outs: per-blob UMat lists (paired by index); internals: unused here.
// Returns true on success; kernel launch failure trips CV_Assert.
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);
    CV_Assert(inputs.size() == outputs.size());  // blobs are processed pairwise below

    // Build option defines the element type macro T for activations.cl.
    String buildopt = oclGetTMacro(inputs[0]);

    for (size_t i = 0; i < inputs.size(); i++)
    {
        UMat& src = inputs[i];
        UMat& dst = outputs[i];

        ocl::Kernel kernel("PowForward", ocl::dnn::activations_oclsrc, buildopt);
        kernel.set(0, (int)src.total());
        kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
        kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
        // Functor parameters are forwarded as floats to match the kernel signature.
        kernel.set(3, (float)power);
        kernel.set(4, (float)scale);
        kernel.set(5, (float)shift);

        // One work-item per element; let the runtime pick the local size.
        size_t gSize = src.total();
        CV_Assert(kernel.run(1, &gSize, NULL, false));
    }
    return true;
}
#endif
@@ -624,9 +666,11 @@ struct ChannelsPReLUFunctor
{
typedef ChannelsPReLULayer Layer;
Mat scale;
UMat scale_umat;
// Wraps the per-channel slope blob; mirrors it into a UMat up front so the
// OpenCL path can bind it as a kernel argument without per-call conversion.
// NOTE(review): getUMat(ACCESS_READ) here ties the UMat to the Mat's buffer —
// assumes `scale` (and the data it references) outlives this functor; confirm.
explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
{
scale_umat = scale.getUMat(ACCESS_READ);
}
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
@@ -669,8 +713,31 @@ struct ChannelsPReLUFunctor
#ifdef HAVE_OPENCL
// Runs the per-channel PReLU activation on the OpenCL path.
// out = in > 0 ? in : in * slope[c], with one slope per channel (scale_umat).
// inps/outs: per-blob UMat lists (paired by index); internals: unused here.
// Returns true on success; kernel launch failure trips CV_Assert.
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);
    CV_Assert(inputs.size() == outputs.size());  // blobs are processed pairwise below

    // Build option defines the element type macro T for activations.cl.
    String buildopt = oclGetTMacro(inputs[0]);

    for (size_t i = 0; i < inputs.size(); i++)
    {
        UMat& src = inputs[i];
        UMat& dst = outputs[i];

        ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
        kernel.set(0, (int)src.total());
        kernel.set(1, (int)src.size[1]);             // channel count (NCHW axis 1)
        kernel.set(2, (int)total(shape(src), 2));    // plane size = product of dims from axis 2 on
        kernel.set(3, ocl::KernelArg::PtrReadOnly(src));
        kernel.set(4, ocl::KernelArg::PtrWriteOnly(dst));
        kernel.set(5, ocl::KernelArg::PtrReadOnly(scale_umat));

        // One work-item per element; let the runtime pick the local size.
        size_t gSize = src.total();
        CV_Assert(kernel.run(1, &gSize, NULL, false));
    }
    return true;
}
#endif

@@ -54,6 +54,15 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
#endif
}
/* Per-channel PReLU: out = in > 0 ? in : in * slope[c].
 * count: total element count; data layout is [N][channels][plane_size],
 * so the channel of element i is (i / plane_size) % channels. */
__kernel void PReLUForward(const int count, const int channels, const int plane_size,
                           __global const T* in, __global T* out, __global const T* slope_data)
{
    const int idx = get_global_id(0);
    if (idx >= count)
        return;  /* global size may be padded past count */

    const int ch = (idx / plane_size) % channels;
    const T val = in[idx];
    out[idx] = val > 0 ? val : val * slope_data[ch];
}
__kernel void TanHForward(const int count, __global T* in, __global T* out) {
int index = get_global_id(0);
if(index < count)

@@ -331,6 +331,12 @@ TEST(Layer_Test_PReLU, Accuracy)
testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_CPU, true, false);
}
// OpenCL counterpart of Layer_Test_PReLU: runs the same Caffe PReLU models
// with DNN_TARGET_OPENCL so the PReLUForward kernel path gets exercised.
OCL_TEST(Layer_Test_PReLU, Accuracy)
{
testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_OPENCL, true);
testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_OPENCL, true, false);
}
//template<typename XMat>
//static void test_Layer_Concat()
//{

Loading…
Cancel
Save