//
//  ConvWinograd.cpp
//  MNN
//
//  Created by MNN on 2019/01/08.
//  Copyright © 2018, Alibaba Group Holding Limited
//
2021-03-12 18:41:50 +08:00
|
|
|
#include "backend/opencl/execution/image/ConvWinograd.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
#include <string.h>
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "core/Backend.hpp"
|
2020-03-03 05:47:07 +08:00
|
|
|
#include "core/ConvolutionCommon.hpp"
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "math/WingoradGenerater.hpp"
|
|
|
|
#include "backend/opencl/core/OpenCLRunningUtils.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
#define UNIT 2
|
|
|
|
#define INTERP 1
|
|
|
|
namespace MNN {
|
|
|
|
namespace OpenCL {
|
2023-02-28 10:41:24 +08:00
|
|
|
bool ConvWinograd::valid(const Convolution2DCommon* common, const Tensor* input, const Tensor* output, int maxWidth, int maxHeight, int limit) {
|
2019-04-17 10:49:11 +08:00
|
|
|
if (common->strideX() != 1 || common->strideY() != 1) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (common->dilateX() != 1 || common->dilateY() != 1) {
|
|
|
|
return false;
|
|
|
|
}
|
2023-03-20 11:32:29 +08:00
|
|
|
if(common->kernelX() != common->kernelY()) {
|
2020-11-05 16:41:56 +08:00
|
|
|
return false;
|
|
|
|
}
|
2023-03-20 11:32:29 +08:00
|
|
|
if(common->kernelX() != 3 && common->kernelX() != 5){
|
2023-02-28 10:41:24 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-03-20 11:32:29 +08:00
|
|
|
int ic = input->channel();
|
|
|
|
int oc = common->outputCount();
|
|
|
|
int ow = output->width();
|
|
|
|
int oh =output->height();
|
|
|
|
int kh = common->kernelX();
|
|
|
|
int wUnit = UP_DIV(ow, UNIT);
|
|
|
|
int hUnit = UP_DIV(oh, UNIT);
|
|
|
|
int alpha = kh + UNIT - 1;
|
|
|
|
int sourceWidth = UP_DIV(ic, 4) * 4 * wUnit;
|
2023-02-28 10:41:24 +08:00
|
|
|
int sourceHeight = alpha * alpha * hUnit;
|
|
|
|
int destWidth = alpha * alpha * wUnit * 4;
|
2023-03-20 11:32:29 +08:00
|
|
|
int destHeight = UP_DIV(ic, 4) * hUnit;
|
2023-02-28 10:41:24 +08:00
|
|
|
|
|
|
|
if(sourceWidth > maxWidth || sourceHeight > maxHeight || destWidth > maxWidth || destHeight > maxHeight){
|
|
|
|
return false;
|
|
|
|
}
|
2023-03-20 11:32:29 +08:00
|
|
|
if(ic >= 32 && oc >= 32){
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return ((oc * oh * ow) / (ic * kh) <= 5);
|
2019-04-17 10:49:11 +08:00
|
|
|
}
|
|
|
|
|
2021-03-12 18:41:50 +08:00
|
|
|
|
2019-04-17 10:49:11 +08:00
|
|
|
// Constructor: extracts the (possibly quantized) filter weights, applies the
// CPU-side Winograd weight transform, and uploads the transformed weights and
// bias into OpenCL images for use by onResize/onExecute.
ConvWinograd::ConvWinograd(const MNN::Convolution2D* op, Backend* backend) : Execution(backend) {
    mOpenCLBackend = static_cast<OpenCLBackend*>(backend);
    mCommon = op->common();
    // valid() should have filtered anything else out already: only square
    // 3x3 / 5x5, stride-1, dilation-1 convolutions reach this execution.
    MNN_ASSERT((3 == mCommon->kernelY() && 3 == mCommon->kernelX()) ||
               (5 == mCommon->kernelX() && 5 == mCommon->kernelY()));
    MNN_ASSERT(1 == mCommon->strideX() && 1 == mCommon->strideY());
    MNN_ASSERT(1 == mCommon->dilateX() && 1 == mCommon->dilateY());
    auto runTime = mOpenCLBackend->getOpenCLRuntime();
    int ky = mCommon->kernelY();
    int kx = mCommon->kernelX();

    int weightSize             = 0;
    const float* filterDataPtr = nullptr;

    // If the model stores quantized weights, dequantize them back to float.
    std::shared_ptr<MNN::ConvolutionCommon::Int8Common> quanCommon;
    if (nullptr != op->quanParameter()) {
        quanCommon = ConvolutionCommon::load(op->quanParameter(), true);
        if (nullptr == quanCommon) {
            MNN_ERROR("Memory not Enough, can't extract IDST Convolution \n");
        }
        if (quanCommon->weightFloat.get() == nullptr) {
            MNN_PRINT("quanCommon->weightFloat.get() == nullptr \n");
        }
        // Back to float
        filterDataPtr = quanCommon->weightFloat.get();
        weightSize = quanCommon->weightFloat.size();
    }

    // Plain float weights stored directly in the op.
    if (nullptr == filterDataPtr) {
        weightSize = op->weight()->size();
        filterDataPtr = op->weight()->data();
    }

    int co = mCommon->outputCount();
    // Derive input channel count from the flat weight size (co*ci*kh*kw).
    int ci = weightSize / co / mCommon->kernelX() / mCommon->kernelY();
    auto coC4 = UP_DIV(co, 4);
    auto ciC4 = UP_DIV(ci, 4);
    auto queue = runTime->commandQueue();

    // FP16 images by default; FP32 only when high precision is requested.
    auto imageChannelType = CL_HALF_FLOAT;
    if (mOpenCLBackend->getPrecision() == BackendConfig::Precision_High) {
        imageChannelType = CL_FLOAT;
    }
    // Create Image
    {
        // Bias image: one RGBA texel per group of 4 output channels.
        mBias.reset(new cl::Image2D(runTime->context(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RGBA, imageChannelType),
                                    UP_DIV(co, 4), 1, 0, nullptr, nullptr));

        // Staging buffer for the bias, padded up to a multiple of 4 channels.
        int buffer_size = ALIGN_UP4(co);
        if(mOpenCLBackend->getOpenCLRuntime()->isWeightCpuTransHalf()) {
            buffer_size *= sizeof(half_float::half);
        } else {
            buffer_size *= sizeof(float);
        }
        std::shared_ptr<cl::Buffer> biasBuffer(
            new cl::Buffer(runTime->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size));

        cl_int error;
        // Map the staging buffer, fill it with (half- or float-) bias values,
        // then unmap before copying into the image.
        auto biasC = queue.enqueueMapBuffer(*biasBuffer, CL_TRUE, CL_MAP_WRITE, 0, buffer_size, nullptr, nullptr, &error);
        if(biasC != nullptr && error == CL_SUCCESS){
            if(mOpenCLBackend->getOpenCLRuntime()->isWeightCpuTransHalf()){
                // Convert each bias value to half; zero-fill the alignment tail.
                for(int i=0; i<co; i++) {
                    ((half_float::half*)biasC)[i] = (half_float::half)(op->bias()->data()[i]);
                }
                for(int i=co; i<ALIGN_UP4(co); i++) {
                    ((half_float::half*)biasC)[i] = (half_float::half)(0.0f);
                }
            }else{
                // Float path: zero the padding first, then copy the real values.
                ::memset(biasC, 0, buffer_size);
                ::memcpy(biasC, op->bias()->data(), co * sizeof(float));
            }
        }else{
            MNN_ERROR("Map error biasC == nullptr \n");
        }
        queue.enqueueUnmapMemObject(*biasBuffer, biasC);
        copyBufferToImage(runTime, *biasBuffer, *mBias, coC4, 1);

        // Wrap the raw filter data in a tensor view (no copy) for the
        // Winograd weight transform.
        std::shared_ptr<Tensor> sourceWeight(
            Tensor::create<float>(std::vector<int>{co, ci, ky, kx}, (void*)(filterDataPtr), Tensor::CAFFE));

        int unit       = UNIT;
        int kernelSize = kx;
        Math::WinogradGenerater generator(unit, kernelSize, INTERP);
        int alpha = unit + kernelSize - 1;
        // Transform weights on the CPU into the alpha*alpha Winograd domain.
        auto weightDest = generator.allocTransformWeight(sourceWeight.get());
        generator.transformWeight(weightDest.get(), sourceWeight.get());
        auto weightDestSize = weightDest->size();

        // Staging buffer for the transformed weights (half or float layout).
        buffer_size = weightDest->elementSize();
        if(mOpenCLBackend->getOpenCLRuntime()->isWeightCpuTransHalf()) {
            buffer_size *= sizeof(half_float::half);
        } else {
            buffer_size *= sizeof(float);
        }
        cl::Buffer weightBuffer(runTime->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size);
        {
            cl_int error;
            auto weightPtr = queue.enqueueMapBuffer(weightBuffer, CL_TRUE, CL_MAP_WRITE, 0, buffer_size, nullptr, nullptr, &error);
            if(weightPtr != nullptr && error == CL_SUCCESS){
                if(mOpenCLBackend->getOpenCLRuntime()->isWeightCpuTransHalf()){
                    // Element-wise float -> half conversion into the mapped buffer.
                    for(int i=0; i<weightDest->elementSize(); i++) {
                        ((half_float::half*)weightPtr)[i] = (half_float::half)(weightDest->host<float>()[i]);
                    }
                }else{
                    ::memcpy(weightPtr, weightDest->host<float>(), buffer_size);
                }
            } else{
                MNN_ERROR("Map error weightPtr == nullptr \n");
            }

            queue.enqueueUnmapMemObject(weightBuffer, weightPtr);
        }
        // Weight image layout: width = ciC4*4, height = coC4 * alpha * alpha.
        mWeight.reset(new cl::Image2D(runTime->context(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RGBA, imageChannelType),
                                      ciC4 * 4, coC4 * alpha * alpha, 0, nullptr, nullptr));
        copyBufferToImage(runTime, weightBuffer, *mWeight, ciC4 * 4, coC4 * alpha * alpha);
    }
}
|
|
|
|
|
|
|
|
// onResize: allocates the intermediate source/dest tensors, builds the three
// kernels (source transform, gemm, dest transform) per batch, binds their
// arguments, and tunes/records the 2D work sizes for onExecute.
ErrorCode ConvWinograd::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input  = inputs[0];
    auto output = outputs[0];
    mKernelX    = mCommon->kernelX();
    mKernelY    = mCommon->kernelY();
    mStrideX    = mCommon->strideX();
    mStrideY    = mCommon->strideY();
    mPadMode    = mCommon->padMode();

    // alpha is the Winograd tile edge; each UNITxUNIT output tile needs an
    // alpha x alpha input patch.
    int alpha  = mCommon->kernelX() + UNIT - 1;
    auto wUnit = UP_DIV(output->width(), UNIT);
    auto hUnit = UP_DIV(output->height(), UNIT);

    auto pad       = ConvolutionCommon::convolutionPad(inputs[0], outputs[0], mCommon);
    const int padY = pad.second;
    const int padX = pad.first;

    auto runTime = mOpenCLBackend->getOpenCLRuntime();
    // Begin capturing kernel launches into mRecording (record-queue path).
    startRecord(runTime, mRecording);

    auto bn = backend();
    // Intermediate tensors for the transformed input and the gemm output.
    mSource.reset(Tensor::createDevice<float>(
        std::vector<int>{alpha * alpha, input->channel(), hUnit, wUnit}, Tensor::CAFFE_C4));
    mDest.reset(Tensor::createDevice<float>(
        std::vector<int>{UP_DIV(output->channel(), 4), wUnit * 4, hUnit, alpha * alpha}, Tensor::CAFFE_C4));

    // Acquire then immediately release: reserves the memory in the dynamic
    // pool for the lifetime of this op's execution only.
    bn->onAcquireBuffer(mSource.get(), Backend::DYNAMIC);
    bn->onAcquireBuffer(mDest.get(), Backend::DYNAMIC);
    bn->onReleaseBuffer(mSource.get(), Backend::DYNAMIC);
    bn->onReleaseBuffer(mDest.get(), Backend::DYNAMIC);

    auto icC4 = UP_DIV(input->channel(), 4);
    auto ocC4 = UP_DIV(output->channel(), 4);

    // One kernel instance (and work-group-size record) per batch element.
    uint32_t total_num = input->batch();
    mSourceTransform.resize(total_num);
    mMatMul.resize(total_num);
    mDestTransform.resize(total_num);
    mMaxWGS_S.resize(total_num);
    mMaxWGS_D.resize(total_num);
    mMaxWGS_M.resize(total_num);

    std::set<std::string> basic;
    /*Create Kernel*/
    for(int i = 0; i < input->batch(); i++) {
        // Kernel-source names are specialized by "<unit>_<kernel>_<interp>".
        char format[20];
        ::memset(format, 0, sizeof(format));
        sprintf(format, "%d_%d_%d", UNIT, mKernelX, INTERP);
        auto formatStr = std::string(format);
        mSourceTransform[i] =
            runTime->buildKernel("winogradTransformSource" + formatStr,
                                 "winogradTransformSource", basic);
        mMaxWGS_S[i] = static_cast<uint32_t>(mOpenCLBackend->getOpenCLRuntime()->getMaxWorkGroupSize(mSourceTransform[i]));
        {
            // The dest transform also applies the fused activation, so it
            // gets extra build options.
            std::set<std::string> buildOptions = basic;
            if (mCommon->relu()) {
                buildOptions.emplace("-DRELU");
            }
            if (mCommon->relu6()) {
                buildOptions.emplace("-DRELU6");
            }
            mDestTransform[i] =
                runTime->buildKernel("winogradTransformDest" + formatStr,
                                     "winogradTransformDest", buildOptions);
            mMaxWGS_D[i] = static_cast<uint32_t>(mOpenCLBackend->getOpenCLRuntime()->getMaxWorkGroupSize(mDestTransform[i]));
        }
        mMatMul[i] = runTime->buildKernel("gemm", "gemmWinograd", basic);
        mMaxWGS_M[i] = static_cast<uint32_t>(mOpenCLBackend->getOpenCLRuntime()->getMaxWorkGroupSize(mMatMul[i]));
    }

    // Tuned global/local work sizes per batch element.
    mGWS_S.resize(total_num);
    mGWS_D.resize(total_num);
    mGWS_M.resize(total_num);
    mLWS_S.resize(total_num);
    mLWS_D.resize(total_num);
    mLWS_M.resize(total_num);

    for (int b = 0; b < input->batch(); ++b) {
        // Source transform: input image -> Winograd-domain mSource.
        mSourceTransform[b].setArg(0, openCLImage(input));
        mSourceTransform[b].setArg(1, openCLImage(mSource.get()));
        mSourceTransform[b].setArg(2, wUnit);
        mSourceTransform[b].setArg(3, hUnit);
        mSourceTransform[b].setArg(4, padX);
        mSourceTransform[b].setArg(5, padY);
        mSourceTransform[b].setArg(6, input->width());
        mSourceTransform[b].setArg(7, input->height());
        mSourceTransform[b].setArg(8, icC4);
        mSourceTransform[b].setArg(9, b);

        // Batched gemm: mSource x mWeight -> mDest, one gemm per alpha^2 tile.
        mMatMul[b].setArg(0, openCLImage(mSource.get()));
        mMatMul[b].setArg(1, *mWeight);
        mMatMul[b].setArg(2, openCLImage(mDest.get()));
        mMatMul[b].setArg(3, wUnit);
        mMatMul[b].setArg(4, hUnit);
        mMatMul[b].setArg(5, ocC4);
        mMatMul[b].setArg(6, icC4);
        mMatMul[b].setArg(7, alpha*alpha);

        // Dest transform: mDest back to spatial domain, adds bias/activation.
        mDestTransform[b].setArg(0, openCLImage(mDest.get()));
        mDestTransform[b].setArg(1, *mBias);
        mDestTransform[b].setArg(2, openCLImage(output));
        mDestTransform[b].setArg(3, wUnit);
        mDestTransform[b].setArg(4, hUnit);
        mDestTransform[b].setArg(5, output->width());
        mDestTransform[b].setArg(6, output->height());
        mDestTransform[b].setArg(7, ocC4);
        mDestTransform[b].setArg(8, b);

        /*Source Transform*/
        {
            mGWS_S[b] = {static_cast<uint32_t>(wUnit * hUnit), static_cast<uint32_t>(icC4)};
            std::string kernelName = "winogradTransformSource";
            mLWS_S[b] = localWS2DDefault(mGWS_S[b], mMaxWGS_S[b], mOpenCLBackend->getOpenCLRuntime(), kernelName, mSourceTransform[b]).first;
            recordKernel2d(mSourceTransform[b], mGWS_S[b], mLWS_S[b], mOpenCLBackend->getOpenCLRuntime());
        }

        /*MatMul*/
        {
            auto gemmHeight = ocC4;
            // Each work item computes a 4-wide strip, hence UP_DIV(wUnit, 4).
            mGWS_M[b] = {static_cast<uint32_t>(UP_DIV(wUnit, 4) * hUnit), static_cast<uint32_t>(alpha * alpha * ocC4)};
            std::string kernelName = "gemmWinograd";
            mLWS_M[b] = localWS2DDefault(mGWS_M[b], mMaxWGS_M[b], mOpenCLBackend->getOpenCLRuntime(), kernelName, mMatMul[b]).first;
            recordKernel2d(mMatMul[b], mGWS_M[b], mLWS_M[b], mOpenCLBackend->getOpenCLRuntime());
        }

        // Dest Transform
        {
            mGWS_D[b] = {static_cast<uint32_t>(wUnit*hUnit), static_cast<uint32_t>(ocC4)};
            std::string kernelName = "winogradTransformDest";
            mLWS_D[b] = localWS2DDefault(mGWS_D[b], mMaxWGS_D[b], mOpenCLBackend->getOpenCLRuntime(), kernelName, mDestTransform[b]).first;
            recordKernel2d(mDestTransform[b], mGWS_D[b], mLWS_D[b], mOpenCLBackend->getOpenCLRuntime());
        }
    }
    endRecord(runTime, mRecording);

    return NO_ERROR;
}
|
- dynamic computation graph (beta)
- add support (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add support for benchmarking each op
- refactor optimizer by separating steps
- CPU
- add support for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast support for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div support for binary
- add support for unary
- optimize Conv2D, Reshape
- Vulkan
- add max support for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training code
2019-09-26 21:02:07 +08:00
|
|
|
|
2019-04-17 10:49:11 +08:00
|
|
|
// onExecute: launches the three pre-built kernels (source transform, gemm,
// dest transform) for every batch element. With ENABLE_OPENCL_TIME_PROFILER
// defined, each launch is timed via an event and per-kernel costs are printed;
// otherwise, if the record queue is enabled, the pre-recorded command sequence
// from onResize is replayed instead of issuing fresh launches.
ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input  = inputs[0];
    auto output = outputs[0];

#ifdef ENABLE_OPENCL_TIME_PROFILER
    int costTime = 0;
#else
    // Fast path: replay the recording captured in onResize and return.
    if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
        mOpenCLBackend->getOpenCLRuntime()->getRecordings()->emplace_back(mRecording);
        return NO_ERROR;
    }
#endif
    for (int b = 0; b < input->batch(); ++b) {
        /*Source Transform*/
        {
#ifdef ENABLE_OPENCL_TIME_PROFILER
            cl::Event event;
            runKernel2D(mSourceTransform[b], mGWS_S[b], mLWS_S[b],
                        mOpenCLBackend->getOpenCLRuntime(), &event);

            int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
            costTime += costTime0;
            MNN_PRINT("kernel cost:%d us ConvWino0\n",costTime0);
#else
            runKernel2D(mSourceTransform[b], mGWS_S[b], mLWS_S[b],
                        mOpenCLBackend->getOpenCLRuntime());
#endif
        }

        /*MatMul*/
        {
#ifdef ENABLE_OPENCL_TIME_PROFILER
            cl::Event event;
            runKernel2D(mMatMul[b], mGWS_M[b], mLWS_M[b],
                        mOpenCLBackend->getOpenCLRuntime(), &event);

            int costTime1 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
            costTime += costTime1;
            MNN_PRINT("kernel cost:%d us ConvWino1\n",costTime1);
#else
            runKernel2D(mMatMul[b], mGWS_M[b], mLWS_M[b],
                        mOpenCLBackend->getOpenCLRuntime());
#endif
        }

        // Dest Transform
        {
#ifdef ENABLE_OPENCL_TIME_PROFILER
            cl::Event event;
            runKernel2D(mDestTransform[b], mGWS_D[b], mLWS_D[b],
                        mOpenCLBackend->getOpenCLRuntime(), &event);

            int costTime2 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
            costTime += costTime2;
            MNN_PRINT("kernel cost:%d us ConvWino2\n",costTime2);
#else
            runKernel2D(mDestTransform[b], mGWS_D[b], mLWS_D[b],
                        mOpenCLBackend->getOpenCLRuntime());
#endif
        }
    }
#ifdef ENABLE_OPENCL_TIME_PROFILER
    MNN_PRINT("kernel cost:%d us ConvWino total\n",costTime);
#endif

    return NO_ERROR;
}
|
|
|
|
|
|
|
|
} // namespace OpenCL
|
|
|
|
} // namespace MNN
|