Merge pull request #2583 from alibaba/feature/bugfix

Feature/bugfix
jxt1234 authored 2023-09-07 10:01:42 +08:00, committed by GitHub
commit 32f72f4fb9
46 changed files with 125 additions and 167 deletions
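
At its core, this PR reworks how ENABLE_OPENCL_TIME_PROFILER reports kernel times. Every profiled call site that used to compute a cost via getCostTime() and print it immediately with MNN_PRINT now tags its cl::Event with an op name and hands the pair to OpenCLRuntime::pushEvent(); the events are drained in one place, OpenCLRuntime::printEventTime(), which waits on each event, reads the OpenCL profiling counters, and prints per-kernel plus total times. Below is a minimal, self-contained sketch of that deferred-profiling pattern; the EventProfiler class is illustrative, not MNN's actual API, and it assumes the command queue was created with CL_QUEUE_PROFILING_ENABLE (MNN toggles this through setCommandQueueProfileEnable()):

```cpp
// Illustrative only: a deferred OpenCL profiler in the style of this PR.
// Assumes the queue was created with CL_QUEUE_PROFILING_ENABLE, so each
// cl::Event carries CL_PROFILING_COMMAND_START/END timestamps (nanoseconds).
#include <CL/opencl.hpp>   // or <CL/cl2.hpp> with older OpenCL C++ bindings
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

class EventProfiler {                      // hypothetical name, not MNN API
public:
    // Called at each profiled enqueue instead of printing immediately.
    void push(std::string name, cl::Event event) {
        mEvents.emplace_back(std::move(name), std::move(event));
    }
    // Called once per inference: wait, read timestamps, report, reset.
    void report() {
        unsigned long long totalUs = 0;
        for (auto& entry : mEvents) {
            entry.second.wait();           // ensure the kernel has finished
            auto start = entry.second.getProfilingInfo<CL_PROFILING_COMMAND_START>();
            auto stop  = entry.second.getProfilingInfo<CL_PROFILING_COMMAND_END>();
            unsigned long long us = (stop - start) / 1000;  // ns -> us
            totalUs += us;
            std::printf("kernel time = %llu us %s\n", us, entry.first.c_str());
        }
        std::printf("total kernel time = %llu us\n", totalUs);
        mEvents.clear();                   // ready for the next inference
    }
private:
    std::vector<std::pair<std::string, cl::Event>> mEvents;  // mirrors the diff
};
```

The likely motivation for deferring the report is that it avoids a blocking wait and a print after every enqueue, so profiling perturbs the measured pipeline less and the summary comes out in one coherent batch.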

View File

@@ -59,8 +59,7 @@ bool convertNCHWBufferToImage(const Tensor *input, Tensor *output, cl::Kernel &b
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
runtime->pushEvent({"inputFormatTransform", event});
#endif
return true;
}
@@ -111,8 +110,7 @@ bool convertNHWCBufferToImage(const Tensor *input, Tensor *output, cl::Kernel &b
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
runtime->pushEvent({"inputFormatTransform", event});
#endif
return true;
}
@@ -167,8 +165,7 @@ bool convertImageToNCHWBuffer(const Tensor *input, Tensor *output, cl::Kernel &i
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
runtime->pushEvent({"outputFormatTransform", event});
#endif
return true;
}
@@ -220,8 +217,7 @@ bool convertNC4HW4BufferToImage(const Tensor *input, Tensor *output, cl::Kernel
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
runtime->pushEvent({"inputFormatTransform", event});
#endif
return true;
}
@@ -285,8 +281,7 @@ bool convertImageToNC4HW4Buffer(const Tensor *input, Tensor *output, cl::Kernel
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
runtime->pushEvent({"outputFormatTransform", event});
#endif
return true;
}
@@ -341,8 +336,7 @@ bool convertImageToNHWCBuffer(const Tensor *input, Tensor *output, cl::Kernel &i
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
runtime->pushEvent({"outputFormatTransform", event});
#endif
return true;

View File

@@ -514,14 +514,15 @@ void OpenCLBackend::onResizeEnd() {
void OpenCLBackend::onExecuteBegin() const {
mOpenCLRuntime->mQueueCount = 0;
mOpenCLRuntime->mKernelTime = 0;
mOpenCLRuntime->clearRecord();
mOpenCLRuntime->clearEvent();
}
void OpenCLBackend::onExecuteEnd() const {
mOpenCLRuntime->mQueueCount = 0;
mOpenCLRuntime->clearRecord();
mOpenCLRuntime->enqeueRecord();
mOpenCLRuntime->printEventTime();
}
@@ -698,7 +699,7 @@ void OpenCLBackend::copyFromDevice(const Tensor* srcTensor, const Tensor* dstTen
mOpenCLRuntime->clearRecord();
//Convert format
mCLRuntime->convertFromDevice(srcTensor, (const Tensor*)&interTensor, data_format, false);
mOpenCLRuntime->printEventTime();
#ifdef ENABLE_OPENCL_TIME_PROFILER
mOpenCLRuntime->commandQueue().finish();
@@ -743,10 +744,6 @@ void OpenCLBackend::copyFromDevice(const Tensor* srcTensor, const Tensor* dstTen
hostPtr = nullptr;
}
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
MNN_PRINT("total kernel time:%d us\n", (int)mOpenCLRuntime->mKernelTime);
#endif
}

View File

@@ -364,6 +364,7 @@ OpenCLRuntime::~OpenCLRuntime() {
#ifdef LOG_VERBOSE
MNN_PRINT("start ~OpenCLRuntime !\n");
#endif
clearEvent();
releaseRecord();
mBuildProgramMap.clear();
mRecordings.clear();
@@ -779,4 +780,24 @@ void OpenCLRuntime::releaseRecord(){
}
#endif
}
void OpenCLRuntime::printEventTime(){
#ifdef ENABLE_OPENCL_TIME_PROFILER
if(mEvents.empty()){
return;
}
for(int i = 0; i < mEvents.size(); ++i){
auto event = &mEvents[i].second;
cl_int res = event->wait();
MNN_CHECK_CL_SUCCESS(res, "clEvent");
auto StartNanos = event->getProfilingInfo<CL_PROFILING_COMMAND_START>();
auto StopNanos = event->getProfilingInfo<CL_PROFILING_COMMAND_END>();
auto kernel_time = (unsigned int)((StopNanos - StartNanos) / 1000.0);
mKernelTime += kernel_time;
MNN_PRINT("kernel time = %d us %s\n", kernel_time, mEvents[i].first.c_str());
}
mEvents.clear();
MNN_PRINT("total kernel time = %d us\n", mKernelTime);
#endif
}
} // namespace MNN
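
Read together with the OpenCLBackend hunks above, the lifecycle is: onExecuteBegin() calls clearEvent() to zero mKernelTime and drop stale events, each profiled enqueue appends a named event through pushEvent(), and onExecuteEnd() calls printEventTime() to wait on and report them. The destructor now also calls clearEvent(), so pending cl::Event handles are released before the runtime tears down the OpenCL objects they reference.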

View File

@@ -113,6 +113,14 @@ public:
std::string getDeviceName() {
return mDeviceName;
}
void pushEvent(std::pair<std::string, cl::Event> data) {
return mEvents.push_back(data);
}
void printEventTime();
void clearEvent(){
mKernelTime = 0;
mEvents.clear();
}
uint64_t maxAllocSize() const;
void setCommandQueueProfileEnable();
void setCommandQueueProfileDisable();
@@ -181,6 +189,7 @@ private:
GpuType mGpuType;
MaliAr mMaliAr;
float mCLVersion = 1.0f;
std::vector<std::pair<std::string, cl::Event>> mEvents;
#ifdef MNN_OPENCL_SVM_ENABLE
cl_device_svm_capabilities mSvmCapabilities;

View File

@@ -107,8 +107,7 @@ ErrorCode ArgMaxBufExecution::onExecute(const std::vector<Tensor*>& inputs, cons
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ArgMax\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ArgMax", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -23,11 +23,6 @@ ErrorCode CastBufExecution::onResize(const std::vector<Tensor*>& inputs, const s
Tensor* output = outputs[0];
auto openCLBackend = static_cast<OpenCLBackend*>(backend());
auto runtime = openCLBackend->getOpenCLRuntime();
#ifdef MNN_SUPPORT_INTEL_SUBGROUP
if (runtime->isSupportedIntelSubgroup()) {
return SubgrouponResize(inputs, outputs);
}
#endif /* MNN_SUPPORT_INTEL_SUBGROUP */
mKernel = runtime->buildKernel("cast_buf", "cast_buf", mBuildOptions);
mMaxWorkGroupSize = static_cast<uint32_t>(runtime->getMaxWorkGroupSize(mKernel));
@@ -75,8 +70,7 @@ ErrorCode CastBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Cast\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Cast", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -674,8 +674,7 @@ ErrorCode ConvBufExecution::onExecute(const std::vector<Tensor *> &inputs, const
#ifdef ENABLE_OPENCL_TIME_PROFILER
cl::Event event;
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ConvBuf2D\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvBuf2D", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif

View File

@@ -570,9 +570,6 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
auto input = inputs[0];
auto output = outputs[0];
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = 0;
#endif
for (int b = 0; b < input->batch(); ++b) {
int index = b;
/*Source Transform*/
@@ -581,10 +578,7 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
cl::Event event;
runKernel2D(mSourceTransform[index], mGWS_S[index], mLWS_S[index],
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime0;
MNN_PRINT("kernel cost:%d us ConvWino0\n",costTime0);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino0", event});
#else
runKernel2D(mSourceTransform[index], mGWS_S[index], mLWS_S[index],
mOpenCLBackend->getOpenCLRuntime());
@@ -600,10 +594,7 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
} else {
runKernel2D(mMatMul[index], mGWS_M[index], mLWS_M[index], mOpenCLBackend->getOpenCLRuntime(), &event);
}
int costTime1 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime1;
MNN_PRINT("kernel cost:%d us ConvWino1\n",costTime1);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino1", event});
#else
if (mUseSubgroup) {
run3DKernelDefault(mMatMul[index], mGWS_M[index], mLWS_M[index], mOpenCLBackend->getOpenCLRuntime());
@@ -619,19 +610,13 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
cl::Event event;
runKernel2D(mDestTransform[index], mGWS_D[index], mLWS_D[index],
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime2 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime2;
MNN_PRINT("kernel cost:%d us ConvWino2\n",costTime2);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino2", event});
#else
runKernel2D(mDestTransform[index], mGWS_D[index], mLWS_D[index],
mOpenCLBackend->getOpenCLRuntime());
#endif
}
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
MNN_PRINT("kernel cost:%d us ConvWino total\n",costTime);
#endif
return NO_ERROR;
}

View File

@@ -397,8 +397,7 @@ ErrorCode ConvSubgroupBuf::onExecute(const std::vector<Tensor *> &inputs, const
cl::Event event;
run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ConvSubgroup transe\n", costTime0);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvSubgroup", event});
#else
run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif
@@ -407,8 +406,7 @@ ErrorCode ConvSubgroupBuf::onExecute(const std::vector<Tensor *> &inputs, const
#ifdef ENABLE_OPENCL_TIME_PROFILER
cl::Event event;
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ConvSubgroupBuf2D\n", costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvSubgroupBuf2D", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif

View File

@@ -177,8 +177,8 @@ ErrorCode DeconvBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
mOpenCLBackend->getOpenCLRuntime(),
&event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DeconvBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DeconvBuf", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -307,9 +307,7 @@ ErrorCode DepthwiseConvBufExecution::onExecute(const std::vector<Tensor *> &inpu
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(),
&event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseConvBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvBuf", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -260,8 +260,8 @@ ErrorCode DepthwiseConvSubgroupBufExecution::onExecute(const std::vector<Tensor
run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseConvSubgroup transe\n", costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvSubgroup transe", event});
#else
run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());
@@ -274,8 +274,7 @@ ErrorCode DepthwiseConvSubgroupBufExecution::onExecute(const std::vector<Tensor
mOpenCLBackend->getOpenCLRuntime(),
&event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseConvSubgroupBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvSubgroupBuf", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -91,8 +91,7 @@ ErrorCode GridSampleBufExecution::onExecute(const std::vector<Tensor *> &inputs,
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us GridSample\n", costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"GridSample", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif

View File

@@ -102,8 +102,7 @@ ErrorCode Interp3DBufExecution::onExecute(const std::vector<Tensor *> &inputs, c
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Interp\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
#endif

View File

@@ -96,8 +96,7 @@ ErrorCode InterpBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Interp\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
#endif

View File

@@ -175,8 +175,7 @@ ErrorCode LayerNormBufExecution::onExecute(const std::vector<Tensor *> &inputs,
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us LayerNormBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"LayerNormBuf", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
#endif

View File

@@ -112,8 +112,7 @@ ErrorCode MatMulBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
cl::Event event;
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us MatmulBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"MatmulBuf", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, nullptr);
#endif

View File

@@ -254,8 +254,7 @@ ErrorCode PoolBufExecution::onExecute(const std::vector<Tensor *> &inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Pooling\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Pooling", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -69,8 +69,7 @@ ErrorCode RangeBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Range\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Range", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -209,8 +209,8 @@ ErrorCode ReductionBufExecution::onExecute(const std::vector<Tensor *> &inputs,
cl::Event event;
run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Reduct1D\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Reduct1D", event});
#else
run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -160,8 +160,7 @@ ErrorCode ScaleBufExecution::onExecute(const std::vector<Tensor *> &inputs, cons
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Scale\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Scale", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -70,8 +70,7 @@ ErrorCode SelectBufExecution::onExecute(const std::vector<Tensor*>& inputs, cons
runKernel2D(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Select\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Select", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -140,8 +140,7 @@ ErrorCode SoftmaxBufExecution::onExecute(const std::vector<Tensor *> &inputs, co
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Softmax\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Softmax", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif

View File

@@ -136,8 +136,7 @@ ErrorCode UnaryBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Unary\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Unary", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());

View File

@@ -53,8 +53,7 @@ ErrorCode CommonExecution::onExecute(const std::vector<Tensor *> &inputs, const
&event);
}
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us %s%d\n",costTime, EnumNameOpType(mOpType), idx++);
runtime->pushEvent({EnumNameOpType(mOpType) + std::to_string(idx++), event});
#else
if(lws_null == true) {
res = runtime->commandQueue().enqueueNDRangeKernel(unit.kernel,

View File

@@ -562,8 +562,7 @@ ErrorCode ConvExecution::onExecute(const std::vector<Tensor *> &inputs, const st
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
float costTime = mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%f us Conv UseLocalMem\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Conv UseLocalMem", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -583,8 +582,7 @@ ErrorCode ConvExecution::onExecute(const std::vector<Tensor *> &inputs, const st
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Conv2D\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Conv2D", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -352,9 +352,7 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
auto input = inputs[0];
auto output = outputs[0];
#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = 0;
#else
#ifndef ENABLE_OPENCL_TIME_PROFILER
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
mOpenCLBackend->getOpenCLRuntime()->getRecordings()->emplace_back(mRecording);
@@ -369,9 +367,7 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
runKernel2D(mSourceTransform[b], mGWS_S[b], mLWS_S[b],
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime0;
MNN_PRINT("kernel cost:%d us ConvWino0\n",costTime0);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino0", event});
#else
runKernel2D(mSourceTransform[b], mGWS_S[b], mLWS_S[b],
mOpenCLBackend->getOpenCLRuntime());
@@ -385,9 +381,7 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
runKernel2D(mMatMul[b], mGWS_M[b], mLWS_M[b],
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime1 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime1;
MNN_PRINT("kernel cost:%d us ConvWino1\n",costTime1);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino1", event});
#else
runKernel2D(mMatMul[b], mGWS_M[b], mLWS_M[b],
mOpenCLBackend->getOpenCLRuntime());
@@ -401,18 +395,13 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
runKernel2D(mDestTransform[b], mGWS_D[b], mLWS_D[b],
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime2 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime2;
MNN_PRINT("kernel cost:%d us ConvWino2\n",costTime2);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino2", event});
#else
runKernel2D(mDestTransform[b], mGWS_D[b], mLWS_D[b],
mOpenCLBackend->getOpenCLRuntime());
#endif
}
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
MNN_PRINT("kernel cost:%d us ConvWino total\n",costTime);
#endif
return NO_ERROR;
}
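
As with ConvBufWinograd above, the image-based Winograd path drops its local costTime accumulator and per-execution total: the `#ifdef ENABLE_OPENCL_TIME_PROFILER ... #else` wrapper around the record-queue branch collapses to a plain `#ifndef`, and the total now comes from printEventTime() instead of being summed at each call site.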

View File

@@ -178,8 +178,7 @@ ErrorCode DeconvExecution::onExecute(const std::vector<Tensor *> &inputs, const
mOpenCLBackend->getOpenCLRuntime(),
&event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Deconv\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Deconv", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -165,8 +165,7 @@ ErrorCode DepthwiseConvExecution::onExecute(const std::vector<Tensor *> &inputs,
mOpenCLBackend->getOpenCLRuntime(),
&event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseConv\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConv", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -168,8 +168,7 @@ ErrorCode DepthwiseDeconvExecution::onExecute(const std::vector<Tensor *> &input
mOpenCLBackend->getOpenCLRuntime(),
&event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseDeconv\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseDeconv", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -83,8 +83,7 @@ ErrorCode FuseExecution::onExecute(const std::vector<Tensor *> &inputs, const st
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Fuse\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Fuse", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -92,8 +92,7 @@ ErrorCode GridSampleExecution::onExecute(const std::vector<Tensor *> &inputs, co
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us GridSample\n", costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"GridSample", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -103,8 +103,7 @@ ErrorCode Interp3DExecution::onExecute(const std::vector<Tensor *> &inputs, cons
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Interp3D\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp3D", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -95,8 +95,7 @@ ErrorCode InterpExecution::onExecute(const std::vector<Tensor *> &inputs, const
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Interp\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -176,8 +176,7 @@ ErrorCode LayerNormExecution::onExecute(const std::vector<Tensor *> &inputs, con
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us LayerNorm\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"LayerNorm", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -116,8 +116,7 @@ ErrorCode MatMulExecution::onExecute(const std::vector<Tensor *> &inputs, const
cl::Event event;
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Matmul\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Matmul", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -151,8 +151,7 @@ ErrorCode PoolExecution::onExecute(const std::vector<Tensor *> &inputs, const st
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Pooling\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Pooling", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -200,8 +200,7 @@ ErrorCode ReductionExecution::onExecute(const std::vector<Tensor *> &inputs, con
#ifdef ENABLE_OPENCL_TIME_PROFILER
cl::Event event;
run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Reduct1D\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Reduct1D", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -129,8 +129,7 @@ ErrorCode RoiPooling::onExecute(const std::vector<Tensor *> &inputs, const std::
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us RoiPooling\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"RoiPooling", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -171,8 +171,7 @@ ErrorCode ScaleExecution::onExecute(const std::vector<Tensor *> &inputs, const s
cl::Event event;
run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Softmax\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"scale", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -139,8 +139,7 @@ ErrorCode SoftmaxExecution::onExecute(const std::vector<Tensor *> &inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Softmax\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Softmax", event});
#else
if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())

View File

@@ -74,8 +74,7 @@ ErrorCode UnaryExecution::onExecute(const std::vector<Tensor*>& inputs, const st
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Unary\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Unary", event});
#else
auto openCLBackend = static_cast<OpenCLBackend*>(backend());
if(openCLBackend->getOpenCLRuntime()->isUseRecordQueue()){

View File

@@ -210,6 +210,7 @@ public:
// Check Zero for inputs[2]
bool zero = false;
auto type = inputs[2]->getType();
if (!TensorUtils::getDescribe(inputs[2])->isMutable && inputs[2]->deviceId() == 0) {
switch (type.code) {
case halide_type_int:
{
@@ -237,6 +238,7 @@ public:
default:
break;
}
}
if (zero) {
return true;
}
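
The new guard restricts this constant-zero shortcut to inputs[2] tensors that are both immutable and host-resident (deviceId() == 0). Presumably the switch it wraps inspects the tensor's host data to decide whether the input is all zeros; a mutable or device-held tensor could change between shape inference and execution, so it can no longer be folded as zero.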

View File

@@ -294,5 +294,5 @@ public:
}
};
REGISTER_SHAPE_INPUTS(StridedSliceComputer, OpType_StridedSlice, (std::vector<int>{1,2,3}));
REGISTER_SHAPE_INPUTS(StridedSliceComputer, OpType_StridedSlice, (std::vector<int>{1,2,3,4}));
} // namespace MNN

View File

@@ -44,5 +44,5 @@ class TopKV2SizeComputer : public SizeComputer {
}
};
REGISTER_SHAPE_INPUTS(TopKV2SizeComputer, OpType_TopKV2, {1});
REGISTER_SHAPE_INPUTS(TopKV2SizeComputer, OpType_TopKV2, (std::vector<int>{1,2}));
} // namespace MNN

View File

@@ -16,6 +16,9 @@ void CastParamsToHalf(std::unique_ptr<MNN::OpT>& op) {
case MNN::OpType_Convolution:
case MNN::OpType_ConvolutionDepthwise: {
auto param = op->main.AsConvolution2D();
if (param->quanParameter != nullptr) {
break;
}
const int weightSize = param->weight.size();
// const int biasSize = param->bias.size();
std::vector<half_float::half> quantizedFp16Weight;
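
The early break added here makes CastParamsToHalf skip Convolution/ConvolutionDepthwise ops that already carry a quanParameter: their weights appear to live in that compressed, quantized payload rather than in the float weight vector, so there is nothing sensible to re-encode as fp16.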