mirror of https://github.com/alibaba/MNN.git
commit 32f72f4fb9

@@ -59,8 +59,7 @@ bool convertNCHWBufferToImage(const Tensor *input, Tensor *output, cl::Kernel &b
     }

 #ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = (int)runtime->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
+    runtime->pushEvent({"inputFormatTransform", event});
 #endif
     return true;
 }
@@ -111,8 +110,7 @@ bool convertNHWCBufferToImage(const Tensor *input, Tensor *output, cl::Kernel &b
     }

 #ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = (int)runtime->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
+    runtime->pushEvent({"inputFormatTransform", event});
 #endif
     return true;
 }
@@ -167,8 +165,7 @@ bool convertImageToNCHWBuffer(const Tensor *input, Tensor *output, cl::Kernel &i
     }

 #ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = (int)runtime->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
+    runtime->pushEvent({"outputFormatTransform", event});
 #endif
     return true;
 }
@@ -220,8 +217,7 @@ bool convertNC4HW4BufferToImage(const Tensor *input, Tensor *output, cl::Kernel
     }

 #ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = (int)runtime->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
+    runtime->pushEvent({"inputFormatTransform", event});
 #endif
     return true;
 }
@@ -285,8 +281,7 @@ bool convertImageToNC4HW4Buffer(const Tensor *input, Tensor *output, cl::Kernel
     }

 #ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = (int)runtime->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
+    runtime->pushEvent({"outputFormatTransform", event});
 #endif
     return true;
 }
@@ -341,8 +336,7 @@ bool convertImageToNHWCBuffer(const Tensor *input, Tensor *output, cl::Kernel &i
     }

 #ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = (int)runtime->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
+    runtime->pushEvent({"outputFormatTransform", event});
 #endif

     return true;
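
Every hunk in this file makes the same substitution: under ENABLE_OPENCL_TIME_PROFILER the call site no longer times the kernel on the spot but hands the labelled event to the runtime. A minimal before/after sketch, taken from the hunks above; that getCostTime blocks on the event is my reading, implied by the need to defer the measurement:

    // Before: per-kernel timing at the call site (getCostTime presumably
    // waits for the event before reading its timestamps)
    int costTime = (int)runtime->getCostTime(&event);
    MNN_PRINT("kernel cost:%d us inputFormatTransform\n", costTime);

    // After: record only; OpenCLRuntime::printEventTime() waits once and
    // reports every labelled kernel at the end of the inference
    runtime->pushEvent({"inputFormatTransform", event});
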
@@ -514,14 +514,15 @@ void OpenCLBackend::onResizeEnd() {

 void OpenCLBackend::onExecuteBegin() const {
     mOpenCLRuntime->mQueueCount = 0;
-    mOpenCLRuntime->mKernelTime = 0;
     mOpenCLRuntime->clearRecord();
+    mOpenCLRuntime->clearEvent();
 }

 void OpenCLBackend::onExecuteEnd() const {
     mOpenCLRuntime->mQueueCount = 0;
     mOpenCLRuntime->clearRecord();
     mOpenCLRuntime->enqeueRecord();
+    mOpenCLRuntime->printEventTime();
 }

@@ -698,7 +699,7 @@ void OpenCLBackend::copyFromDevice(const Tensor* srcTensor, const Tensor* dstTen
     mOpenCLRuntime->clearRecord();
     //Convert format
     mCLRuntime->convertFromDevice(srcTensor, (const Tensor*)&interTensor, data_format, false);
-
+    mOpenCLRuntime->printEventTime();

 #ifdef ENABLE_OPENCL_TIME_PROFILER
     mOpenCLRuntime->commandQueue().finish();
@@ -743,10 +744,6 @@ void OpenCLBackend::copyFromDevice(const Tensor* srcTensor, const Tensor* dstTen
         hostPtr = nullptr;
     }
 }
-
-#ifdef ENABLE_OPENCL_TIME_PROFILER
-    MNN_PRINT("total kernel time:%d us\n", (int)mOpenCLRuntime->mKernelTime);
-#endif
 }
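
Seen together with the convertor changes, the two hooks above bracket each inference with a collect-then-report window. A hedged sketch of the resulting flow; the method names come from this commit, the comments are my reading of them:

    backend->onExecuteBegin();   // clearEvent(): drop stale events, zero mKernelTime
    // ... run the network; with ENABLE_OPENCL_TIME_PROFILER defined, every
    //     kernel launch now calls runtime->pushEvent({"label", event})
    backend->onExecuteEnd();     // printEventTime(): wait per event, print each cost and the total
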
@@ -364,6 +364,7 @@ OpenCLRuntime::~OpenCLRuntime() {
 #ifdef LOG_VERBOSE
     MNN_PRINT("start ~OpenCLRuntime !\n");
 #endif
+    clearEvent();
     releaseRecord();
     mBuildProgramMap.clear();
     mRecordings.clear();
@@ -779,4 +780,24 @@ void OpenCLRuntime::releaseRecord(){
     }
 #endif
 }
+
+void OpenCLRuntime::printEventTime(){
+#ifdef ENABLE_OPENCL_TIME_PROFILER
+    if(mEvents.empty()){
+        return;
+    }
+    for(int i = 0; i < mEvents.size(); ++i){
+        auto event = &mEvents[i].second;
+        cl_int res = event->wait();
+        MNN_CHECK_CL_SUCCESS(res, "clEvent");
+        auto StartNanos = event->getProfilingInfo<CL_PROFILING_COMMAND_START>();
+        auto StopNanos = event->getProfilingInfo<CL_PROFILING_COMMAND_END>();
+        auto kernel_time = (unsigned int)((StopNanos - StartNanos) / 1000.0);
+        mKernelTime += kernel_time;
+        MNN_PRINT("kernel time = %d us %s\n", kernel_time, mEvents[i].first.c_str());
+    }
+    mEvents.clear();
+    MNN_PRINT("total kernel time = %d us\n", mKernelTime);
+#endif
+}
 } // namespace MNN
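
The timestamp arithmetic in printEventTime is the standard OpenCL event-profiling recipe: CL_PROFILING_COMMAND_START/END are device-side nanosecond counters, populated only when the command queue was created with CL_QUEUE_PROFILING_ENABLE (presumably what setCommandQueueProfileEnable arranges). A standalone sketch, independent of MNN:

    #include <CL/opencl.hpp>   // Khronos C++ bindings; older setups ship <CL/cl2.hpp>

    // Device-side execution time of a finished command, in microseconds.
    static double elapsedUs(const cl::Event& event) {
        event.wait();  // ensure CL_PROFILING_COMMAND_END is available
        cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
        cl_ulong end   = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
        return (end - start) / 1000.0;  // ns -> us
    }
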
@@ -113,6 +113,14 @@ public:
     std::string getDeviceName() {
         return mDeviceName;
     }
+    void pushEvent(std::pair<std::string, cl::Event> data) {
+        return mEvents.push_back(data);
+    }
+    void printEventTime();
+    void clearEvent(){
+        mKernelTime = 0;
+        mEvents.clear();
+    }
     uint64_t maxAllocSize() const;
     void setCommandQueueProfileEnable();
     void setCommandQueueProfileDisable();
@@ -181,6 +189,7 @@ private:
     GpuType mGpuType;
     MaliAr mMaliAr;
     float mCLVersion = 1.0f;
+    std::vector<std::pair<std::string, cl::Event>> mEvents;

 #ifdef MNN_OPENCL_SVM_ENABLE
     cl_device_svm_capabilities mSvmCapabilities;
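
Storing cl::Event by value in mEvents is cheap and safe: in the C++ bindings an event is a reference-counted wrapper around a cl_event handle, so the copy kept in the vector keeps the profiling record alive until printEventTime reads it. The call-site pattern this header enables, as it appears throughout the rest of the commit:

    cl::Event event;
    runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
                mOpenCLBackend->getOpenCLRuntime(), &event);
    // label + handle; retained by the runtime until onExecuteEnd()
    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvBuf2D", event});
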
@@ -107,8 +107,7 @@ ErrorCode ArgMaxBufExecution::onExecute(const std::vector<Tensor*>& inputs, cons
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us ArgMax\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ArgMax", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -23,11 +23,6 @@ ErrorCode CastBufExecution::onResize(const std::vector<Tensor*>& inputs, const s
     Tensor* output = outputs[0];
     auto openCLBackend = static_cast<OpenCLBackend*>(backend());
     auto runtime = openCLBackend->getOpenCLRuntime();
-#ifdef MNN_SUPPORT_INTEL_SUBGROUP
-    if (runtime->isSupportedIntelSubgroup()) {
-        return SubgrouponResize(inputs, outputs);
-    }
-#endif /* MNN_SUPPORT_INTEL_SUBGROUP */
     mKernel = runtime->buildKernel("cast_buf", "cast_buf", mBuildOptions);
     mMaxWorkGroupSize = static_cast<uint32_t>(runtime->getMaxWorkGroupSize(mKernel));

@@ -75,8 +70,7 @@ ErrorCode CastBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Cast\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Cast", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -674,8 +674,7 @@ ErrorCode ConvBufExecution::onExecute(const std::vector<Tensor *> &inputs, const
 #ifdef ENABLE_OPENCL_TIME_PROFILER
     cl::Event event;
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us ConvBuf2D\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvBuf2D", event});
 #else
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -570,9 +570,6 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
     auto input = inputs[0];
     auto output = outputs[0];

-#ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = 0;
-#endif
     for (int b = 0; b < input->batch(); ++b) {
         int index = b;
         /*Source Transform*/
@@ -581,10 +578,7 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
         cl::Event event;
         runKernel2D(mSourceTransform[index], mGWS_S[index], mLWS_S[index],
                     mOpenCLBackend->getOpenCLRuntime(), &event);
-
-        int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        costTime += costTime0;
-        MNN_PRINT("kernel cost:%d us ConvWino0\n",costTime0);
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino0", event});
 #else
         runKernel2D(mSourceTransform[index], mGWS_S[index], mLWS_S[index],
                     mOpenCLBackend->getOpenCLRuntime());
@@ -600,10 +594,7 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
         } else {
             runKernel2D(mMatMul[index], mGWS_M[index], mLWS_M[index], mOpenCLBackend->getOpenCLRuntime(), &event);
         }
-
-        int costTime1 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        costTime += costTime1;
-        MNN_PRINT("kernel cost:%d us ConvWino1\n",costTime1);
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino1", event});
 #else
         if (mUseSubgroup) {
             run3DKernelDefault(mMatMul[index], mGWS_M[index], mLWS_M[index], mOpenCLBackend->getOpenCLRuntime());
@@ -619,19 +610,13 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
         cl::Event event;
         runKernel2D(mDestTransform[index], mGWS_D[index], mLWS_D[index],
                     mOpenCLBackend->getOpenCLRuntime(), &event);
-
-        int costTime2 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        costTime += costTime2;
-        MNN_PRINT("kernel cost:%d us ConvWino2\n",costTime2);
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino2", event});
 #else
         runKernel2D(mDestTransform[index], mGWS_D[index], mLWS_D[index],
                     mOpenCLBackend->getOpenCLRuntime());
 #endif
         }
     }
-#ifdef ENABLE_OPENCL_TIME_PROFILER
-    MNN_PRINT("kernel cost:%d us ConvWino total\n",costTime);
-#endif

     return NO_ERROR;
 }
@@ -397,8 +397,7 @@ ErrorCode ConvSubgroupBuf::onExecute(const std::vector<Tensor *> &inputs, const

         cl::Event event;
         run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
-        int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        MNN_PRINT("kernel cost:%d us ConvSubgroup transe\n", costTime0);
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvSubgroup", event});
 #else
         run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -407,8 +406,7 @@ ErrorCode ConvSubgroupBuf::onExecute(const std::vector<Tensor *> &inputs, const
 #ifdef ENABLE_OPENCL_TIME_PROFILER
     cl::Event event;
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us ConvSubgroupBuf2D\n", costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvSubgroupBuf2D", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -177,8 +177,8 @@ ErrorCode DeconvBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
                        mOpenCLBackend->getOpenCLRuntime(),
                        &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us DeconvBuf\n",costTime);
+
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DeconvBuf", event});
 #else
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -307,9 +307,7 @@ ErrorCode DepthwiseConvBufExecution::onExecute(const std::vector<Tensor *> &inpu
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
                 mOpenCLBackend->getOpenCLRuntime(),
                 &event);
-
-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us DepthwiseConvBuf\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvBuf", event});
 #else
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
                 mOpenCLBackend->getOpenCLRuntime());
@@ -260,8 +260,8 @@ ErrorCode DepthwiseConvSubgroupBufExecution::onExecute(const std::vector<Tensor

         run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize,
                            mOpenCLBackend->getOpenCLRuntime(), &event);
-        int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        MNN_PRINT("kernel cost:%d us DepthwiseConvSubgroup transe\n", costTime);
+
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvSubgroup transe", event});
 #else
         run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize,
                            mOpenCLBackend->getOpenCLRuntime());
@@ -274,8 +274,7 @@ ErrorCode DepthwiseConvSubgroupBufExecution::onExecute(const std::vector<Tensor
                        mOpenCLBackend->getOpenCLRuntime(),
                        &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us DepthwiseConvSubgroupBuf\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvSubgroupBuf", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -90,9 +90,8 @@ ErrorCode GridSampleBufExecution::onExecute(const std::vector<Tensor *> &inputs,
     cl::Event event;
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us GridSample\n", costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"GridSample", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -102,8 +102,7 @@ ErrorCode Interp3DBufExecution::onExecute(const std::vector<Tensor *> &inputs, c
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Interp\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
 #else
     run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -96,8 +96,7 @@ ErrorCode InterpBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Interp\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
 #else
     run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -175,8 +175,7 @@ ErrorCode LayerNormBufExecution::onExecute(const std::vector<Tensor *> &inputs,
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us LayerNormBuf\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"LayerNormBuf", event});
 #else
     run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -111,9 +111,8 @@ ErrorCode MatMulBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
 #ifdef ENABLE_OPENCL_TIME_PROFILER
     cl::Event event;
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us MatmulBuf\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"MatmulBuf", event});
 #else
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, nullptr);
 #endif
@@ -254,8 +254,7 @@ ErrorCode PoolBufExecution::onExecute(const std::vector<Tensor *> &inputs, const
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Pooling\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Pooling", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -69,8 +69,7 @@ ErrorCode RangeBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Range\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Range", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -209,8 +209,8 @@ ErrorCode ReductionBufExecution::onExecute(const std::vector<Tensor *> &inputs,
     cl::Event event;
     run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);
-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Reduct1D\n",costTime);
+
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Reduct1D", event});
 #else
     run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -160,8 +160,7 @@ ErrorCode ScaleBufExecution::onExecute(const std::vector<Tensor *> &inputs, cons
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
                 mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Scale\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Scale", event});
 #else
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
                 mOpenCLBackend->getOpenCLRuntime());
@@ -70,8 +70,7 @@ ErrorCode SelectBufExecution::onExecute(const std::vector<Tensor*>& inputs, cons
     runKernel2D(mKernel, mGlobalWorkSize, mLocalSize,
                 mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Select\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Select", event});
 #else
     runKernel2D(mKernel, mGlobalWorkSize, mLocalSize,
                 mOpenCLBackend->getOpenCLRuntime());
@@ -140,8 +140,7 @@ ErrorCode SoftmaxBufExecution::onExecute(const std::vector<Tensor *> &inputs, co
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Softmax\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Softmax", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
 #endif
@@ -136,8 +136,7 @@ ErrorCode UnaryBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Unary\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Unary", event});
 #else
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime());
@@ -53,8 +53,7 @@ ErrorCode CommonExecution::onExecute(const std::vector<Tensor *> &inputs, const
                                      &event);
         }

-        int costTime = (int)runtime->getCostTime(&event);
-        MNN_PRINT("kernel cost:%d us %s%d\n",costTime, EnumNameOpType(mOpType), idx++);
+        runtime->pushEvent({EnumNameOpType(mOpType) + std::to_string(idx++), event});
 #else
         if(lws_null == true) {
             res = runtime->commandQueue().enqueueNDRangeKernel(unit.kernel,
@@ -562,8 +562,7 @@ ErrorCode ConvExecution::onExecute(const std::vector<Tensor *> &inputs, const st
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    float costTime = mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%f us Conv UseLocalMem\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Conv UseLocalMem", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -583,8 +582,7 @@ ErrorCode ConvExecution::onExecute(const std::vector<Tensor *> &inputs, const st
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
                 mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Conv2D\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Conv2D", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -352,9 +352,7 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
     auto input = inputs[0];
     auto output = outputs[0];

-#ifdef ENABLE_OPENCL_TIME_PROFILER
-    int costTime = 0;
-#else
+#ifndef ENABLE_OPENCL_TIME_PROFILER
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
             mOpenCLBackend->getOpenCLRuntime()->getRecordings()->emplace_back(mRecording);
@@ -368,10 +366,8 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
         cl::Event event;
         runKernel2D(mSourceTransform[b], mGWS_S[b], mLWS_S[b],
                     mOpenCLBackend->getOpenCLRuntime(), &event);

-        int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        costTime += costTime0;
-        MNN_PRINT("kernel cost:%d us ConvWino0\n",costTime0);
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino0", event});
 #else
         runKernel2D(mSourceTransform[b], mGWS_S[b], mLWS_S[b],
                     mOpenCLBackend->getOpenCLRuntime());
@@ -384,10 +380,8 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
         cl::Event event;
         runKernel2D(mMatMul[b], mGWS_M[b], mLWS_M[b],
                     mOpenCLBackend->getOpenCLRuntime(), &event);

-        int costTime1 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        costTime += costTime1;
-        MNN_PRINT("kernel cost:%d us ConvWino1\n",costTime1);
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino1", event});
 #else
         runKernel2D(mMatMul[b], mGWS_M[b], mLWS_M[b],
                     mOpenCLBackend->getOpenCLRuntime());
@@ -400,19 +394,14 @@ ErrorCode ConvWinograd::onExecute(const std::vector<Tensor*>& inputs, const std:
         cl::Event event;
         runKernel2D(mDestTransform[b], mGWS_D[b], mLWS_D[b],
                     mOpenCLBackend->getOpenCLRuntime(), &event);

-        int costTime2 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-        costTime += costTime2;
-        MNN_PRINT("kernel cost:%d us ConvWino2\n",costTime2);
+        mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino2", event});
 #else
         runKernel2D(mDestTransform[b], mGWS_D[b], mLWS_D[b],
                     mOpenCLBackend->getOpenCLRuntime());
 #endif
         }
     }
-#ifdef ENABLE_OPENCL_TIME_PROFILER
-    MNN_PRINT("kernel cost:%d us ConvWino total\n",costTime);
-#endif

     return NO_ERROR;
 }
@@ -178,8 +178,7 @@ ErrorCode DeconvExecution::onExecute(const std::vector<Tensor *> &inputs, const
                        mOpenCLBackend->getOpenCLRuntime(),
                        &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Deconv\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Deconv", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -165,8 +165,7 @@ ErrorCode DepthwiseConvExecution::onExecute(const std::vector<Tensor *> &inputs,
                        mOpenCLBackend->getOpenCLRuntime(),
                        &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us DepthwiseConv\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConv", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -168,8 +168,7 @@ ErrorCode DepthwiseDeconvExecution::onExecute(const std::vector<Tensor *> &input
                        mOpenCLBackend->getOpenCLRuntime(),
                        &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us DepthwiseDeconv\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseDeconv", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -83,8 +83,7 @@ ErrorCode FuseExecution::onExecute(const std::vector<Tensor *> &inputs, const st
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Fuse\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Fuse", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -91,9 +91,8 @@ ErrorCode GridSampleExecution::onExecute(const std::vector<Tensor *> &inputs, co
     cl::Event event;
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us GridSample\n", costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"GridSample", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -103,8 +103,7 @@ ErrorCode Interp3DExecution::onExecute(const std::vector<Tensor *> &inputs, cons
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Interp3D\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp3D", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -95,8 +95,7 @@ ErrorCode InterpExecution::onExecute(const std::vector<Tensor *> &inputs, const
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Interp\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -176,8 +176,7 @@ ErrorCode LayerNormExecution::onExecute(const std::vector<Tensor *> &inputs, con
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us LayerNorm\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"LayerNorm", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -115,9 +115,8 @@ ErrorCode MatMulExecution::onExecute(const std::vector<Tensor *> &inputs, const
 #ifdef ENABLE_OPENCL_TIME_PROFILER
     cl::Event event;
     runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Matmul\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Matmul", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -151,8 +151,7 @@ ErrorCode PoolExecution::onExecute(const std::vector<Tensor *> &inputs, const st
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Pooling\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Pooling", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -200,8 +200,7 @@ ErrorCode ReductionExecution::onExecute(const std::vector<Tensor *> &inputs, con
 #ifdef ENABLE_OPENCL_TIME_PROFILER
     cl::Event event;
     run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Reduct1D\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Reduct1D", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -129,8 +129,7 @@ ErrorCode RoiPooling::onExecute(const std::vector<Tensor *> &inputs, const std::
     run3DKernelDefault(mKernel, mGWS, mLWS,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us RoiPooling\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"RoiPooling", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -171,8 +171,7 @@ ErrorCode ScaleExecution::onExecute(const std::vector<Tensor *> &inputs, const s
     cl::Event event;
     run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Softmax\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"scale", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -139,8 +139,7 @@ ErrorCode SoftmaxExecution::onExecute(const std::vector<Tensor *> &inputs, const
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Softmax\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Softmax", event});
 #else
     if(mOpenCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
         if(mOpenCLBackend->getOpenCLRuntime()->isDevideOpRecord())
@@ -74,8 +74,7 @@ ErrorCode UnaryExecution::onExecute(const std::vector<Tensor*>& inputs, const st
     run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
                        mOpenCLBackend->getOpenCLRuntime(), &event);

-    int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
-    MNN_PRINT("kernel cost:%d us Unary\n",costTime);
+    mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Unary", event});
 #else
     auto openCLBackend = static_cast<OpenCLBackend*>(backend());
     if(openCLBackend->getOpenCLRuntime()->isUseRecordQueue()){
@@ -210,32 +210,34 @@ public:
         // Check Zero for inputs[2]
         bool zero = false;
         auto type = inputs[2]->getType();
-        switch (type.code) {
-            case halide_type_int:
-            {
-                if (type.bits == 8) {
-                    zero = inputs[2]->host<int8_t>()[0] == 0;
-                } else if (type.bits == 32) {
-                    zero = inputs[2]->host<int32_t>()[0] == 0;
-                }
-            }
-                break;
-            case halide_type_uint:
-            {
-                if (type.bits == 8) {
-                    zero = inputs[2]->host<uint8_t>()[0] == 0;
-                } else if (type.bits == 32) {
-                    zero = inputs[2]->host<uint32_t>()[0] == 0;
-                }
-            }
-                break;
-            case halide_type_float:
-            {
-                zero = inputs[2]->host<float>()[0] == 0.0f;
-            }
-                break;
-            default:
-                break;
-        }
+        if (!TensorUtils::getDescribe(inputs[2])->isMutable && inputs[2]->deviceId() == 0) {
+            switch (type.code) {
+                case halide_type_int:
+                {
+                    if (type.bits == 8) {
+                        zero = inputs[2]->host<int8_t>()[0] == 0;
+                    } else if (type.bits == 32) {
+                        zero = inputs[2]->host<int32_t>()[0] == 0;
+                    }
+                }
+                    break;
+                case halide_type_uint:
+                {
+                    if (type.bits == 8) {
+                        zero = inputs[2]->host<uint8_t>()[0] == 0;
+                    } else if (type.bits == 32) {
+                        zero = inputs[2]->host<uint32_t>()[0] == 0;
+                    }
+                }
+                    break;
+                case halide_type_float:
+                {
+                    zero = inputs[2]->host<float>()[0] == 0.0f;
+                }
+                    break;
+                default:
+                    break;
+            }
+        }
         if (zero) {
             return true;
@@ -294,5 +294,5 @@ public:
     }
 };

-REGISTER_SHAPE_INPUTS(StridedSliceComputer, OpType_StridedSlice, (std::vector<int>{1,2,3}));
+REGISTER_SHAPE_INPUTS(StridedSliceComputer, OpType_StridedSlice, (std::vector<int>{1,2,3,4}));
 } // namespace MNN
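
The added guard matters because host<T>() reads CPU-side memory: the zero shortcut is only trustworthy when the tensor's content is fixed at shape-inference time and actually resident on the host. A hedged restatement of that condition; canReadHostValue is a hypothetical helper, not part of the commit, and the include path is assumed:

    #include "core/TensorUtils.hpp"  // assumed MNN-internal header for TensorUtils

    // Hypothetical helper mirroring the guard above: only dereference host<T>()
    // when the tensor is immutable and has no device-side allocation.
    static bool canReadHostValue(const MNN::Tensor* t) {
        return !MNN::TensorUtils::getDescribe(t)->isMutable && t->deviceId() == 0;
    }
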
@@ -44,5 +44,5 @@ class TopKV2SizeComputer : public SizeComputer {
     }
 };

-REGISTER_SHAPE_INPUTS(TopKV2SizeComputer, OpType_TopKV2, {1});
+REGISTER_SHAPE_INPUTS(TopKV2SizeComputer, OpType_TopKV2, (std::vector<int>{1,2}));
 } // namespace MNN
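
Both registration changes widen the same declaration: the last argument of REGISTER_SHAPE_INPUTS lists the input slots whose contents, not just shapes, the SizeComputer dereferences, so those host buffers can be made readable before shape inference runs; StridedSlice now declares slots 1-4 and TopKV2 slots 1-2. That reading is inferred from the call sites in this commit, not from the macro's definition:

    // Inferred usage (hypothetical op): shape inference for MyOp reads the
    // host contents of input slots 1 and 2, so both must be declared here.
    // REGISTER_SHAPE_INPUTS(MyOpSizeComputer, OpType_MyOp, (std::vector<int>{1,2}));
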
@@ -16,6 +16,9 @@ void CastParamsToHalf(std::unique_ptr<MNN::OpT>& op) {
         case MNN::OpType_Convolution:
         case MNN::OpType_ConvolutionDepthwise: {
             auto param = op->main.AsConvolution2D();
+            if (param->quanParameter != nullptr) {
+                break;
+            }
             const int weightSize = param->weight.size();
             // const int biasSize = param->bias.size();
             std::vector<half_float::half> quantizedFp16Weight;
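
The early break skips the fp32-to-fp16 cast whenever quanParameter is set, i.e. when the weights are presumably stored in quantized/compressed form rather than as plain floats in param->weight. For context, a minimal sketch of the cast the surrounding code performs; it assumes the half_float library bundled with MNN's converter:

    #include <vector>
    #include "half.hpp"  // half_float::half, converts implicitly from float

    // Per-element float -> half cast, as done for param->weight below this guard.
    static std::vector<half_float::half> toHalf(const std::vector<float>& w) {
        return std::vector<half_float::half>(w.begin(), w.end());
    }
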