MNN/source/backend/cpu/compute/Convolution1x1Strassen.cpp

//
// Convolution1x1Strassen.cpp
// MNN
//
// Created by MNN on 2019/02/12.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "Convolution1x1Strassen.hpp"
#include <string.h>
#include "core/BufferAllocator.hpp"
#include "backend/cpu/CPUBackend.hpp"
#include "CommonOptFunction.h"
#include "core/Concurrency.h"
#include "ConvOpt.h"
#include "core/Macro.h"
namespace MNN {
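// Packs the 1x1 convolution parameters once at construction time: the (oc, ic)
// weight matrix is repacked into the lPack/hPack blocked layout expected by the
// mat-mul kernels, and the bias is stored padded to a multiple of 4 so the post
// function can process whole channel quads.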
void Convolution1x1Strassen::_init(const Convolution2DCommon *common, Backend *b, const float *originWeight,
                                   size_t originWeightSize, const float *bias, size_t biasSize) {
    mPostFunction    = CPUConvolution::getPostFunction();
    auto outputCount = (int)biasSize;
    auto mSrcCount   = (int)originWeightSize / outputCount;
    int ePack, lPack, hPack;
    MNNGetMatMulPackMode(&ePack, &lPack, &hPack);
    mWeight.reset(Tensor::createDevice<float>(
        std::vector<int>{UP_DIV(outputCount, hPack), UP_DIV(mSrcCount, lPack), lPack * hPack}));
    mValid = b->onAcquireBuffer(mWeight.get(), Backend::STATIC);
    if (!mValid) {
        MNN_ERROR("Not Enough Memory\n");
        return;
    }
    MNNPackForMatMul_B(mWeight->host<float>(), originWeight, outputCount, mSrcCount, true);

    mBias.reset(Tensor::createDevice<float>(std::vector<int>{UP_DIV(outputCount, 4), 4}));
    mValid = b->onAcquireBuffer(mBias.get(), Backend::STATIC);
    if (!mValid) {
        MNN_ERROR("Not Enough Memory\n");
        return;
    }
    ::memset(mBias->host<float>(), 0, mBias->size());
    ::memcpy(mBias->host<float>(), bias, biasSize * sizeof(float));
    mStracssenComputor.reset(new StrassenMatrixComputor(b, true, 5));
}
Convolution1x1Strassen::Convolution1x1Strassen(const Convolution2DCommon *common, Backend *b, const float *originWeight,
                                               size_t originWeightSize, const float *bias, size_t biasSize)
    : CPUConvolution(common, b) {
    _init(common, b, originWeight, originWeightSize, bias, biasSize);
}
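// The packed weight and bias are STATIC allocations and have to be returned to
// the backend explicitly when the execution is destroyed.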
Convolution1x1Strassen::~Convolution1x1Strassen() {
    if (nullptr != mWeight) {
        backend()->onReleaseBuffer(mWeight.get(), Backend::STATIC);
    }
    if (nullptr != mBias) {
        backend()->onReleaseBuffer(mBias.get(), Backend::STATIC);
    }
}
ErrorCode Convolution1x1Strassen::onReleaseCache() {
    return NO_ERROR;
}
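// onResize prepares the mat-mul C(e, h) = A(e, l) * B(l, h), where e is the
// output plane (H * W), l the input channel count and h the output channel
// count: temporary buffers for the transposed and packed input/output are
// acquired, the Strassen computor encodes the multiplication once, and the
// dynamic buffers are released again so other ops can reuse the memory.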
ErrorCode Convolution1x1Strassen::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    CPUConvolution::onResize(inputs, outputs);
    auto input  = inputs[0];
    auto output = outputs[0];
    auto ic     = input->channel();
    auto oc     = output->channel();

    auto outputPlane = output->height() * output->width();
    auto e = outputPlane;
    auto l = ic;
    auto h = oc;

    mTempOutputBatch.reset();
    mTempInputBatch.reset();
    int ePack, lPack, hPack;
    MNNGetMatMulPackMode(&ePack, &lPack, &hPack);
    mTempInputPack.reset(Tensor::createDevice<float>({UP_DIV(e, ePack), UP_DIV(l, lPack), ePack * lPack}));
    mTempOutputPack.reset(Tensor::createDevice<float>({UP_DIV(e, ePack), UP_DIV(h, hPack), ePack * hPack}));
    mTempInputBatch.reset(Tensor::createDevice<float>({l, e}));
    mTempOutputBatch.reset(Tensor::createDevice<float>({h, e}));
    bool res = backend()->onAcquireBuffer(mTempInputBatch.get(), Backend::DYNAMIC);
    res      = res && backend()->onAcquireBuffer(mTempInputPack.get(), Backend::DYNAMIC);
    res      = res && backend()->onAcquireBuffer(mTempOutputPack.get(), Backend::DYNAMIC);
    res      = res && backend()->onAcquireBuffer(mTempOutputBatch.get(), Backend::DYNAMIC);
    if (!res) {
        return OUT_OF_MEMORY;
    }
    mStracssenComputor->onReset();
    auto code = mStracssenComputor->onEncode({mTempInputPack.get(), mWeight.get()}, {mTempOutputPack.get()});
    if (NO_ERROR != code) {
        return code;
    }
    res = backend()->onReleaseBuffer(mTempInputBatch.get(), Backend::DYNAMIC);
    res = res && backend()->onReleaseBuffer(mTempInputPack.get(), Backend::DYNAMIC);
    res = res && backend()->onReleaseBuffer(mTempOutputPack.get(), Backend::DYNAMIC);
    res = res && backend()->onReleaseBuffer(mTempOutputBatch.get(), Backend::DYNAMIC);

    return NO_ERROR;
}
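// onExecute replays the encoded multiplication once per batch: the input plane
// is transposed and packed into the matrix-A layout, the Strassen product is
// unpacked and transposed back into the output tensor, and mPostFunction then
// adds the bias (and any fused activation) over the ocC4 channel blocks.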
ErrorCode Convolution1x1Strassen::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto input       = inputs[0];
    auto output      = outputs[0];
    auto ic          = input->channel();
    auto oc          = output->channel();
    auto outputPlane = output->height() * output->width();
    auto e    = outputPlane;
    auto l    = ic;
    auto h    = oc;
    auto ocC4 = UP_DIV(oc, 4);

    for (int batchIndex = 0; batchIndex < input->batch(); ++batchIndex) {
        MNNPackTranspose(mTempInputBatch->host<float>(), input->host<float>() + batchIndex * input->stride(0), e, l);
        MNNPackForMatMul_A(mTempInputPack->host<float>(), mTempInputBatch->host<float>(), e, l, false);
        mStracssenComputor->onExecute();
        MNNUnpackForMatMul_C(mTempOutputBatch->host<float>(), mTempOutputPack->host<float>(), e, h);
        MNNUnpackTranspose(output->host<float>() + batchIndex * output->stride(0), mTempOutputBatch->host<float>(), outputPlane, oc);
        mPostFunction(output->host<float>() + batchIndex * output->stride(0), mBias->host<float>(), outputPlane, ocC4);
    }
    return NO_ERROR;
}
} // namespace MNN