//
//  MetalConvolutionCommon.mm
//  MNN
//
//  Created by MNN on 2019/02/25.
//  Copyright © 2018, Alibaba Group Holding Limited
//
2019-12-27 22:16:57 +08:00
|
|
|
#import "backend/metal/MetalConvolutionCommon.hpp"
|
|
|
|
#import "core/Macro.h"
|
|
|
|
#import "backend/metal/MetalBackend.hpp"
|
|
|
|
#import "backend/metal/MetalConvolution1x1.hpp"
|
|
|
|
#import "backend/metal/MetalConvolutionWinograd.hpp"
|
|
|
|
#import "core/TensorUtils.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
|
|
|
|
#if MNN_METAL_ENABLED
|
|
|
|
namespace MNN {
|
|
|
|
|
|
|
|
// Uploads the convolution bias into a zero-padded device buffer.
// The channel count is rounded up to a multiple of 16 so kernels can read
// vector-aligned data; the trailing padding lanes are left at zero.
static id<MTLBuffer> biasForConv(MNNMetalContext *context, const Convolution2D *conv) {
    auto bias        = conv->bias();
    auto channels    = conv->common()->outputCount();
    auto alignedSize = UP_DIV(channels, 16) * 16 * sizeof(metal_float);
    auto buffer      = [context newDeviceBuffer:alignedSize access:CPUWriteOnly];

    auto from = bias->data();
    auto to   = (metal_float *)buffer.contents;
    ::memset(to, 0, alignedSize); // zero the padding lanes beyond `channels`
#pragma clang loop vectorize(enable) unroll(enable)
    for (int c = 0; c < channels; c++) {
        to[c] = from[c]; // converts host float -> metal_float on assignment
    }
    return buffer;
}
|
|
|
|
|
2021-11-30 10:10:53 +08:00
|
|
|
// Caches the convolution hyper-parameters from the serialized op and uploads
// the bias to a device buffer. Weight packing is deferred to loadWeight(),
// which concrete subclasses invoke.
MetalConvolutionCommon::MetalConvolutionCommon(Backend *backend, const MNN::Op *op) : Execution(backend) {
    auto context = (__bridge MNNMetalContext *)static_cast<MetalBackend *>(backend)->context();
    auto conv = op->main_as_Convolution2D();
    auto common = conv->common();
    mOp = op;
    // Depthwise convs are distinguished by op type, not by a common() flag.
    mDepthwise = op->type() == OpType_ConvolutionDepthwise;
    mGroups = common->group();
    // Kernel geometry.
    mKernelX = common->kernelX();
    mKernelY = common->kernelY();
    // Padding configuration (mode plus explicit pads).
    mPadMode = common->padMode();
    mPadX = common->padX();
    mPadY = common->padY();
    // Strides and dilations.
    mStrideX = common->strideX();
    mStrideY = common->strideY();
    mDilateX = common->dilateX();
    mDilateY = common->dilateY();
    // Bias goes to the GPU immediately; it is shared by all subclasses.
    mBias = biasForConv(context, conv);
    // 0 = none, 1 = relu, 2 = relu6.
    mActivationType = common->relu() ? 1 : (common->relu6() ? 2 : 0);
}
|
|
|
|
|
|
|
|
// No resize-time work at this level; subclasses override to select pipelines
// and compute thread-group sizes.
ErrorCode MetalConvolutionCommon::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    return NO_ERROR;
}
|
|
|
|
|
|
|
|
// Forwards execution to the subclass's float path with the first input/output
// tensors (Metal convolutions here are single-input, single-output).
ErrorCode MetalConvolutionCommon::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    return onFloat(inputs[0], outputs[0]);
}
|
|
|
|
|
|
|
|
// Repacks convolution weights for the GPU, converting FType (host element
// type) to TType (device element type) along the way.
//   from: dense [g][o][i][h][w]
//   to:   blocked [g][o/4][i/4][h][w][16], each innermost 16 being a
//         4x4 (output-channel x input-channel) tile, laid out ro*4 + ri.
// The allocation rounds goc_4 up to a multiple of 4, so the buffer is larger
// than what the loops write; the memset zeroes both that slack and the tile
// lanes beyond goc/gic.
template <typename FType, typename TType>
static id<MTLBuffer> weightInBlock(MNNMetalContext *context, int group, int oc, int ic, int kh, int kw,
                                   const FType *src) {
    // Per-group channel counts and their 4-channel tile counts.
    auto goc = oc / group;
    auto gic = ic / group;
    auto goc_4 = UP_DIV(goc, 4);
    auto gic_4 = UP_DIV(gic, 4);
    auto weight_len = group * ROUND_UP(goc_4, 4) * gic_4 * kw * kh * 16 * sizeof(TType);
    auto buffer = [context newDeviceBuffer:weight_len access:CPUWriteOnly];
    auto dst = (TType *)buffer.contents;
    ::memset(dst, 0, weight_len); // padding channels must read as zero
    for (int g = 0; g < group; g++) {
        // Note the per-group stride uses goc_4 (unrounded); only the leading
        // goc_4 tiles of each group's rounded allocation are ever written.
        auto g_dst = dst + g * goc_4 * gic_4 * kh * kw * 16; // g
        for (int o = 0; o < goc; o++) {
            auto zo = o / 4, ro = o % 4; // tile index / lane within tile
            auto o_dst = g_dst + zo * gic_4 * kh * kw * 16 + ro * 4; // o/4 x 4
            for (int i = 0; i < gic; i++) {
                auto zi = i / 4, ri = i % 4; // tile index / lane within tile
                auto i_dst = o_dst + zi * kh * kw * 16 + ri; // i/4 x 4
                for (int h = 0; h < kh; h++) {
                    for (int w = 0; w < kw; w++) {
                        // to [g][o/4][i/4][h][w][16]
                        // from [g][o][i][h][w] (src is consumed sequentially)
                        i_dst[(h * kw + w) * 16] = *src++;
                    }
                }
            }
        }
    }
    return buffer;
}
|
|
|
|
|
|
|
|
// Loads (and, when the op carries quantization parameters, dequantizes) the
// convolution weights, then packs them into the blocked device layout.
void MetalConvolutionCommon::loadWeight(const MNN::Convolution2D *conv) {
    std::shared_ptr<ConvolutionCommon::Int8Common> quantized;
    if (conv->quanParameter()) {
        // Third argument forces a float copy of the quantized weights.
        quantized = ConvolutionCommon::load(conv, backend(), true);
    }
    mWeight = weightForConv(conv, quantized.get(), mDepthwise);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Packs float weights from [g][o][i][h][w] into the blocked device layout,
// converting to the backend's metal_float precision, and uploads the result.
id<MTLBuffer> MetalConvolutionCommon::weightForFloat(int group, int oc, int ic, int kh, int kw, const float *src) {
    auto backend = static_cast<MetalBackend *>(this->backend());
    // `backend` is already a MetalBackend*; the original re-applied the same
    // static_cast redundantly here.
    auto context = (__bridge MNNMetalContext *)backend->context();
    return weightInBlock<float, metal_float>(context, group, oc, ic, kh, kw, src);
}
|
|
|
|
|
2020-03-12 16:07:38 +08:00
|
|
|
// Selects the weight source — the dequantized float copy when a quantization
// record is present and populated, the raw serialized floats otherwise — and
// converts it into the blocked GPU layout.
// NOTE(review): `depthwise` is accepted but not read in this function —
// verify whether depthwise packing is meant to diverge here.
id<MTLBuffer> MetalConvolutionCommon::weightForConv(const Convolution2D *conv, ConvolutionCommon::Int8Common *qnt,
                                                    bool depthwise) {
    // Recover the weight-blob geometry; ic is derived from the total element
    // count since the flatbuffer does not store it directly.
    auto common = conv->common();
    auto kw     = common->kernelX();
    auto kh     = common->kernelY();
    auto group  = common->group();
    auto oc     = common->outputCount();
    auto size   = qnt ? MAX(qnt->weight.size(), qnt->weightFloat.size()) : conv->weight()->size();
    auto ic     = size / kw / kh / (oc / group);

    // Prefer the dequantized copy when one was produced by ConvolutionCommon::load.
    const float *src = (qnt && qnt->weightFloat.size() > 0) ? qnt->weightFloat.get()
                                                            : conv->weight()->data();
    return weightForFloat(group, oc, ic, kh, kw, src);
}
|
|
|
|
} // namespace MNN
|
|
|
|
|
|
|
|
#endif
|