2020-11-05 16:41:56 +08:00
|
|
|
//
|
|
|
|
// ConvSingleInputExecution.cpp
|
|
|
|
// MNN
|
|
|
|
//
|
|
|
|
// Created by MNN on 2020/08/22.
|
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
|
|
|
|
|
|
|
#include "ConvSingleInputExecution.hpp"
|
2022-08-12 10:30:48 +08:00
|
|
|
#include "ConvWinogradExecution.hpp"
|
2024-02-29 16:21:40 +08:00
|
|
|
#include "ConvImplicitExecution.hpp"
|
2022-08-12 10:30:48 +08:00
|
|
|
#include "ConvCutlassExecution.hpp"
|
2023-04-11 11:12:00 +08:00
|
|
|
#include "MultiInputConvExecution.hpp"
|
2023-04-18 18:54:46 +08:00
|
|
|
#ifdef ENABLE_CUDA_QUANT
|
2023-02-28 10:41:24 +08:00
|
|
|
#include "int8/ConvInt8CutlassExecution.hpp"
|
2023-04-18 18:54:46 +08:00
|
|
|
#endif
|
2024-04-19 11:58:21 +08:00
|
|
|
#ifdef MNN_LOW_MEMORY
|
|
|
|
#include "weight_only_quant/ConvFpAIntBExecution.hpp"
|
|
|
|
#endif
|
2023-06-16 09:42:45 +08:00
|
|
|
#include "bf16/ConvCutlassBf16Execution.hpp"
|
2022-09-30 10:02:52 +08:00
|
|
|
#include "backend/cuda/core/CUDATools.hpp"
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
namespace MNN {
|
|
|
|
namespace CUDA {
|
|
|
|
|
|
|
|
/**
 * Creator that picks the CUDA execution used for OpType_Convolution.
 *
 * onCreate() tries the candidates below in order and returns the first match:
 *   1. nullptr                  - quantized weights of type 1 or 2 that carry scaleInt (IDST-int8).
 *   2. ConvFpAIntBExecution     - only when built with MNN_LOW_MEMORY, see the conditions in the body.
 *   3. MultiInputConvExecution  - when the op has 2 or 3 inputs.
 *   4. ConvImplicitExecution    - when ConvImplicitExecution::isValid() accepts the op.
 *   5. ConvWinogradExecution    - when ConvWinogradExecution::isValid() accepts the op.
 *   6. ConvCutlassBf16Execution - only when built with ENABLE_CUDA_BF16 and getPrecision() == 3.
 *   7. ConvCutlassExecution     - fallback.
 *
 * The returned Execution is allocated with `new` and handed to the caller as a raw pointer.
 */
class CUDAConvolutionCreator : public CUDABackend::Creator {
public:
    /**
     * @param inputs   input tensors of the op; only the count and inputs[0] are read here.
     * @param outputs  output tensors of the op; only outputs[0] is read here.
     * @param op       the op description; its main table is read as Convolution2D.
     * @param backend  backend passed through to the chosen execution; cast to CUDABackend
     *                 when the memory mode / precision is queried.
     * @return a newly allocated execution, or nullptr for the unsupported IDST-int8 case.
     */
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        // Look the convolution table and its quantization parameters up once; every check below reuses them.
        const auto* conv2d = op->main_as_Convolution2D();
        const auto* quan   = conv2d->quanParameter();

        // Don't support IDST-int8 because of error
        if (nullptr != quan && (1 == quan->type() || 2 == quan->type()) && quan->has_scaleInt()) {
            return nullptr;
        }

#ifdef MNN_LOW_MEMORY
        // Weight-only-quant path: needs a quantized weight buffer, the low-memory backend mode,
        // and approval from ConvFpAIntBExecution::isValid(). The && chain short-circuits in the
        // same order as the original step-by-step evaluation.
        const bool isMemoryLowWeightOnlyQuant =
            nullptr != quan && nullptr != quan->buffer() &&
            static_cast<CUDABackend*>(backend)->getMemoryMode() == BackendConfig::Memory_Low &&
            ConvFpAIntBExecution::isValid(conv2d, backend);
        if (isMemoryLowWeightOnlyQuant) {
            std::shared_ptr<ConvFpAIntBExecution::Resource> resource(new ConvFpAIntBExecution::Resource(backend, op));
            return new ConvFpAIntBExecution(backend, op, resource);
        }
#endif

        // Ops with 2 or 3 inputs are handled by the multi-input implementation.
        if (inputs.size() == 2 || inputs.size() == 3) {
            return new MultiInputConvExecution(op, backend);
        }

        if (ConvImplicitExecution::isValid(conv2d, inputs[0], outputs[0], backend)) { // inputs[0] is invalid now.
            std::shared_ptr<ConvImplicitExecution::Resource> resource(new ConvImplicitExecution::Resource(backend, op));
            return new ConvImplicitExecution(backend, op, resource);
        }

        if (ConvWinogradExecution::isValid(conv2d)) { // inputs[0] is invalid now.
            std::shared_ptr<ConvWinogradExecution::Resource> resource(new ConvWinogradExecution::Resource(backend, op));
            return new ConvWinogradExecution(backend, op, resource);
        }

#ifdef ENABLE_CUDA_BF16
        // NOTE(review): 3 is presumably the BF16 precision mode - confirm and replace with a named constant.
        if (static_cast<CUDABackend*>(backend)->getPrecision() == 3) {
            std::shared_ptr<ConvCutlassBf16Execution::Resource> resource(new ConvCutlassBf16Execution::Resource(backend, op));
            return new ConvCutlassBf16Execution(backend, op, resource);
        }
#endif

        // Fallback when none of the specialised implementations was selected.
        std::shared_ptr<ConvCutlassExecution::Resource> resource(new ConvCutlassExecution::Resource(backend, op));
        return new ConvCutlassExecution(backend, op, resource);
    }
};
|
|
|
|
|
2023-04-18 18:54:46 +08:00
|
|
|
#ifdef ENABLE_CUDA_QUANT
|
2023-02-28 10:41:24 +08:00
|
|
|
/**
 * Creator for the Int8 convolution op on the CUDA backend.
 * Always builds a ConvInt8CutlassExecution together with its Resource.
 */
class CUDAConvolutionInt8Creator : public CUDABackend::Creator {
public:
    /**
     * @param inputs   unused here.
     * @param outputs  unused here.
     * @param op       op description forwarded to the resource and the execution.
     * @param backend  backend forwarded to the resource and the execution.
     * @return a newly allocated ConvInt8CutlassExecution (raw pointer, allocated with `new`).
     */
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        using Int8Resource = ConvInt8CutlassExecution::Resource;

        // Build the resource first, then hand the shared handle to the execution.
        std::shared_ptr<Int8Resource> int8Resource;
        int8Resource.reset(new Int8Resource(backend, op));

        Execution* created = new ConvInt8CutlassExecution(backend, op, int8Resource);
        return created;
    }
};
|
|
|
|
|
|
|
|
// Namespace-scope registrar object, constructed during static initialization with OpType_ConvInt8.
// Presumably this binds CUDAConvolutionInt8Creator to that op type in the CUDA backend - see CUDACreatorRegister.
// Only compiled when ENABLE_CUDA_QUANT is defined (enclosing #ifdef).
CUDACreatorRegister<CUDAConvolutionInt8Creator> __ConvInt8Execution(OpType_ConvInt8);
|
2023-04-18 18:54:46 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
// Namespace-scope registrar object, constructed during static initialization with OpType_Convolution.
// Presumably this binds CUDAConvolutionCreator to that op type in the CUDA backend - see CUDACreatorRegister.
CUDACreatorRegister<CUDAConvolutionCreator> __ConvExecution(OpType_Convolution);
|
2020-11-05 16:41:56 +08:00
|
|
|
|
|
|
|
}// namespace CUDA
|
2021-11-30 10:10:53 +08:00
|
|
|
}// namespace MNN
|