MNN/source/backend/cuda/execution/int8/FloatToInt8Execution.hpp

//
// FloatToInt8Execution.hpp
// MNN
//
// Created by MNN on 2023/01/03.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifdef ENABLE_CUDA_QUANT
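// Everything below is compiled only when MNN's CUDA quantized-op path is enabled.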
#ifndef FloatToInt8Execution_hpp
#define FloatToInt8Execution_hpp
#include "core/Execution.hpp"
#include "core/TensorUtils.hpp"
#include <vector>
#include "backend/cuda/core/CUDABackend.hpp"
#include "../CastExecution.hpp"
namespace MNN {
namespace CUDA {
class FloatToInt8Execution : public Execution {
public:
    FloatToInt8Execution(Backend *backend, const std::vector<Tensor *> &inputs, const MNN::Op *param);
    virtual ~FloatToInt8Execution();
    // Shape-dependent preparation (standard MNN Execution hook).
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    // Runs the float -> int8 conversion (standard MNN Execution hook).
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
    void* mScales;           // device pointer to the per-channel quantization scales
    int8_t mZeroPoint;       // zero point added after scaling
    int8_t mClampMin;        // lower clamp bound of the int8 output
    int8_t mClampMax;        // upper clamp bound of the int8 output
    int mClipBits;
    bool mSingle = false;    // true when a single scale applies to the whole tensor
    float mSingleScale;      // the tensor-wide scale used when mSingle is true
    int mChannel;            // channel count of the input tensor
    int mCount;              // total number of elements to quantize
    int mArea;               // spatial size (elements per channel)
    MemChunk mScaleStorage;  // buffer-pool chunk backing mScales on the device
};
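/*
 * A minimal sketch of the per-element mapping this execution presumably applies,
 * inferred from the members above. This is an assumption for illustration only:
 * the real kernel lives in FloatToInt8Execution.cu and may pack or tile the data
 * differently.
 *
 *   __global__ void FloatToInt8Sketch(const float* in, int8_t* out, const float* scales,
 *                                     int8_t zeroPoint, int8_t clampMin, int8_t clampMax,
 *                                     int count, int channel, int area) {
 *       int i = blockIdx.x * blockDim.x + threadIdx.x;
 *       if (i >= count) return;
 *       int c = (i / area) % channel; // channel index under NCHW-style addressing
 *       int v = __float2int_rn(in[i] * scales[c]) + (int)zeroPoint;
 *       out[i] = (int8_t)min(max(v, (int)clampMin), (int)clampMax);
 *   }
 */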
} // namespace CUDA
} // namespace MNN
#endif /* FloatToInt8Execution_hpp */
#endif /* ENABLE_CUDA_QUANT */