2023-02-28 10:41:24 +08:00
|
|
|
//
|
|
|
|
// FloatToInt8Execution.hpp
|
|
|
|
// MNN
|
|
|
|
//
|
|
|
|
// Created by MNN on 2023/01/03.
|
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
2023-04-18 18:54:46 +08:00
|
|
|
#ifdef ENABLE_CUDA_QUANT
|
2023-02-28 10:41:24 +08:00
|
|
|
|
|
|
|
#ifndef FloatToInt8Execution_hpp
|
|
|
|
#define FloatToInt8Execution_hpp
|
|
|
|
|
|
|
|
#include "core/Execution.hpp"
|
|
|
|
#include "core/TensorUtils.hpp"
|
|
|
|
#include <vector>
|
|
|
|
#include "backend/cuda/core/CUDABackend.hpp"
|
2023-06-16 09:42:45 +08:00
|
|
|
#include "../CastExecution.hpp"
|
2023-02-28 10:41:24 +08:00
|
|
|
|
|
|
|
namespace MNN {
|
|
|
|
namespace CUDA {
|
|
|
|
|
|
|
|
class FloatToInt8Execution : public Execution {
|
|
|
|
public:
|
|
|
|
FloatToInt8Execution(Backend *backend, const std::vector<Tensor *> &inputs, const MNN::Op *param);
|
|
|
|
virtual ~FloatToInt8Execution();
|
|
|
|
|
|
|
|
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
|
|
|
|
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
void* mScales;
|
|
|
|
int8_t mZeroPoint;
|
|
|
|
int8_t mClampMin;
|
|
|
|
int8_t mClampMax;
|
|
|
|
int mClipBits;
|
|
|
|
bool mSingle = false;
|
2023-03-20 11:32:29 +08:00
|
|
|
float mSingleScale;
|
2023-02-28 10:41:24 +08:00
|
|
|
int mChannel;
|
|
|
|
int mCount;
|
|
|
|
int mArea;
|
2023-09-04 10:42:11 +08:00
|
|
|
MemChunk mScaleStorage;
|
2023-02-28 10:41:24 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace CUDA
|
|
|
|
} // namespace MNN
|
2023-04-18 18:54:46 +08:00
|
|
|
#endif /* FloatToInt8Execution_hpp */
|
|
|
|
#endif
|