MNN/source/backend/cuda/execution/int8/FloatToInt8Execution.hpp

//
// FloatToInt8Execution.hpp
// MNN
//
// Created by MNN on 2023/01/03.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifdef ENABLE_CUDA_QUANT
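// Everything below is compiled only when MNN's CUDA quantized-op path is enabled.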
#ifndef FloatToInt8Execution_hpp
#define FloatToInt8Execution_hpp
#include "core/Execution.hpp"
#include "core/TensorUtils.hpp"
#include <vector>
#include "backend/cuda/core/CUDABackend.hpp"
#include "../CastExecution.hpp"
namespace MNN {
namespace CUDA {
class FloatToInt8Execution : public Execution {
public:
    FloatToInt8Execution(Backend *backend, const std::vector<Tensor *> &inputs, const MNN::Op *param);
    virtual ~FloatToInt8Execution();
    // Shape-dependent preparation (standard MNN Execution hook).
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    // Runs the float -> int8 conversion (standard MNN Execution hook).
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
    void* mScales;           // device pointer to the per-channel quantization scales
    int8_t mZeroPoint;       // zero point added after scaling
    int8_t mClampMin;        // lower clamp bound of the int8 output
    int8_t mClampMax;        // upper clamp bound of the int8 output
    int mClipBits;
    bool mSingle = false;    // true when a single scale applies to the whole tensor
    float mSingleScale;      // the tensor-wide scale used when mSingle is true
    int mChannel;            // channel count of the input tensor
    int mCount;              // total number of elements to quantize
    int mArea;               // spatial size (elements per channel)
    MemChunk mScaleStorage;  // buffer-pool chunk backing mScales on the device
};
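/*
 * A minimal sketch of the per-element mapping this execution presumably applies,
 * inferred from the members above. This is an assumption for illustration only:
 * the real kernel lives in FloatToInt8Execution.cu and may pack or tile the data
 * differently.
 *
 *   __global__ void FloatToInt8Sketch(const float* in, int8_t* out, const float* scales,
 *                                     int8_t zeroPoint, int8_t clampMin, int8_t clampMax,
 *                                     int count, int channel, int area) {
 *       int i = blockIdx.x * blockDim.x + threadIdx.x;
 *       if (i >= count) return;
 *       int c = (i / area) % channel; // channel index under NCHW-style addressing
 *       int v = __float2int_rn(in[i] * scales[c]) + (int)zeroPoint;
 *       out[i] = (int8_t)min(max(v, (int)clampMin), (int)clampMax);
 *   }
 */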
} // namespace CUDA
} // namespace MNN
#endif /* FloatToInt8Execution_hpp */
#endif /* ENABLE_CUDA_QUANT */