MNN/source/backend/cpu/CPUAttention.hpp

//
//  CPUAttention.hpp
//  MNN
//
//  Created by MNN on 2024/03/19.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef MNN_SUPPORT_TRANSFORMER_FUSE

#ifndef CPUATTENTION_HPP
#define CPUATTENTION_HPP

#include <functional>
#include "core/Execution.hpp"
#include "MNN/ErrorCode.hpp"
#include "KVCacheManager.hpp"

namespace MNN {

/**
 * Attention execution declared for the CPU backend.
 *
 * Only the interface is declared here; every member function is defined
 * outside this header. The whole class is compiled only when
 * MNN_SUPPORT_TRANSFORMER_FUSE is defined.
 */
class CPUAttention : public Execution {
public:
    /**
     * @param backend  Backend this execution is created for.
     * @param kv_cache NOTE(review): presumably selects whether a KV cache is
     *                 used (see mKVCache) — confirm against the implementation.
     */
    CPUAttention(Backend *backend, bool kv_cache);
    virtual ~CPUAttention();

    // Overrides of the Execution interface.
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;

private:
    // State flags; all three default to true.
    bool mIsPrefill      = true;
    bool mIsFirstPrefill = true;
    bool mKVCache        = true;

    // NOTE(review): presumably the element size in bytes (4 would match
    // 32-bit floats) — confirm against the implementation.
    int bytes = 4;
    int mThreadNum = 1;

    // The members below had no initializer before. They are zero-initialized
    // so that reading one before the implementation assigns it yields a
    // deterministic value instead of an indeterminate one. Declaration order
    // is unchanged.
    // NOTE(review): eP/lP/hP/unit look like packing/tiling sizes — confirm.
    int eP   = 0;
    int lP   = 0;
    int hP   = 0;
    int unit = 0;
    int mNumHead   = 0;
    int mKvNumHead = 0;
    int mHeadDim   = 0;

    // Shared-ownership tensors; empty until the implementation assigns them.
    std::shared_ptr<Tensor> mPackQ;
    std::shared_ptr<Tensor> mPackQKV;
    std::shared_ptr<KVCacheManager> mKVCacheManager = nullptr;
};

} // namespace MNN

#endif // CPUATTENTION_HPP

#endif // MNN_SUPPORT_TRANSFORMER_FUSE
MNN:Sync Sync Internal 2.9.0 2024-05-11 19:17:02 +08:00			`//`
			`// CPUAttention.hpp`
			`// MNN`
			`//`
			`// Created by MNN on 2024/03/19.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

			`#ifdef MNN_SUPPORT_TRANSFORMER_FUSE`

			`#ifndef CPUATTENTION_HPP`
			`#define CPUATTENTION_HPP`

			`#include <functional>`
			`#include "core/Execution.hpp"`
MNN:Sync: Sync Internal 2.9.4 2024-08-24 15:46:21 +08:00			`#include "MNN/ErrorCode.hpp"`
			`#include "KVCacheManager.hpp"`
MNN:Sync Sync Internal 2.9.0 2024-05-11 19:17:02 +08:00
			`namespace MNN {`

			`class CPUAttention : public Execution {`
			`public:`
			`CPUAttention(Backend *backend, bool kv_cache);`
MNN:Sync: Sync Internal 2.9.4 2024-08-24 15:46:21 +08:00			`virtual ~CPUAttention();`
MNN:Sync Sync Internal 2.9.0 2024-05-11 19:17:02 +08:00			`virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;`
			`virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;`
			`virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;`
			`private:`
MNN:Sync: Sync Internal 2.9.4 2024-08-24 15:46:21 +08:00			`bool mIsPrefill = true;`
MNN:Sync: Sync Internal 2.9.3 2024-07-22 19:51:53 +08:00			`bool mIsFirstPrefill = true;`
MNN:Sync: Sync Internal 2.9.4 2024-08-24 15:46:21 +08:00			`bool mKVCache = true;`
			`int bytes = 4;`
			`int mThreadNum = 1;;`
			`int eP, lP, hP, unit;`
			`int mNumHead, mKvNumHead, mHeadDim;`
MNN:Sync: Sync Internal 2.9.2 2024-07-04 11:53:45 +08:00			`std::shared_ptr<Tensor> mPackQ, mPackQKV;`
MNN:Sync: Sync Internal 2.9.4 2024-08-24 15:46:21 +08:00			`std::shared_ptr<KVCacheManager> mKVCacheManager = nullptr;`
MNN:Sync Sync Internal 2.9.0 2024-05-11 19:17:02 +08:00			`};`
MNN:Sync: Sync Internal 2.9.4 2024-08-24 15:46:21 +08:00
MNN:Sync Sync Internal 2.9.0 2024-05-11 19:17:02 +08:00			`} // namespace MNN`

			`#endif // CPUATTENTION_HPP`
MNN:Sync: Sync Internal 2.9.4 2024-08-24 15:46:21 +08:00
			`#endif // MNN_SUPPORT_TRANSFORMER_FUSE`