//
// CPUPluginModule.cpp
// MNNCodegen
//
// Created by MNN on 2020/12/29.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <string>
#include <vector>
#include <fstream>
#include <unordered_map>
#include <algorithm>
#include <map>
#include <memory>
#include "core/TensorUtils.hpp"
#include "MNN_generated.h"
#include "cpu/CPUAst.hpp"
#ifdef MNN_CODEGEN_LLVM
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#endif
using namespace AST;
using namespace MNN;
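// Split the node list into groups: consecutive element-wise ops form one group
// (later fused into a single loop), while every other op gets a group of its own.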
std::vector<std::vector<Node*>> spliteNodes(std::vector<Node*>& nodes) {
    std::vector<std::vector<Node*>> res;
    res.push_back(std::vector<Node*>());
    for (auto node : nodes) {
        if (isElemWise(node->cmd->op->type())) {
            res.back().push_back(node);
        } else {
            if (res.back().empty()) {
                res.back().push_back(node);
            } else {
                res.push_back({node});
            }
            res.push_back(std::vector<Node*>());
        }
    }
    if (res.back().empty()) {
        res.pop_back();
    }
    return res;
}
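// Builds the AST of one generated kernel ("kernel_<idx>") from a list of fused nodes.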
class CPUPluginModule::CPUPluginFunction {
public:
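    // Sort the nodes by topological order, split them into fusable groups, and emit one
    // element-wise loop or raster copy per group as the function body.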
    CPUPluginFunction(std::vector<Node*>& nodes, int idx) : nodes(nodes) {
        std::sort(nodes.begin(), nodes.end(), [](Node* x, Node* y) { return x->topoIndex < y->topoIndex; });
        std::unique_ptr<ListExprAST> list(new ListExprAST);
        auto subNodes = spliteNodes(nodes);
        for (auto subNode : subNodes) {
            if (subNode.size() > 1 || subNode.back()->cmd->op->type() != OpType_Raster) {
                auto loop = addElemwiseLoop(subNode);
                list->push_back(std::move(loop));
            } else {
                auto raster = addRaster(subNode);
                list->push_back(std::move(raster));
            }
        }
        auto proto = std::make_unique<PrototypeAST>("kernel_" + std::to_string(idx), inputs.size(), outputs.size());
        function = std::make_unique<FunctionAST>(std::move(proto), std::move(list));
    }
#ifdef MNN_CODEGEN_LLVM
    void codegen(LLVMTarget* target) {
        function->codegen(target);
    }
#endif
#ifdef MNN_CODEGEN_C
    std::string codegen(SourceTarget* target) {
        return function->codegen(target);
    }
#endif
    std::vector<Tensor*> getInputs() { return inputs; }
    std::vector<Tensor*> getOutputs() { return outputs; }
private:
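    // Fuse a chain of element-wise ops into one loop over the element index "i":
    // each op's result feeds its consumers through outMap, and any result left in
    // outMap at the end is stored back to a kernel output.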
    std::unique_ptr<ExprAST> addElemwiseLoop(std::vector<Node*>& nodes) {
        std::map<const Tensor*, int> varShape;
        std::unordered_map<Tensor*, std::unique_ptr<ExprAST>> outMap;
        for (auto& node : nodes) {
            auto cmd = node->cmd;
            std::vector<std::unique_ptr<ExprAST>> in(cmd->inputs.size());
            for (int i = 0; i < cmd->inputs.size(); i++) {
                if (outMap.find(cmd->inputs[i]) == outMap.end()) {
                    auto inputExpr = getExprByTensor(cmd->inputs[i], true);
                    int size = cmd->inputs[i]->elementSize();
                    varShape[cmd->inputs[i]] = size;
                    if (size > 1) {
                        in[i] = std::make_unique<SubscriptExprAST>(std::move(inputExpr), "i");
                    } else {
                        in[i] = std::make_unique<SubscriptExprAST>(std::move(inputExpr), 0);
                    }
                } else {
                    in[i] = std::move(outMap[cmd->inputs[i]]);
                    outMap.erase(cmd->inputs[i]);
                }
            }
            switch (cmd->op->type()) {
                case MNN::OpType_BinaryOp:
                {
                    auto type = static_cast<MNN::BinaryOpOperation>(cmd->op->main_as_BinaryOp()->opType());
                    outMap[cmd->outputs[0]] = std::make_unique<BinaryExprAST>(type, std::move(in[0]), std::move(in[1]));
                    break;
                }
                case MNN::OpType_Eltwise:
                {
                    std::map<EltwiseType, MNN::BinaryOpOperation> elemToBinary = {
                        {EltwiseType_PROD, BinaryOpOperation_MUL},
                        {EltwiseType_SUM, BinaryOpOperation_ADD},
                        {EltwiseType_MAXIMUM, BinaryOpOperation_MAXIMUM},
                        {EltwiseType_SUB, BinaryOpOperation_SUB}
                    };
                    auto type = elemToBinary[cmd->op->main_as_Eltwise()->type()];
                    auto tmp = std::make_unique<BinaryExprAST>(type, std::move(in[0]), std::move(in[1]));
                    for (int i = 2; i < cmd->inputs.size(); i++) {
                        tmp = std::make_unique<BinaryExprAST>(type, std::move(tmp), std::move(in[i]));
                    }
                    outMap[cmd->outputs[0]] = std::move(tmp);
                    break;
                }
                case MNN::OpType_UnaryOp:
                {
                    auto unary = cmd->op->main_as_UnaryOp();
                    auto type = unary->opType();
                    outMap[cmd->outputs[0]] = std::make_unique<UnaryExprAST>(type, std::move(in[0]));
                    break;
                }
                case MNN::OpType_ReLU6:
                {
                    auto relu6 = cmd->op->main_as_Relu6();
                    float minv = relu6->minValue();
                    float maxv = relu6->maxValue();
                    outMap[cmd->outputs[0]] = std::make_unique<ReluExprAST>(minv, maxv, std::move(in[0]));
                    break;
                }
                case MNN::OpType_ReLU:
                {
                    auto relu = cmd->op->main_as_Relu();
                    float slope = relu->slope();
                    outMap[cmd->outputs[0]] = std::make_unique<ReluExprAST>(slope, 0, std::move(in[0]));
                    break;
                }
                default:
                    break;
            }
        }
        std::unique_ptr<ExprAST> content;
        for (auto& iter : outMap) {
            auto outputExpr = getExprByTensor(iter.first, false);
            auto output = std::make_unique<SubscriptExprAST>(std::move(outputExpr), "i");
            varShape[iter.first] = iter.first->elementSize();
            content = std::make_unique<AssignExprAST>(std::move(output), std::move(iter.second));
        }
        int size = -1;
        for (auto& iter : varShape) {
            if (iter.second > 1) {
                if (size > 1 && iter.second != size) {
                    MNN_ERROR("size not equal!\n");
                    exit(0);
                } else {
                    size = iter.second;
                }
            }
        }
        auto start = std::make_unique<NumberExprAST>(0);
        auto end = std::make_unique<NumberExprAST>(size);
        auto step = std::make_unique<NumberExprAST>(1);
        auto loop = std::make_unique<ForExprAST>("i", std::move(start), std::move(end), std::move(step), std::move(content));
        return loop;
    }
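    // Lower a Raster op: for every region of the input tensor's describe, generate a
    // 3-level nested copy loop whose source/destination indices are computed from the
    // region's strides and offsets.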
    std::unique_ptr<ExprAST> addRaster(std::vector<Node*>& nodes) {
        auto node = nodes.back();
        auto input = node->cmd->inputs[0];
        auto output = node->cmd->outputs[0];
        auto des = TensorUtils::getDescribe(input);
        std::string foots[3] = { "i", "j", "k" };
        auto getOffset = [&foots](int strides[], int offset) {
            std::unique_ptr<ExprAST> steps[3];
            for (int i = 0; i < 3; i++) {
                auto stride = std::make_unique<NumberExprAST>(strides[i]);
                auto foot = std::make_unique<VariableExprAST>(foots[i]);
                steps[i] = std::make_unique<BinaryExprAST>(MNN::BinaryOpOperation_MUL, std::move(foot), std::move(stride));
            }
            auto res = std::make_unique<BinaryExprAST>(MNN::BinaryOpOperation_ADD, std::move(steps[1]), std::move(steps[2]));
            res = std::make_unique<BinaryExprAST>(MNN::BinaryOpOperation_ADD, std::move(steps[0]), std::move(res));
            return std::make_unique<BinaryExprAST>(MNN::BinaryOpOperation_ADD, std::move(res), std::make_unique<NumberExprAST>(offset));
        };
        std::unique_ptr<ListExprAST> list(new ListExprAST);
        for (auto& reg : des->regions) {
            auto inputExpr = getExprByTensor(reg.origin, true);
            auto outputExpr = getExprByTensor(output, false);
            auto srcPtr = std::make_unique<SubscriptExprAST>(std::move(inputExpr), getOffset(reg.src.stride, reg.src.offset));
            auto dstPtr = std::make_unique<SubscriptExprAST>(std::move(outputExpr), getOffset(reg.dst.stride, reg.dst.offset));
            std::unique_ptr<ExprAST> content = std::make_unique<AssignExprAST>(std::move(dstPtr), std::move(srcPtr));
            for (int i = 2; i >= 0; i--) {
                auto start = std::make_unique<NumberExprAST>(0);
                auto end = std::make_unique<NumberExprAST>(reg.size[i]);
                auto step = std::make_unique<NumberExprAST>(1);
                content = std::make_unique<ForExprAST>(foots[i], std::move(start), std::move(end), std::move(step), std::move(content));
            }
            list->push_back(std::move(content));
        }
        return list;
    }
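    // Map a tensor to an "inputs[idx]" / "outputs[idx]" expression, registering the
    // tensor as a new kernel input or output the first time it is seen.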
    std::unique_ptr<ExprAST> getExprByTensor(Tensor* t, bool read) {
        if (inputMap.find(t) != inputMap.end()) {
            return std::make_unique<SubscriptExprAST>("inputs", inputMap[t]);
        }
        if (outputMap.find(t) != outputMap.end()) {
            return std::make_unique<SubscriptExprAST>("outputs", outputMap[t]);
        }
        if (read) {
            int idx = inputs.size();
            inputs.push_back(t);
            inputMap[t] = idx;
            return std::make_unique<SubscriptExprAST>("inputs", idx);
        } else {
            int idx = outputs.size();
            outputs.push_back(t);
            outputMap[t] = idx;
            return std::make_unique<SubscriptExprAST>("outputs", idx);
        }
    }
private:
    std::vector<Node*> nodes;
    std::vector<Tensor*> inputs;
    std::vector<Tensor*> outputs;
    std::unordered_map<const Tensor*, int> inputMap;
    std::unordered_map<const Tensor*, int> outputMap;
    std::unique_ptr<FunctionAST> function;
};
#ifdef MNN_CODEGEN_LLVM
void CPUPluginModule::codegen(LLVMTarget* target) {
    for (int i = 0; i < getFunctionNum(); i++) {
        functions[i]->codegen(target);
    }
}
#endif
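// Emit kernel.h (declarations plus a kernel function-pointer table) and, depending on
// the build flags, the C source kernel.c and/or the LLVM bitcode file kernel.bc.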
void CPUPluginModule::codegen() {
    std::ofstream headerFile("./kernel.h");
    std::ofstream sourceFile("./kernel.c");
    if (!headerFile.is_open()) {
        return;
    }
    headerFile << "extern \"C\" {\n";
#ifdef MNN_CODEGEN_LLVM
    std::unique_ptr<LLVMTarget> llvm(new LLVMTarget(name));
#endif
#ifdef MNN_CODEGEN_C
    sourceFile << "#include \"math.h\"\n";
    std::unique_ptr<SourceTarget> source(new CTarget(name));
#endif
    for (int i = 0; i < getFunctionNum(); i++) {
        headerFile << "void kernel_" + std::to_string(i) + "(float**, float**);\n";
#ifdef MNN_CODEGEN_C
        sourceFile << functions[i]->codegen(source.get());
#endif
#ifdef MNN_CODEGEN_LLVM
        functions[i]->codegen(llvm.get());
#endif
    }
    headerFile << "}\n";
    headerFile << "void (*kernels[])(float**, float**) = {\n";
    for (int i = 0; i < getFunctionNum(); i++) {
        headerFile << "&kernel_" + std::to_string(i) + ",\n";
    }
    headerFile << "};\n";
#ifdef MNN_CODEGEN_LLVM
    // write to bc file
    std::error_code EC;
    llvm::raw_fd_ostream OS("kernel.bc", EC, sys::fs::F_None);
    WriteBitcodeToFile(*llvm->getModule(), OS);
    OS.flush();
#endif
}
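// Build a new kernel function from the given nodes and return its input/output tensors.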
InOutTensors CPUPluginModule::addFunction(std::vector<Node*> nodes) {
    std::unique_ptr<CPUPluginFunction> func(new CPUPluginFunction(nodes, getFunctionNum()));
    auto res = std::make_pair<std::vector<MNN::Tensor*>, std::vector<MNN::Tensor*>>(func->getInputs(), func->getOutputs());
    functions.emplace_back(std::move(func));
    return res;
}
CPUPluginModule::CPUPluginModule() {}
CPUPluginModule::CPUPluginModule(std::string name) : name(name) {}
CPUPluginModule::~CPUPluginModule() = default;
CPUPluginModule::CPUPluginModule(CPUPluginModule&& m) = default;
CPUPluginModule& CPUPluginModule::operator=(CPUPluginModule&& m) = default;