MNN/source/backend/cpu/CPUPack.cpp

//
//  CPUPack.cpp
//  MNN
//
//  Created by MNN on 2018/08/14.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUPack.hpp"
#include "backend/cpu/CPUBackend.hpp"

namespace MNN {

// Constructs a Pack execution bound to `backend`.
// `axis` is stored in mAxis and later selects the dimension along which the
// inputs are stacked; it is not validated here.
CPUPack::CPUPack(Backend *backend, int axis)
    : Execution(backend), mAxis(axis) {
    // No further setup: the stacking axis is the only state kept by this op.
}

// Stacks all `inputs` along the pack axis into `outputs[0]`, moving elements
// of type T.
//
// - A negative mAxis is interpreted relative to the output rank
//   (axis + rank), matching the usual "count from the back" convention.
// - When the (normalised) axis is 0 the inputs are simply laid out one after
//   another, so each input is copied with a single memcpy.
// - Otherwise every output element is visited once: its linear index is
//   decomposed into coordinates using the output strides, the coordinate on
//   the pack axis selects the source tensor, and the remaining coordinates
//   are mapped through the strides of inputs[0].
//
// Returns NOT_SUPPORT when the axis is outside [0, output rank) after
// normalisation, NO_ERROR otherwise.
//
// NOTE(review): the index decomposition assumes the output strides describe
// a dense layout and that every input shares the strides of inputs[0] --
// confirm against the shape computation for this op.
template <typename T>
ErrorCode CPUPack::MNNPackLayerForward(const std::vector<MNN::Tensor *> &inputs,
                                       const std::vector<MNN::Tensor *> &outputs) {
    auto output                = outputs[0];
    const int outputDimensions = output->buffer().dimensions;

    // Normalise negative axes; without this no dimension index ever matches
    // the axis and the input stride table is indexed past the input rank.
    const int axis = mAxis < 0 ? mAxis + outputDimensions : mAxis;
    if (axis < 0 || axis >= outputDimensions) {
        return NOT_SUPPORT;
    }

    if (axis == 0) {
        // Packing on the outermost axis is a plain concatenation of the inputs.
        auto dstPtr = output->buffer().host;
        for (auto inputX : inputs) {
            const auto byteCount = inputX->size();
            memcpy(dstPtr, inputX->buffer().host, byteCount);
            dstPtr += byteCount;
        }
        return NO_ERROR;
    }

    // Hoist the dimension tables out of the per-element loop.
    const auto outputDims = output->buffer().dim;
    const auto inputDims  = inputs[0]->buffer().dim;

    int outputDataCount = 1;
    for (int i = 0; i < outputDimensions; i++) {
        outputDataCount *= outputDims[i].extent;
    }

    auto dst = reinterpret_cast<T *>(output->buffer().host);
    for (int offset = 0; offset < outputDataCount; offset++) {
        int remainder   = offset;
        int inputOffset = 0;
        int cordOnAxis  = 0;
        // `j` walks the input dimensions, which are the output dimensions
        // with the pack axis removed.
        for (int i = 0, j = 0; i < outputDimensions; i++) {
            const int stride = outputDims[i].stride;
            const int cord   = remainder / stride;
            remainder        = remainder % stride;

            if (i != axis) {
                inputOffset += cord * inputDims[j++].stride;
            } else {
                cordOnAxis = cord;
            }
        }

        dst[offset] = reinterpret_cast<const T *>(inputs[cordOnAxis]->buffer().host)[inputOffset];
    }

    return NO_ERROR;
}

// Runs the Pack op: stacks `inputs` into `outputs[0]`.
//
// A single input is copied to the output byte-for-byte. With several inputs
// the work is delegated to MNNPackLayerForward, instantiated with an integer
// type whose width equals the tensors' element size so that elements are
// moved as opaque bit patterns of the right width.
//
// Returns NO_ERROR on success, or NOT_SUPPORT when the element size is not
// 1, 2, 4 or 8 bytes.
ErrorCode CPUPack::onExecute(const std::vector<MNN::Tensor *> &inputs, const std::vector<MNN::Tensor *> &outputs) {
    auto input  = inputs[0];
    auto output = outputs[0];

    if (inputs.size() == 1) {
        ::memcpy(output->buffer().host, input->buffer().host, input->size());
        return NO_ERROR;
    }

    // Previously the copy was hard-wired to int32_t, which moves 4 bytes per
    // element regardless of the real element width. Pick the width from the
    // tensor's declared type instead.
    const int elementBytes = (input->buffer().type.bits + 7) / 8;
    switch (elementBytes) {
        case 1:
            return MNNPackLayerForward<uint8_t>(inputs, outputs);
        case 2:
            return MNNPackLayerForward<uint16_t>(inputs, outputs);
        case 4:
            return MNNPackLayerForward<int32_t>(inputs, outputs);
        case 8:
            return MNNPackLayerForward<int64_t>(inputs, outputs);
        default:
            return NOT_SUPPORT;
    }
}

// Factory that builds a CPUPack execution for ops of type OpType_Pack.
class CPUPackCreator : public CPUBackend::Creator {
public:
    // Reads the pack axis from the op's PackParam and creates the execution.
    // `inputs` and `outputs` are not consulted.
    // Returns nullptr when the op carries no PackParam payload instead of
    // dereferencing a null parameter table.
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        auto pack = op->main_as_PackParam();
        if (nullptr == pack) {
            return nullptr;
        }
        return new CPUPack(backend, pack->axis());
    }
};
REGISTER_CPU_OP_CREATOR(CPUPackCreator, OpType_Pack);
} // namespace MNN
beta 0.1.0 2019-04-17 10:49:11 +08:00			`//`
			`// CPUPack.cpp`
			`// MNN`
			`//`
			`// Created by MNN on 2018/08/14.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

Update 2019-12-27 22:16:57 +08:00			`#include "backend/cpu/CPUPack.hpp"`
			`#include "backend/cpu/CPUBackend.hpp"`
beta 0.1.0 2019-04-17 10:49:11 +08:00
			`namespace MNN {`

- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`CPUPack::CPUPack(Backend *backend, int axis)`
			`: Execution(backend), mAxis(axis) {`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`// nothing to do`
			`}`

			`template <typename T>`
			`ErrorCode CPUPack::MNNPackLayerForward(const std::vector<MNN::Tensor *> &inputs,`
			`const std::vector<MNN::Tensor *> &outputs) {`
			`auto output = outputs[0];`
			`const int outputDimensions = output->buffer().dimensions;`
			`auto mN = inputs.size();`

			`if (mAxis == 0) {`
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`auto dstPtr = outputs[0]->buffer().host;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`for (int i = 0; i < mN; i++) {`
			`auto inputX = inputs[i];`
			`auto sourcePtr = inputX->buffer().host;`
			`memcpy(dstPtr, sourcePtr, inputX->size());`
			`dstPtr += inputX->size();`
			`}`
			`} else {`
			`int outputDataCount = 1;`
			`for (int i = 0; i < outputDimensions; i++) {`
			`outputDataCount *= output->buffer().dim[i].extent;`
			`}`

			`int r;`
beta 0.1.1.6 - add support for windows - fix bugs in converting dropout - fix bugs in post treat 2019-06-10 21:08:55 +08:00			`for (int offset = 0, cordOnAxis = 0; offset < outputDataCount; offset++) {`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`r = offset;`
			`int inputOffset = 0;`
beta 0.1.1.6 - add support for windows - fix bugs in converting dropout - fix bugs in post treat 2019-06-10 21:08:55 +08:00			`for (int i = 0, j = 0, cord; i < outputDimensions; i++) {`
			`cord = r / output->buffer().dim[i].stride;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`r = r % output->buffer().dim[i].stride;`

			`if (i != mAxis) {`
beta 0.1.1.6 - add support for windows - fix bugs in converting dropout - fix bugs in post treat 2019-06-10 21:08:55 +08:00			`inputOffset += (cord * inputs[0]->buffer().dim[j++].stride);`
			`} else {`
			`cordOnAxis = cord;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`}`
			`}`

beta 0.1.1.6 - add support for windows - fix bugs in converting dropout - fix bugs in post treat 2019-06-10 21:08:55 +08:00			`((T )output->buffer().host)[offset] = ((T )inputs[cordOnAxis]->buffer().host)[inputOffset];`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`}`
			`}`

			`return NO_ERROR;`
			`}`

			`ErrorCode CPUPack::onExecute(const std::vector<MNN::Tensor > &inputs, const std::vector<MNN::Tensor > &outputs) {`
			`auto input = inputs[0];`
			`auto output = outputs[0];`

			`if (inputs.size() == 1) {`
			`::memcpy(output->buffer().host, input->buffer().host, input->size());`
			`return NO_ERROR;`
			`}`

- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`return MNNPackLayerForward<int32_t>(inputs, outputs);`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`}`

			`class CPUPackCreator : public CPUBackend::Creator {`
			`public:`
			`virtual Execution onCreate(const std::vector<Tensor > &inputs, const std::vector<Tensor *> &outputs,`
			`const MNN::Op op, Backend backend) const {`
			`auto pack = op->main_as_PackParam();`
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`return new CPUPack(backend, pack->axis());`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`}`
			`};`
			`REGISTER_CPU_OP_CREATOR(CPUPackCreator, OpType_Pack);`
			`} // namespace MNN`