MNN/source/backend/cpu/CPUScale.cpp

//
//  CPUScale.cpp
//  MNN
//
//  Created by MNN on 2018/08/07.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "CPUScale.hpp"
#include "CPUBackend.hpp"
#include "compute/CommonOptFunction.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "core/Concurrency.h"

namespace MNN {
// Builds the packed scale/bias table consumed by onExecute.
//
// The table is a float tensor of shape {2, ALIGN_UP4(outputCount)} acquired from
// the backend as a STATIC buffer: the first row receives the op's scale values,
// the second row receives the op's bias values. Everything not overwritten by
// the op's data stays zero (the buffer is cleared first).
//
// op: operator read through main_as_Scale().
// bn: backend that owns the buffer; it is also forwarded to MNN::Execution.
//
// When the scale data is missing or the buffer cannot be acquired, mValid is set
// to false and the constructor returns without a usable mScaleBias.
CPUScale::CPUScale(const Op* op, Backend* bn) : MNN::Execution(bn) {
    auto scale = op->main_as_Scale();
    // Without scale data there is no channel count to size the table with.
    if (nullptr == scale || nullptr == scale->scaleData()) {
        MNN_ERROR("Invalid scale data for CPUScale\n");
        mValid = false;
        return;
    }
    const int outputCount  = scale->scaleData()->size();
    const int alignedCount = ALIGN_UP4(outputCount);
    mScaleBias.reset(Tensor::createDevice<float>({2, alignedCount}));
    auto res = bn->onAcquireBuffer(mScaleBias.get(), Backend::STATIC);
    if (!res) {
        MNN_ERROR("Error for alloc buffer for CPUScale\n");
        mScaleBias = nullptr;
        mValid = false;
        return;
    }
    auto tablePtr = mScaleBias->host<float>();
    // Zero the whole table so padding lanes and an absent bias contribute nothing.
    ::memset(tablePtr, 0, mScaleBias->size());
    ::memcpy(tablePtr, scale->scaleData()->data(), outputCount * sizeof(float));
    auto bias = scale->biasData();
    if (nullptr != bias && nullptr != bias->data()) {
        // Never read more bias values than the op actually provides; a bias vector
        // shorter than the scale vector would otherwise be read out of bounds.
        const int biasCount = static_cast<int>(bias->size());
        const int copyCount = biasCount < outputCount ? biasCount : outputCount;
        ::memcpy(tablePtr + alignedCount, bias->data(), copyCount * sizeof(float));
    }
}
// Hands the scale/bias buffer back to the backend, if one was acquired.
CPUScale::~CPUScale() {
    // mScaleBias is reset to null when the constructor fails to get its buffer.
    if (nullptr == mScaleBias) {
        return;
    }
    backend()->onReleaseBuffer(mScaleBias.get(), Backend::STATIC);
}
// Applies the scale/bias table in mScaleBias to inputs[0] and writes outputs[0].
//
// NC4HW4 inputs are processed one (batch, channel-quad) slice at a time, with the
// slices distributed round-robin over the backend's threads. Any other layout is
// asserted to be NHWC and handled by a single MNNScaleAndAddBiasOutside call.
// Always returns NO_ERROR.
ErrorCode CPUScale::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    Tensor* src = inputs[0];
    Tensor* dst = outputs[0];

    // Scale values start at the beginning of the table; bias values start one
    // row (length(1) floats) later.
    float* const scaleTable = mScaleBias->host<float>();
    float* const biasTable  = scaleTable + mScaleBias->length(1);

    const auto format = TensorUtils::getDescribe(src)->dimensionFormat;
    if (format != MNN_DATA_FORMAT_NC4HW4) {
        MNN_ASSERT(format == MNN_DATA_FORMAT_NHWC);

        const auto channelCount = src->channel();
        const auto outerCount   = src->elementSize() / channelCount;
        MNNScaleAndAddBiasOutside(dst->host<float>(), src->host<float>(), biasTable, scaleTable, outerCount,
                                  channelCount);
        return NO_ERROR;
    }

    const auto batchCount   = src->buffer().dim[0].extent;
    const auto channelQuads = UP_DIV(src->channel(), 4);

    // Product of every dimension after batch and channel.
    int planeSize = 1;
    for (int axis = 2; axis < src->buffer().dimensions; ++axis) {
        planeSize *= src->length(axis);
    }

    const auto quadStride = planeSize * 4;
    const auto quadTotal  = batchCount * channelQuads;
    const int threadCount = static_cast<CPUBackend*>(backend())->threadNumber();

    MNN_CONCURRENCY_BEGIN(tId, threadCount) {
        for (int quad = tId; quad < quadTotal; quad += threadCount) {
            // The slice index runs over batch * channelQuads; the table index wraps per batch.
            const auto offset      = quadStride * quad;
            const auto channelQuad = quad % channelQuads;
            MNNScaleAndAddBias(dst->host<float>() + offset, src->host<float>() + offset,
                               biasTable + 4 * channelQuad, scaleTable + 4 * channelQuad, planeSize, 1);
        }
    }
    MNN_CONCURRENCY_END();
    return NO_ERROR;
}
// Creator that produces CPUScale executions for the CPU backend.
class CPUScaleCreator : public CPUBackend::Creator {
public:
    // Builds a CPUScale from the op and backend; the input/output tensors are not consulted.
    // Ownership of the returned object passes to the caller.
    Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                        const MNN::Op* op, Backend* backend) const override {
        Execution* execution = new CPUScale(op, backend);
        return execution;
    }
};

// Associates CPUScaleCreator with OpType_Scale through the REGISTER_CPU_OP_CREATOR macro.
// NOTE(review): the macro is defined elsewhere; presumably it adds the creator to the
// CPU backend's creator registry at static-initialization time — confirm against its definition.
REGISTER_CPU_OP_CREATOR(CPUScaleCreator, OpType_Scale);
} // namespace MNN
beta 0.1.0 2019-04-17 10:49:11 +08:00			`//`
			`// CPUScale.cpp`
			`// MNN`
			`//`
			`// Created by MNN on 2018/08/07.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`#include "CPUScale.hpp"`
			`#include "CPUBackend.hpp"`
			`#include "compute/CommonOptFunction.h"`
Update 2019-12-27 22:16:57 +08:00			`#include "core/Macro.h"`
			`#include "core/TensorUtils.hpp"`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`#include "core/Concurrency.h"`
beta 0.1.0 2019-04-17 10:49:11 +08:00
			`namespace MNN {`
			`CPUScale::CPUScale(const Op* op, Backend* bn) : MNN::Execution(bn) {`
			`auto scale = op->main_as_Scale();`
			`int outputCount = scale->scaleData()->size();`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`mScaleBias.reset(`
			`Tensor::createDevice<float>(`
			`{2, ALIGN_UP4(outputCount)}`
			`));`
			`auto res = bn->onAcquireBuffer(mScaleBias.get(), Backend::STATIC);`
			`if (!res) {`
			`MNN_ERROR("Error for alloc buffer for CPUScale\n");`
			`mScaleBias = nullptr;`
			`mValid = false;`
			`return;`
			`}`
			`::memset(mScaleBias->host<float>(), 0, mScaleBias->size());`
			`::memcpy(mScaleBias->host<float>(), scale->scaleData()->data(), outputCount * sizeof(float));`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`if (nullptr != scale->biasData() && nullptr != scale->biasData()->data()) {`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`::memcpy(mScaleBias->host<float>() + ALIGN_UP4(outputCount), scale->biasData()->data(), outputCount * sizeof(float));`
			`}`
			`}`
			`CPUScale::~CPUScale() {`
			`if (nullptr != mScaleBias) {`
			`backend()->onReleaseBuffer(mScaleBias.get(), Backend::STATIC);`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`}`
			`}`
			`ErrorCode CPUScale::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {`
			`auto input = inputs[0];`
			`auto output = outputs[0];`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`auto scalePtr = mScaleBias->host<float>();`
			`auto biasPtr = mScaleBias->host<float>() + 1 * mScaleBias->length(1);`
Update 2020-02-26 09:57:17 +08:00			`//FUNC_PRINT(TensorUtils::getDescribe(input)->dimensionFormat);`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`if (TensorUtils::getDescribe(input)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4) {`
			`auto batch = input->buffer().dim[0].extent;`
			`auto depthQuad = UP_DIV(input->channel(), 4);`
- dynamic computation graph (beta) - add supports (/express) - add tests - add benchmarks with it (/benchmark/exprModels) - Python - MNN engine and tools were submitted to pip - available on Windows/macOS/Linux - Engine/Converter - add supports for each op benchmarking - refactor optimizer by separating steps - CPU - add supports for Conv3D, Pool3D, ELU, ReverseSequence - fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf - OpenCL - add half transform in CPU - add broadcast supports for binary - optimize Conv2D, Reshape, Eltwise, Gemm, etc. - OpenGL - add sub, real div supports for binary - add supports for unary - optimize Conv2D, Reshape - Vulkan - add max supports for eltwise - Metal - fix metallib missing problem - Train/Quantization - use express to refactor training codes 2019-09-26 21:02:07 +08:00			`int planeNumber = 1;`
			`for (int i = 2; i < input->buffer().dimensions; ++i) {`
			`planeNumber *= input->length(i);`
			`}`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`auto depthStride = planeNumber * 4;`
			`auto totalDepth = batch * depthQuad;`
			`int numberThread = ((CPUBackend*)backend())->threadNumber();`
			`MNN_CONCURRENCY_BEGIN(tId, numberThread) {`
			`for (int i = tId; i < totalDepth; i+=numberThread) {`
Update 2020-02-26 09:57:17 +08:00			`auto depthIndex = i % depthQuad;`
			`MNNScaleAndAddBias(output->host<float>() + depthStride * i, input->host<float>() + depthStride * i, biasPtr + 4 * depthIndex,`
			`scalePtr + 4 * depthIndex, planeNumber, 1);`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`}`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`}`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`MNN_CONCURRENCY_END();`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`return NO_ERROR;`
			`}`
			`MNN_ASSERT(TensorUtils::getDescribe(input)->dimensionFormat == MNN_DATA_FORMAT_NHWC);`

beta 0.1.1 - update resources and docs - unite tensor's width/height/channel/batch getter - optimize several ops - fix compile warnings and errors on Ubuntu - some other bug fixes 2019-05-05 20:27:57 +08:00			`auto channel = input->channel();`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`auto outside = input->elementSize() / channel;`
Sync. Fix OpenGL related building issues. Build the whole suite on Android CI (#580) * Sync code with latest internal version * Update CMake * Fix logging issues * Fix OpenGL Building * Bump CMakeLists version. Update Podspec * Update MetalLib Lookup logic * Fix Windows Build 2020-01-15 13:33:47 +08:00			`MNNScaleAndAddBiasOutside(output->host<float>(), input->host<float>(), biasPtr, scalePtr, outside, channel);`
beta 0.1.0 2019-04-17 10:49:11 +08:00
			`return NO_ERROR;`
			`}`
			`class CPUScaleCreator : public CPUBackend::Creator {`
			`public:`
			`virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,`
			`const MNN::Op* op, Backend* backend) const override {`
			`return new CPUScale(op, backend);`
			`}`
			`};`

			`REGISTER_CPU_OP_CREATOR(CPUScaleCreator, OpType_Scale);`
			`} // namespace MNN`