mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			286 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			286 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  CPUPadding.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2019/6/24.
 | |
| //  Copyright © 2018 Alibaba. All rights reserved.
 | |
| //
 | |
| 
 | |
| #include "backend/cpu/CPUPadding.hpp"
 | |
| #include "core/Macro.h"
 | |
| #include "core/TensorUtils.hpp"
 | |
| #include <string.h>
 | |
| #include "backend/cpu/CPUTensorConvert.hpp"
 | |
| namespace MNN {
 | |
| 
 | |
| ErrorCode memsetHelper(const Tensor *padValueTensor, Tensor *output) {
 | |
|     auto dtype     = output->getType();
 | |
|     const int size = output->elementSize();
 | |
|     if (dtype == halide_type_of<float>()) {
 | |
|         const auto padValue = padValueTensor->host<float>()[0];
 | |
|         auto ptr            = output->host<float>();
 | |
|         std::fill(ptr, ptr + size, padValue);
 | |
|     } else if (dtype == halide_type_of<int>()) {
 | |
|         const auto padValue = padValueTensor->host<int>()[0];
 | |
|         auto ptr            = output->host<int>();
 | |
|         std::fill(ptr, ptr + size, padValue);
 | |
|     } else {
 | |
|         MNN_ERROR("TODO, support other data type: %d\n", dtype.code);
 | |
|         return NOT_SUPPORT;
 | |
|     }
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| //  refer to tflite mirrorPad
 | |
// Memo entry for MirrorPadImpl: records the half-open range
// [start, end) of output element indices that a sub-tensor expanded
// into, so an identical sub-tensor encountered later can be produced
// with a single memcpy instead of another recursive walk.
// start == -1 marks "not computed yet".
struct CacheElement {
    int start; // first output element index written for this sub-tensor, -1 if unset
    int end;   // one past the last output element index written
};
 | |
// Recursively writes mirror-padded `data` into `paddedData`, one dimension
// per recursion level (refer to tflite mirrorPad).
//   cache       - memo table, one CacheElement per (dimension, flat input
//                 index); lets repeated sub-tensors be memcpy'd from the
//                 already-written output instead of re-recursed.
//   pad         - per-dimension [before, after] pairs: pad[2*d], pad[2*d+1].
//   currentDim  - dimension handled at this level; recursion bottoms out
//                 when it reaches paddedData->dimensions() and copies one
//                 element.
//   flatIndex   - flat element offset into `data` for the current sub-tensor.
//   outputIndex - next flat element offset to write in `paddedData`.
//   offset      - 0 for SYMMETRIC (edge element repeated), 1 for REFLECT
//                 (edge element skipped when mirroring).
// Returns the updated outputIndex after writing this sub-tensor.
int MirrorPadImpl(const Tensor *data, CacheElement *cache, Tensor *paddedData, const int *pad, int currentDim,
                  int flatIndex, int outputIndex, int offset) {
    const int bytes = data->getType().bytes();
    if (currentDim == paddedData->dimensions()) {
        // Leaf: copy a single element, guarding against running past the
        // output buffer.
        if (outputIndex >= paddedData->elementSize()) {
            return outputIndex;
        }
        memcpy(paddedData->host<char>() + outputIndex * bytes, data->host<char>() + flatIndex * bytes, bytes);
        return outputIndex + 1;
    }
    // One memo slot per (dimension, input element) pair.
    const int cacheIndex = currentDim * data->elementSize() + flatIndex;
    auto &cacheEntry     = cache[cacheIndex];
    if (cacheEntry.start != -1) {
        // This exact sub-tensor was already expanded earlier in the output;
        // duplicate its output range with one bulk copy.
        const int size = cacheEntry.end - cacheEntry.start;
        memcpy(paddedData->host<char>() + outputIndex * bytes, paddedData->host<char>() + cacheEntry.start * bytes,
               size * bytes);
        return outputIndex + size;
    }

    cacheEntry.start     = outputIndex;
    int leftPad          = pad[2 * currentDim];
    int rightPad         = pad[2 * currentDim + 1];
    const int multiplier = data->stride(currentDim); // flat step per index along this dim

    // Left (leading) mirrored region: walk source indices backwards from the
    // edge, starting at `leftPad + offset - 1` down to `offset`.
    for (int i = leftPad + offset - 1; i >= offset && leftPad > 0; --i, --leftPad) {
        outputIndex = MirrorPadImpl(data, cache, paddedData, pad, currentDim + 1, flatIndex + i * multiplier,
                                    outputIndex, offset);
    }
    // Body: copy the dimension's own elements in order.
    const int curDimLength = data->length(currentDim);
    for (int i = 0; i < curDimLength; ++i) {
        outputIndex = MirrorPadImpl(data, cache, paddedData, pad, currentDim + 1, flatIndex + i * multiplier,
                                    outputIndex, offset);
    }
    // Right (trailing) mirrored region: walk backwards from the far edge.
    for (int i = curDimLength - (1 + offset); i >= 0 && rightPad > 0; --i, --rightPad) {
        outputIndex = MirrorPadImpl(data, cache, paddedData, pad, currentDim + 1, flatIndex + i * multiplier,
                                    outputIndex, offset);
    }

    cacheEntry.end = outputIndex;

    return outputIndex;
}
 | |
| 
 | |
| static ErrorCode resizeImpl(Backend *bn, const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
 | |
|                             Tensor *cache) {
 | |
|     const int size = inputs[0]->elementSize() * inputs[0]->dimensions() * 2;
 | |
|     cache->setType(DataType_DT_INT32);
 | |
|     cache->buffer().dimensions = 1;
 | |
|     cache->setLength(0, size);
 | |
|     bool success = bn->onAcquireBuffer(cache, Backend::DYNAMIC);
 | |
|     if (!success) {
 | |
|         return OUT_OF_MEMORY;
 | |
|     }
 | |
|     bn->onReleaseBuffer(cache, Backend::DYNAMIC);
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| ErrorCode CPUPadding::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     if (mMode != PadValueMode_CONSTANT) {
 | |
|         return resizeImpl(backend(), inputs, outputs, &mCache);
 | |
|     }
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
// Constant-mode padding for up to 6-D tensors of any element size.
//   inputs[0] - source tensor
//   inputs[1] - int32 padding pairs [before_d, after_d] per dimension
//   inputs[2] - optional scalar pad value; when absent the output is
//               zero-filled
// The tensor is right-aligned into 6 logical dimensions (missing leading
// dims get length 1 / pad 0), then copied row-by-row with byte strides.
// NOTE(review): memsetHelper's error code is discarded here — an
// unsupported dtype falls through with a zeroed pad region; consider
// surfacing it to the caller.
void CPUPadding::execute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs, PadValueMode mode) {
    auto input   = inputs[0];
    auto output  = outputs[0];
    auto padding = inputs[1]->host<int32_t>();
    // Pre-fill the whole output with the pad value (or zero) so only the
    // interior region needs explicit copies.
    if (inputs.size() == 3) {
        memsetHelper(inputs[2], output);
    } else {
        ::memset(output->host<char>(), 0, output->size());
    }
    auto outputData = output->host<char>();
    auto inputData  = input->host<char>();
#define MAX_DIM 6
    MNN_ASSERT(output->dimensions() <= MAX_DIM);
    int dims[MAX_DIM];
    int oStride[MAX_DIM];
    int iStride[MAX_DIM];
    int pad[MAX_DIM];
    auto bytes = input->getType().bytes();
    // Defaults for the padded-out leading slots: length 1, no pad, stride 0.
    for (int i = 0; i < MAX_DIM; ++i) {
        pad[i]     = 0;
        dims[i]    = 1;
        oStride[i] = 0;
        iStride[i] = 0;
    }
    // Right-align the real dimensions into the MAX_DIM slots; strides are
    // converted from elements to bytes. Only the "before" pad of each dim
    // is needed for offsetting ("after" pad is already value-filled).
    int offset = MAX_DIM - input->dimensions();
    for (int i = 0; i < input->dimensions(); ++i) {
        pad[offset + i]     = padding[2 * i];
        dims[offset + i]    = input->length(i);
        oStride[offset + i] = output->stride(i) * bytes;
        iStride[offset + i] = input->stride(i) * bytes;
    }
    for (int w = 0; w < dims[0]; ++w) {
        auto ow = outputData + (w + pad[0]) * oStride[0];
        auto sw = inputData + w * iStride[0];
// PTR(x, y, i): derive level-x output/source pointers (o##x / s##x) from the
// enclosing level-y pointers, advancing by index x (plus pad on the output
// side) along dimension slot i.
#define PTR(x, y, i)                              \
    auto o##x = o##y + (x + pad[i]) * oStride[i]; \
    auto s##x = s##y + x * iStride[i];

        for (int v = 0; v < dims[1]; ++v) {
            PTR(v, w, 1);
            for (int u = 0; u < dims[2]; ++u) {
                PTR(u, v, 2);
                for (int z = 0; z < dims[3]; ++z) {
                    PTR(z, u, 3);
                    for (int y = 0; y < dims[4]; ++y) {
                        PTR(y, z, 4);
                        // Copy one innermost row: iStride[4] bytes is the
                        // byte stride of slot 4, i.e. one full run of the
                        // last dimension.
                        ::memcpy(oy + pad[5] * oStride[5], sy, iStride[4]);
                    }
                }
            }
        }
    }
#undef MAX_DIM
#undef PTR
}
 | |
| 
 | |
| ErrorCode CPUPadding::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     if (mMode == PadValueMode_CONSTANT) {
 | |
|         execute(inputs, outputs, mMode);
 | |
|     } else {
 | |
|         // REFLECT or SYMMETRIC
 | |
|         int offset     = mMode == PadValueMode_SYMMETRIC ? 0 : 1;
 | |
|         auto cacheData = reinterpret_cast<CacheElement *>(mCache.host<char>());
 | |
|         std::fill(cacheData, cacheData + mCache.elementSize() / 2, CacheElement{-1, -1});
 | |
|         const int *pad  = inputs[1]->host<int32_t>();
 | |
|         int outputIndex = 0;
 | |
|         MirrorPadImpl(inputs[0], cacheData, outputs[0], pad, 0, 0, outputIndex, offset);
 | |
|     }
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| ErrorCode CPUPaddingPacked::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     auto padding    = inputs[1];
 | |
|     auto paddingPtr = padding->host<int32_t>();
 | |
|     if (paddingPtr[2] != 0 || paddingPtr[3] != 0 || mMode != PadValueMode_CONSTANT) {
 | |
|         mNeedConvert = true;
 | |
|     }
 | |
|     if (!mNeedConvert) {
 | |
|         return NO_ERROR;
 | |
|     }
 | |
|     mTempOutput.reset(Tensor::createDevice<float>(outputs[0]->shape(), Tensor::CAFFE));
 | |
|     mTempInput.reset(Tensor::createDevice<float>(inputs[0]->shape(), Tensor::CAFFE));
 | |
|     bool res = backend()->onAcquireBuffer(mTempOutput.get(), Backend::DYNAMIC);
 | |
|     res      = res && backend()->onAcquireBuffer(mTempInput.get(), Backend::DYNAMIC);
 | |
|     if (!res) {
 | |
|         return OUT_OF_MEMORY;
 | |
|     }
 | |
|     mTempInputs  = {mTempInput.get(), inputs[1]};
 | |
|     mTempOutputs = {mTempOutput.get()};
 | |
| 
 | |
|     if (mMode != PadValueMode_CONSTANT) {
 | |
|         resizeImpl(backend(), inputs, outputs, &mCache);
 | |
|     }
 | |
| 
 | |
|     backend()->onReleaseBuffer(mTempOutput.get(), Backend::DYNAMIC);
 | |
|     backend()->onReleaseBuffer(mTempInput.get(), Backend::DYNAMIC);
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| ErrorCode CPUPaddingPacked::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     auto input  = inputs[0];
 | |
|     auto output = outputs[0];
 | |
|     if (mNeedConvert) {
 | |
|         CPUTensorConverter::convert(input, mTempInput.get());
 | |
|         // CPUPadding::execute(mTempInputs, mTempOutputs, mMode);
 | |
| 
 | |
|         if (mMode == PadValueMode_CONSTANT) {
 | |
|             CPUPadding::execute(mTempInputs, mTempOutputs, mMode);
 | |
|         } else {
 | |
|             // REFLECT or SYMMETRIC
 | |
|             int offset     = mMode == PadValueMode_SYMMETRIC ? 0 : 1;
 | |
|             auto cacheData = reinterpret_cast<CacheElement *>(mCache.host<char>());
 | |
|             std::fill(cacheData, cacheData + mCache.elementSize(), CacheElement{-1, -1});
 | |
|             const int *pad  = inputs[1]->host<int32_t>();
 | |
|             int outputIndex = 0;
 | |
|             MirrorPadImpl(mTempInput.get(), cacheData, mTempOutput.get(), pad, 0, 0, outputIndex, offset);
 | |
|         }
 | |
| 
 | |
|         CPUTensorConverter::convert(mTempOutput.get(), output);
 | |
|         return NO_ERROR;
 | |
|     }
 | |
|     auto iw = input->width();
 | |
|     auto ih = input->height();
 | |
|     auto ic = input->channel();
 | |
|     auto ib = input->batch();
 | |
| 
 | |
|     auto ow      = output->width();
 | |
|     auto oh      = output->height();
 | |
|     auto icC4    = UP_DIV(ic, 4);
 | |
|     auto padding = inputs[1]->host<int32_t>();
 | |
|     if (inputs.size() == 3) {
 | |
|         auto code = memsetHelper(inputs[2], output);
 | |
|         if (code != NO_ERROR) {
 | |
|             return code;
 | |
|         }
 | |
|     } else {
 | |
|         ::memset(output->host<float>(), 0, output->size());
 | |
|     }
 | |
|     for (int n = 0; n < ib; ++n) {
 | |
|         auto inputN  = input->host<float>() + input->stride(0) * n;
 | |
|         auto outputN = output->host<float>() + output->stride(0) * (padding[2 * 0] + n);
 | |
|         for (int c = 0; c < icC4; ++c) {
 | |
|             auto inputC  = inputN + c * iw * ih * 4;
 | |
|             auto outputC = outputN + c * ow * oh * 4;
 | |
| 
 | |
|             for (int h = 0; h < ih; ++h) {
 | |
|                 auto inputH  = inputC + h * iw * 4;
 | |
|                 auto outputH = outputC + (h + padding[2 * 2]) * ow * 4;
 | |
| 
 | |
|                 ::memcpy(outputH + padding[2 * 3] * 4, inputH, iw * 4 * sizeof(float));
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| class CPUPaddingCreator : public CPUBackend::Creator {
 | |
| public:
 | |
|     virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
 | |
|                                 const MNN::Op *op, Backend *backend) const {
 | |
|         auto param = op->main_as_PadParam();
 | |
|         auto mode  = PadValueMode_CONSTANT;
 | |
|         if (param) {
 | |
|             mode = param->mode();
 | |
|         }
 | |
|         if (TensorUtils::getDescribe(inputs[0])->dimensionFormat != MNN_DATA_FORMAT_NC4HW4) {
 | |
|             return new CPUPadding(backend, mode);
 | |
|         }
 | |
|         if (inputs[0]->dimensions() != 4) {
 | |
|             MNN_ERROR("Currently padding only support 4 dimension for NC4HW4\n");
 | |
|             return nullptr;
 | |
|         }
 | |
|         if (inputs[0]->buffer().type.bits != 32) {
 | |
|             MNN_ERROR("Currently padding NC4HW4 only support 32 bit padding\n");
 | |
|             return nullptr;
 | |
|         }
 | |
|         return new CPUPaddingPacked(backend, mode);
 | |
|     }
 | |
| };
 | |
| 
 | |
| REGISTER_CPU_OP_CREATOR(CPUPaddingCreator, OpType_Padding);
 | |
| }; // namespace MNN
 |