mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			250 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			250 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  GeometryConvUtils.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2020/07/15.
 | |
| //  Copyright © 2018, Alibaba Group Holding Limited
 | |
| //
 | |
| 
 | |
| #include "GeometryConvUtils.hpp"
 | |
| #include "ConvertUtils.hpp"
 | |
| 
 | |
| #define ADD_PAD_VALUE(POS, OFFSET, NUM, STRIDE)               \
 | |
|     if (POS##Pad > 0) {                                       \
 | |
|         Tensor::InsideDescribe::Region region;                \
 | |
|         region.origin        = padVal;                        \
 | |
|         region.size[0]       = ic;                            \
 | |
|         region.size[1]       = NUM;                           \
 | |
|         region.size[2]       = POS##Pad;                      \
 | |
|         region.src.offset    = 0;                             \
 | |
|         region.dst.offset    = dstOffsetKx + (OFFSET);        \
 | |
|         region.src.stride[0] = 0;                             \
 | |
|         region.src.stride[1] = 0;                             \
 | |
|         region.src.stride[2] = 0;                             \
 | |
|         region.dst.stride[0] = dstStrideChannel;              \
 | |
|         region.dst.stride[1] = STRIDE;                        \
 | |
|         region.dst.stride[2] = 1;                             \
 | |
|         des->regions.emplace_back(std::move(region));         \
 | |
|     }
 | |
| namespace MNN {
 | |
| flatbuffers::Offset<Op> GeometryConvUtils::makeRelu6(flatbuffers::FlatBufferBuilder& builder, float minValue, float maxValue) {
 | |
|     Relu6Builder relu6B(builder);
 | |
|     relu6B.add_maxValue(maxValue);
 | |
|     relu6B.add_minValue(minValue);
 | |
|     auto paOffset = relu6B.Finish().Union();
 | |
|     OpBuilder opB(builder);
 | |
|     opB.add_type(OpType_ReLU6);
 | |
|     opB.add_main_type(OpParameter_Relu6);
 | |
|     opB.add_main(paOffset);
 | |
|     return opB.Finish();
 | |
| }
 | |
| void GeometryConvUtils::im2Col3d(Tensor* im2Col, Tensor* input, int ic, int kd, int kh, int kw, int batch, int od, int oh, int ow,
 | |
|     int id, int ih, int iw, int sd, int sh, int sw, int dd, int dh, int dw, int pd, int ph, int pw, int srcKernelOffset) {
 | |
|     im2Col->buffer().type       = halide_type_of<float>();
 | |
|     im2Col->buffer().dimensions = 2;
 | |
|     im2Col->setLength(0, ic * kd * kh * kw);
 | |
|     im2Col->setLength(1, batch * od * oh * ow);
 | |
|     TensorUtils::setLinearLayout(im2Col);
 | |
|     auto des             = TensorUtils::getDescribe(im2Col);
 | |
|     des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL;
 | |
|     des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
 | |
|     des->regions.clear();
 | |
|     des->regions.reserve(batch * ic * kd * kh * kw);
 | |
|     for (int c = 0; c < ic; ++c) {
 | |
|         for (int n = 0; n < batch; ++n) {
 | |
|             auto dstOffset = (c * kd * kh * kw * batch + n) * od * oh * ow;
 | |
|             // auto dstOffset = (n * ic + c) * od * oh * ow * kd * kh * kw;
 | |
|             auto srcOffset = (n * ic + c) * id * ih * iw;
 | |
|             for (int kz = 0; kz < kd; ++kz) {
 | |
|                 auto startSz = kz * dd - pd;
 | |
|                 int startDz = 0;
 | |
|                 if (startSz < 0) {
 | |
|                     startDz = ((-startSz) + sd - 1) / sd;
 | |
|                     startSz = startSz + startDz * sd;
 | |
|                 }
 | |
|                 auto endDz = od - 1;
 | |
|                 auto endSz = endDz * sd + kz * dd - pd;
 | |
|                 if (endSz >= id) {
 | |
|                     endDz = endDz - (endSz - id + sd) / sd;
 | |
|                     endSz = endDz * sd + kz * dd - pd;
 | |
|                 }
 | |
|                 if (startDz > endDz || endDz < 0 || startSz >= id) {
 | |
|                     continue;
 | |
|                 }
 | |
|                 auto dstOffsetKz = dstOffset + kz * kw * kh * ow * oh * od * batch + startDz * oh *  ow;
 | |
|                 auto srcOffsetKz = srcOffset + startSz * ih * iw;
 | |
|                 for (int ky = 0; ky < kh; ++ky) {
 | |
|                     auto startSy = ky * dh - ph;
 | |
|                     int startDy  = 0;
 | |
|                     if (startSy < 0) {
 | |
|                         startDy = ((-startSy) + sh - 1) / sh;
 | |
|                         startSy = startSy + startDy * sh;
 | |
|                     }
 | |
|                     auto endDy = oh - 1;
 | |
|                     auto endSy = endDy * sh + ky * dh - ph;
 | |
|                     if (endSy >= ih) {
 | |
|                         endDy = endDy - (endSy - ih + sh) / sh;
 | |
|                         endSy = endDy * sh + ky * dh - ph;
 | |
|                     }
 | |
|                     if (startDy > endDy || endDy < 0 || startSy >= ih) {
 | |
|                         continue;
 | |
|                     }
 | |
|                     auto dstOffsetKy = dstOffsetKz + ky * kw * ow * oh * od * batch + startDy * ow;
 | |
|                     auto srcOffsetKy = srcOffsetKz + startSy * iw;
 | |
|                     for (int kx = 0; kx < kw; ++kx) {
 | |
|                         auto startSx = kx * dw - pw;
 | |
|                         int startDx  = 0;
 | |
|                         if (startSx < 0) {
 | |
|                             startDx = ((-startSx) + sw - 1) / sw;
 | |
|                             startSx = startSx + startDx * sw;
 | |
|                         }
 | |
|                         auto endDx = ow - 1;
 | |
|                         auto endSx = endDx * sw + kx * dw - pw;
 | |
|                         if (endSx >= iw) {
 | |
|                             endDx = endDx - (endSx - iw + sw) / sw;
 | |
|                             endSx = endDx * sw + kx * dw - pw;
 | |
|                         }
 | |
|                         if (startDx > endDx || endDx < 0 || startSx >= iw) {
 | |
|                             continue;
 | |
|                         }
 | |
|                         auto dstOffsetKx = dstOffsetKy + kx * od * oh * ow * batch + startDx;
 | |
|                         auto srcOffsetKx = srcOffsetKy + startSx + srcKernelOffset * (kx + ky * kw);
 | |
|                         Tensor::InsideDescribe::Region region;
 | |
|                         region.origin        = input;
 | |
|                         region.size[0]       = endDz - startDz + 1;
 | |
|                         region.size[1]       = endDy - startDy + 1;
 | |
|                         region.size[2]       = endDx - startDx + 1;
 | |
|                         region.src.offset    = srcOffsetKx;
 | |
|                         region.dst.offset    = dstOffsetKx;
 | |
|                         region.src.stride[0] = sd * ih * iw;
 | |
|                         region.dst.stride[0] = oh * ow;
 | |
|                         region.src.stride[1] = sh * iw;
 | |
|                         region.dst.stride[1] = ow;
 | |
|                         region.src.stride[2] = sw;
 | |
|                         region.dst.stride[2] = 1;
 | |
|                         des->regions.emplace_back(std::move(region));
 | |
|                     }
 | |
|                 }
 | |
|                 // MNN_ASSERT(des->regions.size() > 0);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| void GeometryConvUtils::im2Col(Tensor* im2Col, Tensor* input, int ic, int kh, int kw, int batch, int oh, int ow, int ih,
 | |
|                                int iw, int sh, int sw, int dh, int dw, std::pair<int, int> pads, int srcKernelOffset, Tensor* padVal) {
 | |
|     im2Col->buffer().type       = halide_type_of<float>();
 | |
|     im2Col->buffer().dimensions = 2;
 | |
|     im2Col->setLength(0, ic * kw * kh);
 | |
|     im2Col->setLength(1, batch * ow * oh);
 | |
|     TensorUtils::setLinearLayout(im2Col);
 | |
|     auto des             = TensorUtils::getDescribe(im2Col);
 | |
|     des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL;
 | |
|     des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
 | |
|     des->regions.clear();
 | |
|     if (padVal == nullptr) {
 | |
|         des->regions.reserve(batch * kw * kh);
 | |
|     }
 | |
|     int dstStrideChannel = batch * oh * ow * kh * kw;
 | |
|     int srcStrideChannel = iw * ih;
 | |
|     for (int n = 0; n < batch; ++n) {
 | |
|         auto dstOffset = ow * oh * n;
 | |
|         auto srcOffset = n * ic * iw * ih;
 | |
|         for (int ky = 0; ky < kh; ++ky) {
 | |
|             auto startSy = ky * dh - pads.second;
 | |
|             int startDy  = 0;
 | |
|             int upPad = 0, belowPad = 0;
 | |
|             if (startSy < 0) {
 | |
|                 startDy = ((-startSy) + sh - 1) / sh;
 | |
|                 startSy = startSy + startDy * sh;
 | |
|                 upPad = startDy * ow;
 | |
|             }
 | |
|             auto endDy = oh - 1;
 | |
|             auto endSy = endDy * sh + ky * dh - pads.second;
 | |
|             if (endSy >= ih) {
 | |
|                 endDy = endDy - (endSy - ih + sh) / sh;
 | |
|                 endSy = endDy * sh + ky * dh - pads.second;
 | |
|                 belowPad = (oh - endDy - 1) * ow;
 | |
|             }
 | |
|             if (startDy > endDy || endDy < 0 || startSy >= ih) {
 | |
|                 continue;
 | |
|             }
 | |
|             auto dstOffsetKy = dstOffset + ky * kw * ow * oh * batch + startDy * ow;
 | |
|             auto srcOffsetKy = srcOffset + startSy * iw;
 | |
|             for (int kx = 0; kx < kw; ++kx) {
 | |
|                 auto startSx = kx * dw - pads.first;
 | |
|                 int startDx  = 0;
 | |
|                 int leftPad = 0, rightPad = 0;
 | |
|                 if (startSx < 0) {
 | |
|                     startDx = ((-startSx) + sw - 1) / sw;
 | |
|                     startSx = startSx + startDx * sw;
 | |
|                     leftPad = startDx;
 | |
|                 }
 | |
|                 auto endDx = ow - 1;
 | |
|                 auto endSx = endDx * sw + kx * dw - pads.first;
 | |
|                 if (endSx >= iw) {
 | |
|                     endDx = endDx - (endSx - iw + sw) / sw;
 | |
|                     endSx = endDx * sw + kx * dw - pads.first;
 | |
|                     rightPad = ow - endDx - 1;
 | |
|                 }
 | |
|                 if (startDx > endDx || endDx < 0 || startSx >= iw) {
 | |
|                     continue;
 | |
|                 }
 | |
|                 auto dstOffsetKx = dstOffsetKy + kx * ow * oh * batch + startDx;
 | |
|                 auto srcOffsetKx = srcOffsetKy + startSx + srcKernelOffset * (kx + ky * kw);
 | |
|                 const int ohExcludePad = endDy - startDy + 1;
 | |
|                 const int owExcludePad = endDx - startDx + 1;
 | |
|                 // if given padVal, pad value will use padVa otherwise use zero
 | |
|                 if (padVal) {
 | |
|                     ADD_PAD_VALUE(up, -(startDx+upPad), 1, 0);
 | |
|                     ADD_PAD_VALUE(below, ohExcludePad * ow - startDx, 1, 0);
 | |
|                     ADD_PAD_VALUE(left, -leftPad, ohExcludePad, ow);
 | |
|                     ADD_PAD_VALUE(right, owExcludePad, ohExcludePad, ow);
 | |
|                 }
 | |
|                 Tensor::InsideDescribe::Region region;
 | |
|                 region.origin        = input;
 | |
|                 region.size[0]       = ic;
 | |
|                 region.size[1]       = ohExcludePad;
 | |
|                 region.size[2]       = owExcludePad;
 | |
|                 region.src.offset    = srcOffsetKx;
 | |
|                 region.dst.offset    = dstOffsetKx;
 | |
|                 region.src.stride[0] = srcStrideChannel;
 | |
|                 region.dst.stride[0] = dstStrideChannel;
 | |
|                 region.src.stride[1] = sh * iw;
 | |
|                 region.dst.stride[1] = ow;
 | |
|                 region.src.stride[2] = sw;
 | |
|                 region.dst.stride[2] = 1;
 | |
|                 des->regions.emplace_back(std::move(region));
 | |
|             }
 | |
|         }
 | |
|         // MNN_ASSERT(des->regions.size() > 0);
 | |
|     }
 | |
| }
 | |
| bool GeometryConvUtils::computeSingle(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, GeometryComputer::Context& context, CommandBuffer& res) {
 | |
|     auto newOutputs   = outputs;
 | |
|     auto newInputs    = inputs;
 | |
|     auto originOutput = outputs[0];
 | |
|     auto output       = originOutput;
 | |
|     auto inputDes     = TensorUtils::getDescribe(newInputs[0]);
 | |
|     auto format       = inputDes->dimensionFormat;
 | |
|     if (MNN_DATA_FORMAT_NC4HW4 != format) {
 | |
|         std::shared_ptr<Tensor> newInput(new Tensor(newInputs[0], Tensor::CAFFE_C4, false));
 | |
|         ConvertUtils::compute(newInputs[0], newInput.get(), res);
 | |
|         newInputs[0] = newInput.get();
 | |
|         res.extras.emplace_back(std::move(newInput));
 | |
|         std::shared_ptr<Tensor> newOutput(new Tensor(originOutput, Tensor::CAFFE_C4, false));
 | |
|         output        = newOutput.get();
 | |
|         newOutputs[0] = output;
 | |
|         res.extras.emplace_back(newOutput);
 | |
|     }
 | |
|     SharedPtr<Command> cmd(new Command);
 | |
|     cmd->op      = op;
 | |
|     cmd->inputs  = std::move(newInputs);
 | |
|     cmd->outputs = std::move(newOutputs);
 | |
|     res.command.emplace_back(std::move(cmd));
 | |
|     if (originOutput != output) {
 | |
|         ConvertUtils::compute(output, originOutput, res);
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| }; // namespace MNN
 |