| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | //  GeometryPoolGrad.cpp
 | 
					
						
							|  |  |  | //  MNN
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //  Created by MNN on 2020/06/04.
 | 
					
						
							|  |  |  | //  Copyright © 2018, Alibaba Group Holding Limited
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "ConvertUtils.hpp"
 | 
					
						
							|  |  |  | #include "geometry/GeometryComputer.hpp"
 | 
					
						
							|  |  |  | #include "geometry/GeometryComputerUtils.hpp"
 | 
					
						
							|  |  |  | #include "core/Macro.h"
 | 
					
						
							|  |  |  | #include "core/OpCommonUtils.hpp"
 | 
					
						
							|  |  |  | #define MNN_OPEN_TIME_TRACE
 | 
					
						
							|  |  |  | #include <MNN/AutoTime.hpp>
 | 
					
						
							|  |  |  | namespace MNN { | 
					
						
							|  |  |  | class GeometryPoolGrad : public GeometryComputer { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |     // PoolGrad PoolType_MAXPOOL
 | 
					
						
							|  |  |  |     bool onComputeMaxPool(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, | 
					
						
							|  |  |  |                           Context& context, CommandBuffer& res) const { | 
					
						
							|  |  |  |         auto origin       = inputs[0]; | 
					
						
							|  |  |  |         auto originOutput = inputs[1]; | 
					
						
							|  |  |  |         auto inputDiff    = inputs[2]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         auto ow = inputDiff->width(); | 
					
						
							|  |  |  |         auto oh = inputDiff->height(); | 
					
						
							|  |  |  |         auto iw = origin->width(); | 
					
						
							|  |  |  |         auto ih = origin->height(); | 
					
						
							|  |  |  |         auto oc = inputDiff->channel(); | 
					
						
							|  |  |  |         auto ob = inputDiff->batch(); | 
					
						
							| 
									
										
										
										
											2022-12-30 15:18:58 +08:00
										 |  |  |         MNN_ASSERT(TensorUtils::getDescribe(inputDiff)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4); | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         auto parameter = op->main_as_Pool(); | 
					
						
							|  |  |  |         auto stride_w  = parameter->strideX(); | 
					
						
							|  |  |  |         auto stride_h  = parameter->strideY(); | 
					
						
							|  |  |  |         auto kernel_w  = parameter->kernelX(); | 
					
						
							|  |  |  |         auto kernel_h  = parameter->kernelY(); | 
					
						
							|  |  |  |         auto isGlobal  = parameter->isGlobal(); | 
					
						
							|  |  |  |         auto pad_w     = parameter->padX(); | 
					
						
							|  |  |  |         auto pad_h     = parameter->padY(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // edit const if global
 | 
					
						
							|  |  |  |         if (isGlobal) { | 
					
						
							|  |  |  |             kernel_w = iw; | 
					
						
							|  |  |  |             kernel_h = ih; | 
					
						
							|  |  |  |             stride_w = iw; | 
					
						
							|  |  |  |             stride_h = ih; | 
					
						
							|  |  |  |             pad_w    = 0; | 
					
						
							|  |  |  |             pad_h    = 0; | 
					
						
							|  |  |  |         } else { | 
					
						
							| 
									
										
										
										
											2021-06-22 15:22:33 +08:00
										 |  |  |             if (parameter->padType() == PoolPadType_SAME) { | 
					
						
							|  |  |  |                 int pad_w_total = (ow - 1) * stride_w + kernel_w - iw; | 
					
						
							|  |  |  |                 int pad_h_total = (oh - 1) * stride_h + kernel_h - ih; | 
					
						
							|  |  |  |                 pad_w           = pad_w_total > 0 ? pad_w_total / 2 : 0; | 
					
						
							|  |  |  |                 pad_h           = pad_h_total > 0 ? pad_h_total / 2 : 0; | 
					
						
							|  |  |  |             } else if (parameter->padType() == PoolPadType_VALID) { | 
					
						
							|  |  |  |                 pad_w = 0; | 
					
						
							|  |  |  |                 pad_h = 0; | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |         std::vector<int> broadcastShape = {ob * kernel_h * kernel_w, oc, oh, ow}; | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> originSplit(MNN::Tensor::createDevice<float>(broadcastShape, Tensor::CAFFE_C4)); | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             auto des             = TensorUtils::getDescribe(originSplit.get()); | 
					
						
							|  |  |  |             des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL; | 
					
						
							|  |  |  |             des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4; | 
					
						
							|  |  |  |             des->regions.reserve(kernel_w * kernel_h); | 
					
						
							|  |  |  |             res.extras.emplace_back(originSplit); | 
					
						
							|  |  |  |             for (int ky = 0; ky < kernel_h; ky++) { | 
					
						
							|  |  |  |                 auto startSy = ky - pad_h; | 
					
						
							|  |  |  |                 int startDy  = 0; | 
					
						
							|  |  |  |                 if (startSy < 0) { | 
					
						
							|  |  |  |                     startDy = ((-startSy) + stride_h - 1) / stride_h; | 
					
						
							|  |  |  |                     startSy = startSy + startDy * stride_h; | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |                 } | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |                 auto endDy = oh - 1; | 
					
						
							|  |  |  |                 auto endSy = endDy * stride_h + ky - pad_h; | 
					
						
							|  |  |  |                 if (endSy >= ih) { | 
					
						
							|  |  |  |                     endDy = endDy - (endSy - ih + stride_h) / stride_h; | 
					
						
							|  |  |  |                     endSy = endDy * stride_h + ky - pad_h; | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |                 } | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |                 if (startDy > endDy) { | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |                     continue; | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |                 MNN_ASSERT(endDy >= 0); | 
					
						
							|  |  |  |                 MNN_ASSERT(startDy < oh); | 
					
						
							|  |  |  |                  | 
					
						
							|  |  |  |                 for (int kx = 0; kx < kernel_w; kx++) { | 
					
						
							|  |  |  |                     auto startSx = kx - pad_w; | 
					
						
							|  |  |  |                     int startDx  = 0; | 
					
						
							|  |  |  |                     if (startSx < 0) { | 
					
						
							|  |  |  |                         startDx = ((-startSx) + stride_w - 1) / stride_w; | 
					
						
							|  |  |  |                         startSx = startSx + startDx * stride_w; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     auto endDx = ow - 1; | 
					
						
							|  |  |  |                     auto endSx = endDx * stride_w + kx - pad_w; | 
					
						
							|  |  |  |                     if (endSx >= iw) { | 
					
						
							|  |  |  |                         endDx = endDx - (endSx - iw + stride_w) / stride_w; | 
					
						
							|  |  |  |                         endSx = endDx * stride_w + kx - pad_w; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     if (startDx > endDx) { | 
					
						
							|  |  |  |                         continue; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     MNN_ASSERT(endDx >= 0); | 
					
						
							|  |  |  |                     MNN_ASSERT(startDx < ow); | 
					
						
							|  |  |  |                      | 
					
						
							|  |  |  |                     // A: Input feature
 | 
					
						
							|  |  |  |                     int index = ky * kernel_w + kx; | 
					
						
							|  |  |  |                      | 
					
						
							|  |  |  |                     Tensor::InsideDescribe::Region region; | 
					
						
							|  |  |  |                     region.origin        = origin; | 
					
						
							|  |  |  |                     region.size[0]       = ob * oc; | 
					
						
							|  |  |  |                     region.size[1]       = endDy - startDy + 1; | 
					
						
							|  |  |  |                     region.size[2]       = endDx - startDx + 1; | 
					
						
							|  |  |  |                     region.dst.offset    = startDy * ow + startDx + ob * oc * oh * ow * (ky * kernel_w + kx); | 
					
						
							|  |  |  |                     region.src.offset    = startSy * iw + startSx; | 
					
						
							|  |  |  |                     region.dst.stride[0] = ow * oh; | 
					
						
							|  |  |  |                     region.src.stride[0] = iw * ih; | 
					
						
							|  |  |  |                     region.dst.stride[1] = ow; | 
					
						
							|  |  |  |                     region.src.stride[1] = iw * stride_h; | 
					
						
							|  |  |  |                     region.dst.stride[2] = 1; | 
					
						
							|  |  |  |                     region.src.stride[2] = stride_w; | 
					
						
							|  |  |  |                     des->regions.emplace_back(std::move(region)); | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |         std::shared_ptr<Tensor> originOutputBroadcast(MNN::Tensor::createDevice<float>(broadcastShape, Tensor::CAFFE_C4)); | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> inputDiffBroadcast(MNN::Tensor::createDevice<float>(broadcastShape, Tensor::CAFFE_C4)); | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             auto des             = TensorUtils::getDescribe(originOutputBroadcast.get()); | 
					
						
							|  |  |  |             des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL; | 
					
						
							|  |  |  |             des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4; | 
					
						
							|  |  |  |             Tensor::InsideDescribe::Region region; | 
					
						
							|  |  |  |             region.origin        = originOutput; | 
					
						
							|  |  |  |             region.size[0]       = 1; | 
					
						
							|  |  |  |             region.size[1]       = kernel_w * kernel_h; | 
					
						
							|  |  |  |             region.size[2]       = ob * oc * oh * ow; | 
					
						
							|  |  |  |             region.dst.offset    = 0; | 
					
						
							|  |  |  |             region.src.offset    = 0; | 
					
						
							|  |  |  |             region.dst.stride[0] = 0; | 
					
						
							|  |  |  |             region.src.stride[0] = 0; | 
					
						
							|  |  |  |             region.dst.stride[1] = ob * oc * oh * ow; | 
					
						
							|  |  |  |             region.src.stride[1] = 0; | 
					
						
							|  |  |  |             region.dst.stride[2] = 1; | 
					
						
							|  |  |  |             region.src.stride[2] = 1; | 
					
						
							|  |  |  |             des->regions = {region}; | 
					
						
							|  |  |  |             res.extras.emplace_back(originOutputBroadcast); | 
					
						
							|  |  |  |             region.origin = inputDiff; | 
					
						
							|  |  |  |             des = TensorUtils::getDescribe(inputDiffBroadcast.get()); | 
					
						
							|  |  |  |             des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL; | 
					
						
							|  |  |  |             des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4; | 
					
						
							|  |  |  |             des->regions = {region}; | 
					
						
							|  |  |  |             res.extras.emplace_back(inputDiffBroadcast); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> originGEqual(MNN::Tensor::createDevice<float>(broadcastShape, Tensor::CAFFE_C4)); | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> originGEqualInt(MNN::Tensor::createDevice<int>(broadcastShape, Tensor::CAFFE_C4)); | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |             auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_GREATER_EQUAL, originSplit.get(), | 
					
						
							|  |  |  |                                                          originOutputBroadcast.get(), originGEqualInt.get()); | 
					
						
							|  |  |  |             res.command.emplace_back(cmd); | 
					
						
							|  |  |  |             res.extras.emplace_back(originGEqualInt); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             std::unique_ptr<OpT> cast2float(new OpT); | 
					
						
							|  |  |  |             cast2float->type                     = OpType_Cast; | 
					
						
							|  |  |  |             cast2float->main.type                = OpParameter_CastParam; | 
					
						
							|  |  |  |             cast2float->main.value               = new CastParamT; | 
					
						
							|  |  |  |             cast2float->main.AsCastParam()->dstT = DataType_DT_FLOAT; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             flatbuffers::FlatBufferBuilder builder1; | 
					
						
							|  |  |  |             auto lastOffset1 = Op::Pack(builder1, cast2float.get()); | 
					
						
							|  |  |  |             builder1.Finish(lastOffset1); | 
					
						
							|  |  |  |             auto cmd1 = GeometryComputerUtils::makeCommand(builder1, {originGEqualInt.get()}, {originGEqual.get()}); | 
					
						
							|  |  |  |             res.extras.emplace_back(originGEqual); | 
					
						
							|  |  |  |             res.command.emplace_back(cmd1); | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |         std::shared_ptr<Tensor> originDiff(MNN::Tensor::createDevice<float>(broadcastShape, Tensor::CAFFE_C4)); | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             auto cmd2 = GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, inputDiffBroadcast.get(), originGEqual.get(), originDiff.get()); | 
					
						
							|  |  |  |             res.extras.emplace_back(originDiff); | 
					
						
							|  |  |  |             res.command.emplace_back(cmd2); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> reduceDiffBefore(MNN::Tensor::createDevice<float>({1, kernel_w * kernel_h, ob * oc * ih * iw}, Tensor::CAFFE)); | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |         { | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |             auto des                  = TensorUtils::getDescribe(reduceDiffBefore.get()); | 
					
						
							|  |  |  |             des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL; | 
					
						
							|  |  |  |             Tensor::InsideDescribe::Region region; | 
					
						
							|  |  |  |             for (int ky = 0; ky < kernel_h; ++ky) { | 
					
						
							|  |  |  |                 for (int kx = 0; kx < kernel_w; ++kx) { | 
					
						
							|  |  |  |                     region.origin        = originDiff.get(); | 
					
						
							|  |  |  |                     region.size[0]       = ob * oc; | 
					
						
							|  |  |  |                     region.size[1]       = oh; | 
					
						
							|  |  |  |                     region.size[2]       = ow; | 
					
						
							|  |  |  |                     region.src.offset    = oc * ob * ow * oh * (ky * kernel_w + kx); | 
					
						
							|  |  |  |                     region.dst.offset    = ky * iw + kx + oc * ob * iw * ih * (ky * kernel_w + kx); | 
					
						
							|  |  |  |                     region.src.stride[0] = ow * oh; | 
					
						
							|  |  |  |                     region.dst.stride[0] = iw * ih; | 
					
						
							|  |  |  |                     region.src.stride[1] = ow; | 
					
						
							|  |  |  |                     region.dst.stride[1] = iw * stride_h; | 
					
						
							|  |  |  |                     region.src.stride[2] = 1; | 
					
						
							|  |  |  |                     region.dst.stride[2] = stride_w; | 
					
						
							|  |  |  |                     des->regions.emplace_back(std::move(region)); | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |             res.extras.emplace_back(reduceDiffBefore); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> reduceDiffAfter(MNN::Tensor::createDevice<float>({1, 1, ob * oc * iw * ih}, Tensor::CAFFE)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         auto reduceCmd = GeometryComputerUtils::makeReduce(ReductionType_SUM, reduceDiffBefore.get(), reduceDiffAfter.get()); | 
					
						
							|  |  |  |         res.command.emplace_back(reduceCmd); | 
					
						
							|  |  |  |         res.extras.emplace_back(reduceDiffAfter); | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             auto outputDes = TensorUtils::getDescribe(outputs[0]); | 
					
						
							|  |  |  |             outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL; | 
					
						
							|  |  |  |             outputDes->regions = {TensorUtils::makeFullSlice(reduceDiffAfter.get())}; | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |         return true; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // PoolGrad PoolType_AVEPOOL
 | 
					
						
							|  |  |  |     virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, | 
					
						
							|  |  |  |                            Context& context, CommandBuffer& res) const override { | 
					
						
							|  |  |  |         auto parameter = op->main_as_Pool(); | 
					
						
							|  |  |  |         if (parameter->type() == PoolType_MAXPOOL) { | 
					
						
							|  |  |  |             return onComputeMaxPool(op, inputs, outputs, context, res); | 
					
						
							|  |  |  |         } else if (parameter->type() != PoolType_AVEPOOL) { | 
					
						
							|  |  |  |             MNN_PRINT("Pool type not supported!\n"); | 
					
						
							|  |  |  |             return false; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         auto origin     = inputs[0]; | 
					
						
							|  |  |  |         auto inputDiff  = inputs[2]; | 
					
						
							|  |  |  |         auto outputDiff = outputs[0]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         auto ow = inputDiff->width(); | 
					
						
							|  |  |  |         auto oh = inputDiff->height(); | 
					
						
							|  |  |  |         auto iw = origin->width(); | 
					
						
							|  |  |  |         auto ih = origin->height(); | 
					
						
							|  |  |  |         auto oc = inputDiff->channel(); | 
					
						
							|  |  |  |         auto ob = inputDiff->batch(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         auto stride_w = parameter->strideX(); | 
					
						
							|  |  |  |         auto stride_h = parameter->strideY(); | 
					
						
							|  |  |  |         auto kernel_w = parameter->kernelX(); | 
					
						
							|  |  |  |         auto kernel_h = parameter->kernelY(); | 
					
						
							|  |  |  |         auto isGlobal = parameter->isGlobal(); | 
					
						
							|  |  |  |         auto pad_w    = parameter->padX(); | 
					
						
							|  |  |  |         auto pad_h    = parameter->padY(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // edit const if global
 | 
					
						
							|  |  |  |         if (isGlobal) { | 
					
						
							|  |  |  |             kernel_w = iw; | 
					
						
							|  |  |  |             kernel_h = ih; | 
					
						
							|  |  |  |             stride_w = iw; | 
					
						
							|  |  |  |             stride_h = ih; | 
					
						
							|  |  |  |             pad_w    = 0; | 
					
						
							|  |  |  |             pad_h    = 0; | 
					
						
							|  |  |  |         } else { | 
					
						
							| 
									
										
										
										
											2021-06-22 15:22:33 +08:00
										 |  |  |             if (parameter->padType() == PoolPadType_SAME) { | 
					
						
							|  |  |  |                 int pad_w_total = (ow - 1) * stride_w + kernel_w - iw; | 
					
						
							|  |  |  |                 int pad_h_total = (oh - 1) * stride_h + kernel_h - ih; | 
					
						
							|  |  |  |                 pad_w           = pad_w_total > 0 ? pad_w_total / 2 : 0; | 
					
						
							|  |  |  |                 pad_h           = pad_h_total > 0 ? pad_h_total / 2 : 0; | 
					
						
							|  |  |  |             } else if (parameter->padType() == PoolPadType_VALID) { | 
					
						
							|  |  |  |                 pad_w = 0; | 
					
						
							|  |  |  |                 pad_h = 0; | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> inpDifTrans; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         inpDifTrans.reset(new Tensor); | 
					
						
							|  |  |  |         inpDifTrans->buffer().type       = halide_type_of<float>(); | 
					
						
							|  |  |  |         inpDifTrans->buffer().dimensions = 5; | 
					
						
							|  |  |  |         inpDifTrans->setLength(0, kernel_h * kernel_w); | 
					
						
							|  |  |  |         inpDifTrans->setLength(1, ob); | 
					
						
							|  |  |  |         inpDifTrans->setLength(2, oc); | 
					
						
							|  |  |  |         inpDifTrans->setLength(3, ih); | 
					
						
							|  |  |  |         inpDifTrans->setLength(4, iw); | 
					
						
							|  |  |  |         auto des             = TensorUtils::getDescribe(inpDifTrans.get()); | 
					
						
							|  |  |  |         des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL; | 
					
						
							|  |  |  |         des->dimensionFormat = MNN_DATA_FORMAT_NCHW; | 
					
						
							|  |  |  |         des->regions.clear(); | 
					
						
							|  |  |  |         // des->regions.reserve(kernel_h*kernel_w);
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for (int ky = 0; ky < kernel_h; ky++) { | 
					
						
							|  |  |  |             auto startSy = ky - pad_h; | 
					
						
							|  |  |  |             int startDy  = 0; | 
					
						
							|  |  |  |             if (startSy < 0) { | 
					
						
							|  |  |  |                 startDy = ((-startSy) + stride_h - 1) / stride_h; | 
					
						
							|  |  |  |                 startSy = startSy + startDy * stride_h; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             auto endDy = oh - 1; | 
					
						
							|  |  |  |             auto endSy = endDy * stride_h + ky - pad_h; | 
					
						
							|  |  |  |             if (endSy >= ih) { | 
					
						
							|  |  |  |                 endDy = endDy - (endSy - ih + stride_h) / stride_h; | 
					
						
							|  |  |  |                 endSy = endDy * stride_h + ky - pad_h; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             if (startDy > endDy) { | 
					
						
							|  |  |  |                 continue; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             MNN_ASSERT(endDy >= 0); | 
					
						
							|  |  |  |             MNN_ASSERT(startDy < oh); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             for (int kx = 0; kx < kernel_w; kx++) { | 
					
						
							|  |  |  |                 auto startSx = kx - pad_w; | 
					
						
							|  |  |  |                 int startDx  = 0; | 
					
						
							|  |  |  |                 if (startSx < 0) { | 
					
						
							|  |  |  |                     startDx = ((-startSx) + stride_w - 1) / stride_w; | 
					
						
							|  |  |  |                     startSx = startSx + startDx * stride_w; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 auto endDx = ow - 1; | 
					
						
							|  |  |  |                 auto endSx = endDx * stride_w + kx - pad_w; | 
					
						
							|  |  |  |                 if (endSx >= iw) { | 
					
						
							|  |  |  |                     endDx = endDx - (endSx - iw + stride_w) / stride_w; | 
					
						
							|  |  |  |                     endSx = endDx * stride_w + kx - pad_w; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 if (startDx > endDx) { | 
					
						
							|  |  |  |                     continue; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 MNN_ASSERT(endDx >= 0); | 
					
						
							|  |  |  |                 MNN_ASSERT(startDx < ow); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 // A: Input feature
 | 
					
						
							|  |  |  |                 int index = ky * kernel_w + kx; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 Tensor::InsideDescribe::Region region; | 
					
						
							|  |  |  |                 region.origin        = inputDiff; | 
					
						
							|  |  |  |                 region.size[0]       = ob * oc; | 
					
						
							|  |  |  |                 region.size[1]       = endDy - startDy + 1; | 
					
						
							|  |  |  |                 region.size[2]       = endDx - startDx + 1; | 
					
						
							|  |  |  |                 region.src.offset    = startDy * ow + startDx; | 
					
						
							|  |  |  |                 region.dst.offset    = index * ob * oc * ih * iw + startSy * iw + startSx; | 
					
						
							|  |  |  |                 region.src.stride[0] = ow * oh; | 
					
						
							|  |  |  |                 region.dst.stride[0] = iw * ih; | 
					
						
							|  |  |  |                 region.src.stride[1] = ow; | 
					
						
							|  |  |  |                 region.dst.stride[1] = iw * stride_h; | 
					
						
							|  |  |  |                 region.src.stride[2] = 1; | 
					
						
							|  |  |  |                 region.dst.stride[2] = stride_w; | 
					
						
							|  |  |  |                 des->regions.emplace_back(std::move(region)); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         res.extras.emplace_back(inpDifTrans); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // reduction mean
 | 
					
						
							|  |  |  |         std::shared_ptr<Tensor> tmpOutput; | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             tmpOutput.reset(new Tensor); | 
					
						
							|  |  |  |             tmpOutput->buffer().type       = halide_type_of<float>(); | 
					
						
							| 
									
										
										
										
											2023-10-18 10:31:02 +08:00
										 |  |  |             tmpOutput->buffer().dimensions = 5; | 
					
						
							|  |  |  |             tmpOutput->setLength(0, 1); | 
					
						
							|  |  |  |             tmpOutput->setLength(1, ob); | 
					
						
							|  |  |  |             tmpOutput->setLength(2, oc); | 
					
						
							|  |  |  |             tmpOutput->setLength(3, ih); | 
					
						
							|  |  |  |             tmpOutput->setLength(4, iw); | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |             auto des = TensorUtils::getDescribe(tmpOutput.get()); | 
					
						
							|  |  |  |             // des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
 | 
					
						
							|  |  |  |             des->dimensionFormat = MNN_DATA_FORMAT_NCHW; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             std::unique_ptr<OpT> mean(new OpT); | 
					
						
							|  |  |  |             mean->type                               = OpType_Reduction; | 
					
						
							|  |  |  |             mean->main.type                          = OpParameter_ReductionParam; | 
					
						
							|  |  |  |             mean->main.value                         = new ReductionParamT; | 
					
						
							|  |  |  |             mean->main.AsReductionParam()->dim       = {0}; | 
					
						
							|  |  |  |             mean->main.AsReductionParam()->keepDims  = false; | 
					
						
							|  |  |  |             mean->main.AsReductionParam()->operation = ReductionType_MEAN; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             flatbuffers::FlatBufferBuilder builder; | 
					
						
							|  |  |  |             auto lastOffset = Op::Pack(builder, mean.get()); | 
					
						
							|  |  |  |             builder.Finish(lastOffset); | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |             auto cmd = GeometryComputerUtils::makeCommand(builder, {inpDifTrans.get()}, {tmpOutput.get()}); | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  |             auto outputDes        = TensorUtils::getDescribe(outputs[0]); | 
					
						
							|  |  |  |             outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL; | 
					
						
							|  |  |  |             Tensor::InsideDescribe::Region desReg; | 
					
						
							|  |  |  |             desReg.size[0]       = ob * oc; | 
					
						
							|  |  |  |             desReg.size[1]       = ih; | 
					
						
							|  |  |  |             desReg.size[2]       = iw; | 
					
						
							|  |  |  |             desReg.dst.offset    = 0; | 
					
						
							|  |  |  |             desReg.dst.stride[0] = ih * iw; | 
					
						
							|  |  |  |             desReg.dst.stride[1] = iw; | 
					
						
							|  |  |  |             desReg.dst.stride[2] = 1; | 
					
						
							|  |  |  |             desReg.src.offset    = 0; | 
					
						
							|  |  |  |             desReg.src.stride[0] = ih * iw; | 
					
						
							|  |  |  |             desReg.src.stride[1] = iw; | 
					
						
							|  |  |  |             desReg.src.stride[2] = 1; | 
					
						
							|  |  |  |             desReg.origin        = tmpOutput.get(); | 
					
						
							|  |  |  |             outputDes->regions.emplace_back(std::move(desReg)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             res.extras.emplace_back(std::move(tmpOutput)); | 
					
						
							|  |  |  |             res.command.emplace_back(std::move(cmd)); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return true; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void _create() { | 
					
						
							|  |  |  |     std::shared_ptr<GeometryComputer> comp(new GeometryPoolGrad); | 
					
						
							|  |  |  |     GeometryComputer::registerGeometryComputer(comp, {OpType_PoolGrad}); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | REGISTER_GEOMETRY(GeometryPoolGrad, _create); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } // namespace MNN
 |