mirror of https://github.com/alibaba/MNN.git
//
//  TensorUtils.cpp
//  MNN
//
//  Created by MNN on 2018/08/11.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "core/TensorUtils.hpp"
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <cmath>
#include <cstring>
#include <algorithm>
#include "core/Backend.hpp"
#include "core/Macro.h"
namespace MNN {
Tensor::InsideDescribe::NativeInsideDescribe* TensorUtils::getDescribe(const Tensor* tensor) {
    return tensor->mDescribe->mContent.get();
}
bool TensorUtils::regionIsFull(Tensor* input) {
    auto des = TensorUtils::getDescribe(input);
    int size = 1;
    for (int i = 0; i < input->dimensions(); ++i) {
        size *= input->length(i);
    }
    int regionSize = 0;
    for (auto& region : des->regions) {
        regionSize += region.size[1] * region.size[0] * region.size[2];
    }
    return regionSize == size;
}

Tensor::InsideDescribe::Region TensorUtils::makeFullSlice(Tensor* input) {
    Tensor::InsideDescribe::Region totalSlice;
    totalSlice.src.offset = 0;
    totalSlice.dst.offset = 0;
    totalSlice.origin     = input;
    for (int i = 0; i < input->dimensions(); ++i) {
        totalSlice.size[2] *= input->length(i);
    }
    totalSlice.dst.stride[1] = totalSlice.size[2];
    totalSlice.dst.stride[0] = totalSlice.size[2];
    totalSlice.src.stride[1] = totalSlice.size[2];
    totalSlice.src.stride[0] = totalSlice.size[2];
    return totalSlice;
}
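// Illustrative example (assumes Region's defaults of size = {1, 1, 1} and view
// strides = {1, 1, 1}): for an input tensor of shape {2, 3, 4},
//   auto slice = TensorUtils::makeFullSlice(input);
//   // slice.size       == {1, 1, 24}
//   // slice.src.stride == {24, 24, 1}, slice.dst.stride == {24, 24, 1}
// i.e. the whole tensor is described as one contiguous run of 24 elements.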
bool TensorUtils::reshapeSlice(Tensor::InsideDescribe::Region& slice, int outside, int inside, int axis) {
    if (slice.size[1] == 1 && slice.size[0] == 1 && slice.size[2] == outside * inside * axis) {
        slice.size[0]       = outside;
        slice.size[2]       = inside;
        slice.size[1]       = axis;
        slice.dst.stride[0] = inside * axis;
        slice.dst.stride[1] = inside;

        auto originStride   = slice.src.stride[2];
        slice.src.stride[0] = originStride * inside * axis;
        slice.src.stride[1] = originStride * inside;
        return true;
    }
    if (slice.size[0] == outside && slice.size[1] == axis && slice.size[2] == inside) {
        return true;
    }
    return false;
}
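// Illustrative example: reshaping the full slice above (size = {1, 1, 24} with
// src.stride[2] == 1 and dst.stride[2] == 1) with outside = 2, axis = 3,
// inside = 4 yields
//   slice.size       == {2, 3, 4}
//   slice.dst.stride == {12, 4, 1}
//   slice.src.stride == {12, 4, 1}
// The call returns false if the slice is neither a flat run of
// outside * axis * inside elements nor already of shape {outside, axis, inside}.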

void TensorUtils::setupTensorInfo(const Tensor* tensor, Tensor* wrapTensor, MNN_DATA_FORMAT mMidFormat) {
    TensorUtils::getDescribe(wrapTensor)->dimensionFormat = mMidFormat;
    auto tensorFormat                                     = TensorUtils::getDescribe(tensor)->dimensionFormat;
    bool originCaffeFormat = (tensorFormat == MNN_DATA_FORMAT_NCHW || tensorFormat == MNN_DATA_FORMAT_NC4HW4);
    bool wrapCaffeFormat   = (mMidFormat == MNN_DATA_FORMAT_NCHW || mMidFormat == MNN_DATA_FORMAT_NC4HW4);
    bool originTfFormat    = (tensorFormat == MNN_DATA_FORMAT_NHWC || tensorFormat == MNN_DATA_FORMAT_NHWC4);
    bool wrapTfFormat      = (mMidFormat == MNN_DATA_FORMAT_NHWC || mMidFormat == MNN_DATA_FORMAT_NHWC4);
    if ((originCaffeFormat && wrapCaffeFormat) || (originTfFormat && wrapTfFormat)) {
        TensorUtils::copyShape(tensor, wrapTensor);
    } else if (originCaffeFormat && wrapTfFormat) {
        for (int i = 1; i < wrapTensor->dimensions() - 1; ++i) {
            wrapTensor->setLength(i, tensor->length(i + 1));
        }
        wrapTensor->setLength(0, tensor->length(0));
        wrapTensor->setLength(wrapTensor->dimensions() - 1, tensor->length(1));
    } else if (originTfFormat && wrapCaffeFormat) {
        for (int i = 2; i < wrapTensor->dimensions(); ++i) {
            wrapTensor->setLength(i, tensor->length(i - 1));
        }
        wrapTensor->setLength(0, tensor->length(0));
        wrapTensor->setLength(1, tensor->length(tensor->dimensions() - 1));
    } else {
        // will not reach here
        MNN_ASSERT(false);
    }
    TensorUtils::setLinearLayout(wrapTensor);
    wrapTensor->buffer().type = tensor->getType();
}

void TensorUtils::copyShape(const Tensor* source, Tensor* dest, bool copyFormat, bool copyRef) {
    auto& ob      = dest->buffer();
    auto& ib      = source->buffer();
    ob.dimensions = ib.dimensions;
    ::memcpy(ob.dim, ib.dim, ib.dimensions * sizeof(halide_dimension_t));
    if (copyFormat) {
        getDescribe(dest)->dimensionFormat = getDescribe(source)->dimensionFormat;
    }
    if (copyRef) {
        auto dstDes = getDescribe(dest);
        auto srcDes = getDescribe(source);
        dstDes->regions = srcDes->regions;
        dstDes->quantAttr = srcDes->quantAttr;
        dstDes->type = srcDes->type;
        dest->buffer().type = source->getType();
    }
    adjustTensorForCompability(dest);
}

void TensorUtils::setShape(Tensor* dest, const std::vector<int>& alldims) {
    auto& ob      = dest->buffer();
    ob.dimensions = alldims.size();
    int stride = 1;
    for (int i = alldims.size() - 1; i >= 0; --i) {
        ob.dim[i].stride = stride;
        ob.dim[i].extent = alldims[i];
        stride *= alldims[i];
    }
    return;
}

void TensorUtils::setLinearLayout(Tensor* tensor) {
    auto& buffer = tensor->buffer();
    int size     = 1;
    for (int i = 0; i < buffer.dimensions; ++i) {
        auto index  = buffer.dimensions - i - 1;
        auto extent = buffer.dim[index].extent;
        if (1 == index && tensor->mDescribe->mContent->dimensionFormat == MNN_DATA_FORMAT_NC4HW4) {
            extent = ROUND_UP(extent, 4);
        }
        buffer.dim[index].stride = size;
        size *= extent;
    }
}
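// Illustrative example: for a tensor with extents {2, 3, 4, 5},
//   NCHW   -> strides {60, 20, 5, 1}
//   NC4HW4 -> strides {80, 20, 5, 1}   (channel extent 3 rounded up to 4)
// Strides are filled from the last dimension backwards, so the layout is dense
// except for the channel rounding in the NC4HW4 case.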

static const Tensor* createHostPlanar(const Tensor* source) {
    // check
    auto bnType        = MNN_FORWARD_CPU;
    auto tensorBackend = TensorUtils::getDescribeOrigin(source)->getBackend();
    if (tensorBackend) {
        bnType = tensorBackend->type();
    }
    bool device = bnType != MNN_FORWARD_CPU;
    bool chunky = TensorUtils::getDescribe(source)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4;

    // no convert needed
    if (!device && !chunky) {
        return source;
    }

    // convert
    if (chunky) {
        Tensor* result = source->createHostTensorFromDevice(source, false);
        if (result->getDimensionType() == MNN::Tensor::TENSORFLOW) {
            TensorUtils::getDescribe(result)->dimensionFormat = MNN_DATA_FORMAT_NHWC;
        } else {
            TensorUtils::getDescribe(result)->dimensionFormat = MNN_DATA_FORMAT_NCHW;
        }
        TensorUtils::setLinearLayout(result);

        if (device) {
            void *host = ((Tensor *)source)->map(MNN::Tensor::MAP_TENSOR_READ, result->getDimensionType());
            if(host != nullptr) {
                ::memcpy(result->buffer().host, host, result->size());
            }
            ((Tensor *)source)->unmap(MNN::Tensor::MAP_TENSOR_READ,  result->getDimensionType(), host);
        } else {
            Backend::Info info;
            info.type = MNN_FORWARD_CPU;
            std::shared_ptr<Runtime> runtime(MNNGetExtraRuntimeCreator(MNN_FORWARD_CPU)->onCreate(info));
            auto backend = runtime->onCreate();
            backend->onCopyBuffer(source, result);
            delete backend;
        }
        return result;
    } else {
        return source->createHostTensorFromDevice(source, true);
    }
}

template <typename T>
static void copyTensorToFloat(const Tensor* source, double* dest) {
    auto srcData = source->host<T>();
    auto size    = source->elementSize();
    for (int i = 0; i < size; ++i) {
        dest[i] = srcData[i];
    }
}

static bool equals(const double* pa, const double* pb, size_t size, double tolerance, double epsilon, bool overall,
                   bool prints) {
    // get max if using overall tolerance
    double max = fabs(pb[0]);
    if (overall) {
        for (int i = 1; i < size; i++) {
            max = std::max(max, fabs(pb[i]));
        }
    }

    // compare
    for (int i = 0; i < size; i++) {
        float va = pa[i], vb = pb[i];
        if (std::isinf(va) && std::isinf(vb)) {
            continue;
        }
        if (fabs(va) < epsilon && fabs(vb) < epsilon) {
            continue;
        }
        float div = overall ? max : fabsf(vb);
        if (fabsf(va - vb) / div > tolerance) {
            if (prints) {
                MNN_PRINT("%d: %f != %f\n", i, va, vb);
            }
            return false;
        }
    }
    return true;
}
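// Illustrative example: with tolerance = 0.01, overall = false and
// epsilon = FLT_EPSILON, comparing 1.005 against an expected 1.0 passes
// (relative error 0.5%), while 1.02 against 1.0 fails (2% > 1%). With
// overall = true the error is measured against max(|expect|) instead of each
// expected element, which is more forgiving for values close to zero.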

bool TensorUtils::compareTensors(const Tensor* compare, const Tensor* expect, float tolerance, bool overall,
                                 bool printsErrors, bool printsTensors) {
    // type
    if (compare->getType().code != expect->getType().code || compare->getType().bits != expect->getType().bits) {
        if (printsErrors) {
            MNN_PRINT("NOT equal in type: %d/%d - %d/%d.\n", compare->getType().code, compare->getType().bits,
                      expect->getType().code, expect->getType().bits);
        }
        return false;
    }

    // dimensions
    if (compare->dimensions() != expect->dimensions()) {
        if (printsErrors) {
            MNN_PRINT("NOT equal in dimensions: %d - %d.\n", compare->dimensions(), expect->dimensions());
        }
        return false;
    }
    for (int i = 0; i < compare->dimensions(); i++) {
        if (compare->length(i) == expect->length(i)) {
            continue;
        }
        if (printsErrors) {
            MNN_PRINT("NOT equal in dimensions[%d]: %d - %d.\n", i, compare->length(i), expect->length(i));
        }
        return false;
    }

    // convert to host if needed
    auto a = createHostPlanar(compare), b = createHostPlanar(expect);

    // get value as double
    auto size = expect->elementSize();
    std::vector<double> expectValue(expect->elementSize(), 0.0f);
    std::vector<double> compareValue(compare->elementSize(), 0.0f);

    auto result = false;
    if (b->buffer().type.code == halide_type_uint) {
        switch (b->buffer().type.bits) {
            case 8:
                copyTensorToFloat<uint8_t>(a, compareValue.data());
                copyTensorToFloat<uint8_t>(b, expectValue.data());
                break;
            case 16:
                copyTensorToFloat<uint16_t>(a, compareValue.data());
                copyTensorToFloat<uint16_t>(b, expectValue.data());
                break;
            case 32:
                copyTensorToFloat<uint32_t>(a, compareValue.data());
                copyTensorToFloat<uint32_t>(b, expectValue.data());
                break;
            case 64:
                copyTensorToFloat<uint64_t>(a, compareValue.data());
                copyTensorToFloat<uint64_t>(b, expectValue.data());
                break;
            default:
                break;
        }
    } else if (b->buffer().type.code == halide_type_int) {
        switch (b->buffer().type.bits) {
            case 8:
                copyTensorToFloat<int8_t>(a, compareValue.data());
                copyTensorToFloat<int8_t>(b, expectValue.data());
                break;
            case 16:
                copyTensorToFloat<int16_t>(a, compareValue.data());
                copyTensorToFloat<int16_t>(b, expectValue.data());
                break;
            case 32:
                copyTensorToFloat<int32_t>(a, compareValue.data());
                copyTensorToFloat<int32_t>(b, expectValue.data());
                break;
            case 64:
                copyTensorToFloat<int64_t>(a, compareValue.data());
                copyTensorToFloat<int64_t>(b, expectValue.data());
                break;
            default:
                break;
        }
    } else if (b->buffer().type.code == halide_type_float) {
        switch (b->buffer().type.bits) {
            case 32:
                copyTensorToFloat<float>(a, compareValue.data());
                copyTensorToFloat<float>(b, expectValue.data());
                break;
            default:
                break;
        }
    } else {
        if (printsErrors) {
            MNN_PRINT("unsupported data type.");
        }
    }
    auto epsilon = FLT_EPSILON;
    if ((NULL != compareValue.data()) && (NULL != expectValue.data())) {
        result = equals(compareValue.data(), expectValue.data(), size, tolerance, epsilon, overall, printsErrors);
    }

    // clean up
    if (a != compare) {
        delete a;
    }
    if (b != expect) {
        delete b;
    }
    return result;
}

// is copy only region
bool TensorUtils::isCopyRegion(const Tensor::InsideDescribe::Region& region) {
    bool eq = true;
    for (int i = 0; i < 3; i++) {
        eq &= ((region.src.stride[i] == region.dst.stride[i]) || (region.size[i] <= 1));
    }
    return eq;
}
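// Illustrative example: a region with size = {1, 4, 16},
// src.stride = {0, 16, 1} and dst.stride = {64, 16, 1} is a copy region:
// the strides only differ on the first dimension, whose size is 1, so the
// blit degenerates to a plain copy of 64 contiguous elements.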

bool TensorUtils::isTransposeRegion(const Tensor::InsideDescribe::Region& region) {
    int srcOne = -1, dstOne = -1;
    for (int i = 0; i < 3; i++) {
        if (region.src.stride[i] == 1 && region.size[i] != 1) {
            if (srcOne >= 0/* || region.size[i] < 4*/) {
                return false;
            }
            srcOne = i;
        }
        if (region.dst.stride[i] == 1 && region.size[i] != 1) {
            if (dstOne >= 0/* || region.size[i] < 4*/) {
                return false;
            }
            dstOne = i;
        }
    }
    return srcOne >= 0 && dstOne >= 0 && srcOne != dstOne;
}
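// Illustrative example: size = {1, 32, 64}, src.stride = {0, 1, 32},
// dst.stride = {0, 64, 1} describes a 32x64 transpose: the unit-stride axis of
// the source (index 1) differs from that of the destination (index 2), so the
// innermost axes of source and destination are swapped.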

bool TensorUtils::isTileRegion(const Tensor::InsideDescribe::Region& region) {
    bool res = true;
    for (int i = 0; i < 3; i++) {
        if (region.src.stride[i] != 0 && region.size[i] > 1) {
            res &= (region.src.stride[i] == region.dst.stride[i]);
        }
    }
    return res;
}

bool TensorUtils::isDepthToSpaceRegions(const Tensor* output) {
    const auto& regions = TensorUtils::getDescribe(output)->regions;
    if (regions.empty()) {
        return false;
    }
    auto input = regions[0].origin;
    for (const auto region : regions) {
        if (region.origin != input) {
            return false;
        }
    }
    auto ic = input->channel();
    auto ih = input->height();
    auto iw = input->width();
    auto oc = output->channel();
    auto oh = output->height();
    auto ow = output->width();
    if (ic * ih * iw != oc * oh * ow) {
        return false;
    }
    int hblock = oh / ih;
    int wblock = ow / iw;
    if (hblock != wblock) {
        return false;
    }
    if (hblock * wblock * oc != ic) {
        return false;
    }
    if (regions.size() != hblock * wblock) {
        return false;
    }
    return true;
}

// compute offset through region
static inline int offsetCompute(const Tensor::InsideDescribe::Region& reg, int srcOffset, int dstOffset, bool backward) {
    const Tensor::InsideDescribe::View* src;
    const Tensor::InsideDescribe::View* dst;
    if (backward) {
        src = &reg.dst;
        dst = &reg.src;
    } else {
        src = &reg.src;
        dst = &reg.dst;
    }
    int res = 0;
    for (int i = 0; i < 3; i++) {
        if (reg.size[i] > 1) {
            res += (srcOffset / src->stride[i] - dstOffset / src->stride[i]) * dst->stride[i];
            srcOffset %= src->stride[i];
            dstOffset %= src->stride[i];
        }
    }
    return res;
}
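// Illustrative example (backward == true, so offsets are decomposed with the
// dst view's strides and re-expressed with the src view's strides): for a
// region with size = {2, 3, 4}, dst.stride = {12, 4, 1}, src.stride = {24, 8, 2},
// srcOffset = 17 and dstOffset = 0:
//   17 = 1*12 + 1*4 + 1*1  ->  coordinates (1, 1, 1)
//   result = 1*24 + 1*8 + 1*2 = 34
// i.e. element 17 of the packed dst view maps to element 34 of the src view.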

// expand src stride with expand value
static inline bool expandSrc(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& size, int expandValue) {
    if (expandValue <= 0) {
        return false;
    }
    for (int i = size.size()-1; i >= 0; i--) {
        int splitSize = expandValue / src[i];
        if (!(expandValue % src[i] || size[i] % splitSize)) {
            src.insert(src.begin()+i, expandValue);
            dst.insert(dst.begin()+i, splitSize * dst[i]);
            size[i] /= splitSize;
            size.insert(size.begin()+i+1, splitSize);
            return true;
        }
    }
    return false;
}
// expand stride and size with expand value
static inline bool expandStrideSize(int* src, int* dst, int* size, int& num, int expandValue) {
#define MNN_3_INT_INSERT(x, i, y) if (i == 2) { x[2] = y; } else if (i == 1) { x[2] = x[1]; x[1] = y; } else if (i == 0) { x[2] = x[1]; x[1] = x[0]; x[0] = y; } else { return false; }
    for (int i = num-1; i >= 0; i--) {
        int splitSize = expandValue / src[i];
        if (!(expandValue % src[i] || size[i] % splitSize)) {
            MNN_3_INT_INSERT(src, i, expandValue)
            MNN_3_INT_INSERT(dst, i, (splitSize * dst[i]))
            size[i] /= splitSize;
            MNN_3_INT_INSERT(size, (i+1), splitSize)
            if (++num > 3) return false;
            return true;
        }
    }
    return false;
#undef MNN_3_INT_INSERT
}
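// Illustrative example: with src = {4}, dst = {1}, size = {6}, num = 1 and
// expandValue = 12, the single dimension of 6 elements (src stride 4) is split
// into an outer dimension of size 2 with src stride 12 and an inner dimension
// of size 3 with src stride 4; dst strides become {3, 1} and num becomes 2.
// This lets one region's strides be lined up against another's during fusion.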

bool TensorUtils::refTensorContent(Tensor* dst, const Tensor* src) {
    auto des = TensorUtils::getDescribe(dst);
    auto srcDes = TensorUtils::getDescribe(src);
    auto desO = TensorUtils::getDescribeOrigin(dst);
    auto srcDesO = TensorUtils::getDescribeOrigin(src);
    bool needMalloc = dst->buffer().host != src->buffer().host || dst->buffer().device != src->buffer().device || des->extra.offset != srcDes->extra.offset;
    desO->setBackend(srcDesO->getBackend());
    dst->buffer().host = src->buffer().host;
    dst->buffer().device = src->buffer().device;
    dst->buffer().flags = src->buffer().flags;
    des->extra.offset = srcDes->extra.offset;
    des->group = -1;
    return needMalloc;
}

static bool _ClipDst(int* stride, const int srcOffsetO, const int dstOffsetO, const int* srcSize, const int* dstSize, const int sizeNum, int* dstMax, int* dstMin, bool checkFull = true) {
    /* Compute the range of dx, dy, dz:
     s0 * (dx-sx) + s1 * (dy-sy) + s2 * (dz-sz) + (doff-soff) = 0
     Assuming the regions do not overlap, decompose doff -> s0*xd + s1*yd + s2*zd and soff -> s0*xs + s1*ys + s2*zs
     with xd-xs=xo, yd-ys=yo, zd-zs=zo
     then:
     dx-sx+xo = 0
     dy-sy+yo = 0
     dz-sz+zo = 0
     dx=sx-xo -> [max(0, -xo), max(0, min(sxr-xo, dxr))]
     dy, dz are computed the same way
     **/

    int srcOffset = srcOffsetO;
    int dstOffset = dstOffsetO;
    int offsetBias = dstOffset - srcOffset;
    if (sizeNum == 0) {
        // All strides are zero, so every size is one
        return offsetBias == 0;
    }
    int o[3] = {0, 0, 0};
    int validIndex[3] = {0, 1, 2};
    if (sizeNum == 2) {
        if (stride[0] < stride[1]) {
            validIndex[0] = 1;
            validIndex[1] = 0;
        }
    } else if (sizeNum > 2) {
        int maxs = stride[0];
        int mins = stride[0];
        int maxi = 0;
        int mini = 0;
        // Sort index by stride
        for (int i=1; i<sizeNum; ++i) {
            int s = stride[i];
            if (s > maxs) {
                maxs = s;
                maxi = i;
            }
            if (s < mins) {
                mins = s;
                mini = i;
            }
        }
        for (int i=0; i<sizeNum; ++i) {
            if (i != maxi && i != mini) {
                validIndex[1] = i;
                break;
            }
        }
        validIndex[0] = maxi;
        validIndex[2] = mini;
    }
    // Compute offset
    for (int i=0; i<sizeNum; ++i) {
        int s = stride[validIndex[i]];
        int xs = srcOffset / s;
        int xd = dstOffset / s;
        o[validIndex[i]] = xd-xs;
        srcOffset = srcOffset % s;
        dstOffset = dstOffset % s;
    }
    if (0 != srcOffset || 0 != dstOffset) {
        return false;
    }
    int srcMax = 0;
    for (int i=0; i<sizeNum; ++i) {
        srcMax += srcSize[i] * stride[i];
        dstMin[i] = ALIMAX(0, -o[i]);
        dstMax[i] = ALIMIN(srcSize[i]-o[i], dstSize[i]);
    }
    int srcMin = -1;
    for (int i=0; i<sizeNum; ++i) {
        if (dstMax[i] < srcSize[i]) {
            if (srcMin == -1) {
                srcMin = stride[i];
            } else {
                srcMin = ALIMIN(stride[i], srcMin);
            }
        }
    }
    if (srcMin < 0 || (!checkFull)) {
        // Src is fully used
        return true;
    }

    // Check if dstMax falls inside src; if so, one region can't describe dst - src
    // TODO: Support slicing the region to support fuse
    for (int i=0; i<sizeNum; ++i) {
        if (dstMax[i] == dstSize[i]) {
            continue;
        }
        int bias = offsetBias + dstMax[i] * stride[i];
        if (bias < srcMax && bias >= srcMin) {
            // for [dstMax, dstSize], a value matching the formula may exist
            // Try to clip a new region
            for (int k=0; k<sizeNum; ++k) {
                if (dstMax[k] < srcSize[k]) {
                    int newDstSize[3];
                    int newSrcSize[3];
                    for (int j=0; j<sizeNum; ++j) {
                        newDstSize[j] = dstSize[j];
                        newSrcSize[j] = srcSize[j];
                    }
                    newSrcSize[k] = srcSize[k] - dstMax[k];
                    newDstSize[i] = dstSize[i] - dstMax[i];
                    int dstMaxNew[3];
                    int dstMinNew[3];
                    auto status = _ClipDst(stride, srcOffsetO + dstMax[k] * stride[k], dstOffsetO + dstMax[i] * stride[i], newSrcSize, newDstSize, sizeNum, dstMaxNew, dstMinNew, false);
                    if (status) {
                        bool valid = true;
                        for (int j=0; j<sizeNum; ++j) {
                            if (dstMaxNew[j] <= dstMinNew[j]) {
                                valid = false;
                                break;
                            }
                        }
                        if (valid) {
                            // Need new region
                            return false;
                        }
                    }
                }
            }
        }
    }
    return true;
}
static bool _RegionValid(int* stride, int offset, int* size, int sizeNum, size_t limitSize) {
    int maxOffset = offset;
    int minOffset = offset;
    // Check start and end
    for (int i=0; i<sizeNum; ++i) {
        if (stride[i] > 0) {
            maxOffset += (stride[i] * (size[i] - 1));
        } else {
            minOffset += (stride[i] * (size[i] - 1));
        }
    }
    if (minOffset < 0 || maxOffset >= limitSize) {
        return false;
    }
    return true;
}
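// Illustrative example: a view with offset = 2, stride = {0, 0, 1} and
// size = {1, 1, 8} touches elements 2..9, so it is valid for limitSize = 10
// but out of range for limitSize = 9. Negative strides are checked against the
// lower bound 0 the same way.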
class TensorUtils::FuseRegionStatus {
public:
    enum Status {
        FUSE_SRC_COPY,
        FUSE_REGION_COMPUTE
    };
    void apply(const Tensor::InsideDescribe::Region& srcReg, Tensor::InsideDescribe::Region& dstReg) {
        switch (mStatus) {
            case FUSE_SRC_COPY:
                dstReg.origin = srcReg.origin;
                dstReg.src.offset += srcReg.src.offset - srcReg.dst.offset;
                break;
            case FUSE_REGION_COMPUTE:
            {
                if (dstSize[0] == 0) {
                    dstReg.size[0] = 0;
                    dstReg.origin = nullptr;
                    break;
                }
                for (int i=0; i<3; ++i) {
                    dstReg.size[i] = 1;
                    dstReg.src.stride[i] = 0;
                    dstReg.dst.stride[i] = 0;
                }
                int valid[3] = {0, 0, 0};
                int offset = 3 - dstNum;
                if (dstNum > sizeNum) {
                    for (int i = dstNum - 1; i >= 0; i--) {
                        if (i < dstNum) {
                            if (dstSize[i] == 1) {
                                expandIdx = i;
                            }
                            dstReg.size[i+offset] = dstMax[i] - dstMin[i];
                            valid[i] = dstSize[i] > 1;
                        } else {
                            dstReg.size[i+offset] = 1;
                            valid[i] = 0;
                        }
                    }
                } else {
                    for (int i=0; i<dstNum; ++i) {
                        dstReg.size[i+offset] = dstMax[i] - dstMin[i];
                        valid[i] = dstSize[i] > 1 ? 1 : 0;
                    }
                }
                int idx = 0;
                for (int i = 0; i < 3; i++) {
                    if (valid[i] > 0 || i == expandIdx) {
                        dstReg.src.stride[i+offset] = newSrc[idx];
                        dstReg.dst.stride[i+offset] = dstDst[idx++];
                    }
                }
                dstReg.origin = srcReg.origin;
                dstReg.src.offset = newSrcOffset;
                dstReg.dst.offset = newDstOffset;
            }
                break;
            default:
                break;
        }
    }
    bool match(const Tensor::InsideDescribe::Region& srcReg, const Tensor::InsideDescribe::Region& dstReg) {
        // don't handle size > 1 with stride <= 0
        for (int i = 0; i < 3; i++) {
            if (srcReg.size[i] > 1 && (srcReg.src.stride[i] <= 0 || srcReg.dst.stride[i] <= 0)) {
                return false;
            }
            if (dstReg.size[i] > 1 && (dstReg.src.stride[i] <= 0 || dstReg.dst.stride[i] <= 0)) {
                return false;
            }
        }
        bool copyValid = true;
        // src data is not the full data of dst
        if (srcReg.dst.offset > dstReg.src.offset ||
            srcReg.dst.stride[1] > srcReg.size[2] ||
            srcReg.dst.stride[2] > srcReg.size[1] * srcReg.size[2]) {
            copyValid = false;
        }
        int dstTotalSize = 1, srcTotalSize = 1;
        int dstSrcMin = dstReg.src.offset;
        int dstSrcMax = dstSrcMin;
        int srcDstMin = srcReg.dst.offset;
        int srcDstMax = srcDstMin;
        for (int i = 0; i < 3; i++) {
            srcDstMax += srcReg.dst.stride[i] * (srcReg.size[i] - 1);
            dstSrcMax += dstReg.src.stride[i] * (dstReg.size[i] - 1);
            if (dstReg.size[i] > 1) {
                dstTotalSize *= dstReg.size[i];
            }
            if (srcReg.size[i] > 1) {
                srcTotalSize *= srcReg.size[i];
            }
        }
        // src data is not the full data of dst
        if (dstTotalSize > srcTotalSize) {
            copyValid = false;
        }
        // Valid range comes from srcReg: srcDstMin - srcDstMax; if dstReg's src range exceeds it, not valid for copy
        if (srcDstMin > dstSrcMin || srcDstMax < dstSrcMax) {
            copyValid = false;
        }
        // src copy fuse
        if (isCopyRegion(srcReg) && copyValid) {
            mStatus = FUSE_SRC_COPY;
            return true;
        }
    #define MNN_3_INT_INIT(x, y) { x[0] = y; x[1] = y; x[2] = y; }
        MNN_3_INT_INIT(dstStride, -1)
        MNN_3_INT_INIT(srcStride, -1)
        expandIdx = -1;
    #undef MNN_3_INT_INIT
        srcNum = 0, dstNum = 0, sizeNum = 0;
        for (int i = 0; i < 3; i++) {
            if (srcReg.size[i] > 1) {
                srcStride[srcNum] = srcReg.dst.stride[i];
                srcDst[srcNum]    = srcReg.dst.stride[i];
                srcSrc[srcNum]    = srcReg.src.stride[i];
                srcSize[srcNum]   = srcReg.size[i];
                srcNum++;
            }
            if (dstReg.size[i] > 1) {
                dstStride[dstNum] = dstReg.src.stride[i];
                dstDst[dstNum]    = dstReg.dst.stride[i];
                dstSrc[dstNum]    = dstReg.src.stride[i];
                dstSize[dstNum]   = dstReg.size[i];
                dstNum++;
            }
        }
        sizeNum = dstNum;
    #define MNN_3_INT_DIFF(r, x, y, i) if ((x[i] != y[0]) && (x[i] != y[1]) && (x[i] != y[2])) { if (r > 0) { return false; } else { r = x[i]; } }
        int srcExtra = -1, dstExtra = -1;
        MNN_3_INT_DIFF(srcExtra, srcStride, dstStride, 0)
        MNN_3_INT_DIFF(srcExtra, srcStride, dstStride, 1)
        MNN_3_INT_DIFF(srcExtra, srcStride, dstStride, 2)
        MNN_3_INT_DIFF(dstExtra, dstStride, srcStride, 0)
        MNN_3_INT_DIFF(dstExtra, dstStride, srcStride, 1)
        MNN_3_INT_DIFF(dstExtra, dstStride, srcStride, 2)
    #undef MNN_3_INT_DIFF
        if (dstExtra > 0) {
            if (!expandStrideSize(srcDst, srcSrc, srcSize, srcNum, dstExtra)) {
                return false;
            }
        }
        if (srcExtra > 0) {
            if (!expandStrideSize(dstSrc, dstDst, dstSize, dstNum, srcExtra)) {
                return false;
            }
        }
        // reorder srcSrc to newSrc by aligning srcDst with dstSrc
        for (int i = 0; i < srcNum; i++) {
            int index = -1;
            for (int j = 0; j < dstNum; j++) {
                if (dstSrc[j] == srcDst[i]) {
                    index = j;
                    break;
                }
            }
            if (-1 == index) {
                return false;
            }
            newSrc[index] = srcSrc[i];
            newSrcSize[index] = srcSize[i];
        }
        // set final size and set expandIdx if expand val is 1
        newSrcOffset = offsetCompute(srcReg, dstReg.src.offset, srcReg.dst.offset, true) + srcReg.src.offset;
        bool valid = _ClipDst(dstSrc, srcReg.dst.offset, dstReg.src.offset, newSrcSize, dstSize, dstNum, dstMax, dstMin);
        if (!valid) {
            return false;
        }
        newDstOffset = dstReg.dst.offset;
        for (int i=0; i<dstNum; ++i) {
            if (dstMax[i] <= dstMin[i]) {
                // Set region as empty
                dstSize[0] = 0;
                dstSize[1] = 0;
                dstSize[2] = 0;
                break;
            }
            if (dstMin[i] > 0) {
                newDstOffset += dstMin[i] * dstDst[i];
                newSrcOffset += dstMin[i] * newSrc[i];
            }
        }
        mStatus = FUSE_REGION_COMPUTE;
        return true;
    }
private:
    int mStatus;
    int mSrcOff;
    int mDstOff;
    // general fuse
    int srcDst[3], srcSrc[3], dstSrc[3], dstDst[3], srcSize[3], dstSize[3], newSrc[3], dstStride[3], srcStride[3];
    int dstMin[3],dstMax[3];
    int newSrcSize[3];
    int srcNum, dstNum, sizeNum;
    int newSrcOffset;
    int newDstOffset;
    int expandIdx;
};

TensorUtils::FuseWrap::FuseWrap() {
    mStatus = new FuseRegionStatus;
}
TensorUtils::FuseWrap::~FuseWrap() {
    delete mStatus;
}
bool TensorUtils::FuseWrap::match(const Tensor::InsideDescribe::Region& srcReg, const Tensor::InsideDescribe::Region& dstReg) {
    return mStatus->match(srcReg, dstReg);
}
#ifdef MNN_DEBUG_BLIT
static std::pair<size_t, size_t> _computeMinSrcDstSize(const Tensor::InsideDescribe::Region& reg) {
    size_t srcSize = 1 + reg.src.offset;
    size_t dstSize = 1 + reg.dst.offset;
    for (int i=0; i<3; ++i) {
        if (reg.src.stride[i] > 0) {
            srcSize += reg.src.stride[i] * reg.size[i];
        }
        if (reg.dst.stride[i] > 0) {
            dstSize += reg.dst.stride[i] * reg.size[i];
        }
    }
    return std::make_pair(srcSize, dstSize);
}
static void _computeRaw(std::vector<int>& dst, const std::vector<int>& src, const Tensor::InsideDescribe::Region& reg) {
    int dstOffset = reg.dst.offset;
    int srcOffset = reg.src.offset;
    ::memset(dst.data(), 0, dst.size() * sizeof(int));
    for (int z=0; z<reg.size[0]; ++z) {
        int srcZ = srcOffset + z * reg.src.stride[0];
        int dstZ = dstOffset + z * reg.dst.stride[0];
        for (int y=0; y<reg.size[1]; ++y) {
            int srcY = srcZ + y * reg.src.stride[1];
            int dstY = dstZ + y * reg.dst.stride[1];
            for (int x=0; x<reg.size[2]; ++x) {
                int srcX = srcY + x * reg.src.stride[2];
                int dstX = dstY + x * reg.dst.stride[2];
                dst[dstX] = src[srcX];
            }
        }
    }
}
static std::string _printRegion(const Tensor::InsideDescribe::Region& reg) {
    char info[2048];
    sprintf(info, "size: %d, %d, %d; src: %d, %d, %d, %d; dst: %d, %d, %d, %d", reg.size[0], reg.size[1], reg.size[2], reg.src.offset, reg.src.stride[0], reg.src.stride[1], reg.src.stride[2], reg.dst.offset, reg.dst.stride[0], reg.dst.stride[1], reg.dst.stride[2]);
    info[2047] = 0;
    return std::string(info);
}
#endif

void TensorUtils::FuseWrap::apply(const Tensor::InsideDescribe::Region& srcReg, Tensor::InsideDescribe::Region& dstReg) {
#ifdef MNN_DEBUG_BLIT
    auto srcSize = _computeMinSrcDstSize(srcReg);
    auto dstSize = _computeMinSrcDstSize(dstReg);
    auto midSize = ALIMAX(srcSize.second, dstSize.first);
    std::vector<int> srcData(srcSize.first);
    std::vector<int> midData(midSize);
    std::vector<int> dstData(dstSize.second);
    std::vector<int> dstDataFuse(dstSize.second);
    for (int i=0; i<srcSize.first; ++i) {
        srcData[i] = i;
    }
    _computeRaw(midData, srcData, srcReg);
    _computeRaw(dstData, midData, dstReg);
    {
        auto src = _printRegion(srcReg);
        auto dst = _printRegion(dstReg);
        MNN_PRINT("Fuse:\n %s \n %s\n To: \n", src.c_str(), dst.c_str());
    }
#endif
    mStatus->apply(srcReg, dstReg);
#ifdef MNN_DEBUG_BLIT
    {
        auto dst = _printRegion(dstReg);
        MNN_PRINT("%s\n", dst.c_str());
        _computeRaw(dstDataFuse, srcData, dstReg);
        if (dstDataFuse != dstData) {
            FUNC_PRINT(1);
            MNN_ASSERT(false);
        }
    }
#endif
}

void TensorUtils::adjustTensorForCompability(Tensor* newTensor) {
    if (newTensor->dimensions() < 4) {
        for (int n = newTensor->dimensions(); n < 4; ++n) {
            newTensor->setLength(n, 1);
        }
    }
}
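// Illustrative example: for a tensor created with shape {3, 5}, the unused
// extents dim[2] and dim[3] are set to 1, so code that assumes at least four
// dimension entries can read length(2) and length(3) safely; tensors that
// already have four or more dimensions are left untouched.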

Tensor::DimensionType TensorUtils::getDimType(const Tensor* t) {
    auto format = TensorUtils::getDescribe(t)->dimensionFormat;
    switch (format) {
        case MNN_DATA_FORMAT_NCHW:
            return Tensor::CAFFE;
        case MNN_DATA_FORMAT_NC4HW4:
            return Tensor::CAFFE_C4;
        case MNN_DATA_FORMAT_NHWC:
            return Tensor::TENSORFLOW;
        default:
            break;
    }
    return Tensor::TENSORFLOW;
}

std::vector<float> TensorUtils::getQuantInfo(const Tensor* t) {
    float scale = getDescribe(t)->quantAttr ? getDescribe(t)->quantAttr->scale : 0.0f;
    float zero = getDescribe(t)->quantAttr ? getDescribe(t)->quantAttr->zero : 0.0f;
    float min = getDescribe(t)->quantAttr ? getDescribe(t)->quantAttr->min : -127.0f;
    float max = getDescribe(t)->quantAttr ? getDescribe(t)->quantAttr->max : 127.0f;
    return {scale, zero, min, max};
}

Tensor::InsideDescribe* TensorUtils::getDescribeOrigin(const Tensor* tensor) {
    return tensor->mDescribe;
}
size_t TensorUtils::getRawSize(const Tensor* t) {
    size_t len = 1;
    int dim = t->dimensions();
    for (int i=0; i<dim; ++i) {
        len *= (size_t)t->length(i);
    }
    return len;
}
void TensorUtils::setRasterInputs(Command* cmd) {
    auto& regions = TensorUtils::getDescribe(cmd->outputs[0])->regions;
    cmd->inputs.resize(regions.size());
    for (int i=0; i<regions.size(); ++i) {
#ifdef DEBUG
        for (int j=0; j<3; ++j) {
            MNN_ASSERT(regions[i].size[j] > 0);
        }
#endif
        cmd->inputs[i] = regions[i].origin;
        auto des = getDescribe(regions[i].origin);
    }
}

int TensorUtils::getTensorChannelPack(const Tensor* tensor) {
    auto srcDes = TensorUtils::getDescribe(tensor);
    return srcDes->support_pack16 ? srcDes->channel_pack_num : 4;
}

void TensorUtils::setTensorChannelPack(const Tensor* tensor, int pack) {
    auto srcDes = TensorUtils::getDescribe(tensor);
    srcDes->channel_pack_num = srcDes->support_pack16 ? pack : 4;
}

void TensorUtils::setTensorSupportPack(const Tensor* tensor, bool flag) {
    auto srcDes = TensorUtils::getDescribe(tensor);
    srcDes->support_pack16 = flag;
}

void TensorUtils::setTensorPad(const Tensor* tensor, int left, int right, int bottom, int top) {
    auto srcDes = TensorUtils::getDescribe(tensor);
    srcDes->mPads.left = std::max(srcDes->mPads.left, left);
    srcDes->mPads.right = std::max(srcDes->mPads.right, right);
    srcDes->mPads.bottom = std::max(srcDes->mPads.bottom, bottom);
    srcDes->mPads.top = std::max(srcDes->mPads.top, top);
}

void TensorUtils::setSharedMem(const Tensor *tensor, Backend::MemObj *mem){
    auto srcDes = TensorUtils::getDescribe(tensor);
    srcDes->mSharedMem = mem;
}

Backend::MemObj* TensorUtils::getSharedMem(const Tensor* tensor){
    auto srcDes = TensorUtils::getDescribe(tensor);
    return srcDes->mSharedMem.get();
}

} // namespace MNN