mirror of https://github.com/alibaba/MNN.git
535 lines
22 KiB
C++
535 lines
22 KiB
C++
//
|
|
// GeometryStridedSlice.cpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2020/04/17.
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
//
|
|
|
|
#include "geometry/GeometryComputer.hpp"
|
|
#include "core/OpCommonUtils.hpp"
|
|
#include "core/Macro.h"
|
|
#include "ConvertUtils.hpp"
|
|
namespace MNN {
|
|
struct Block {
|
|
int start; // inclusive
|
|
int end; // exclusive
|
|
|
|
bool operator<(const Block& other) const {
|
|
return start < other.start;
|
|
}
|
|
};
|
|
|
|
class GeometryStridedSlice : public GeometryComputer {
|
|
public:
|
|
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
|
|
Context& context, CommandBuffer& res) const override {
|
|
Tensor* input = inputs[0];
|
|
// input haven't realized
|
|
auto output = outputs[0];
|
|
auto outputDes = TensorUtils::getDescribe(output);
|
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
|
outputDes->regions.clear();
|
|
const int inputDim = input->buffer().dimensions;
|
|
auto parameter = op->main_as_StridedSliceParam();
|
|
int32_t beginMask = parameter->beginMask();
|
|
int32_t endMask = parameter->endMask();
|
|
int32_t shrinkAxisMask = parameter->shrinkAxisMask();
|
|
int32_t ellipsisMask = parameter->ellipsisMask();
|
|
int32_t newAxisMask = parameter->newAxisMask();
|
|
int32_t fromType = parameter->fromType();
|
|
if (ellipsisMask && (ellipsisMask & (ellipsisMask - 1))) {
|
|
MNN_ERROR("only one non-zero bit is allowed in ellipsisMask\n");
|
|
return false;
|
|
}
|
|
|
|
MNN_ASSERT(inputs.size() >= 3 && inputs.size() <= 5);
|
|
Tensor *begin = inputs[1];
|
|
Tensor *end = inputs[2];
|
|
|
|
int32_t strideSize = begin->length(0);
|
|
MNN_ASSERT(begin->buffer().dimensions == end->buffer().dimensions);
|
|
|
|
int32_t inputShape[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t begins[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t ends[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t strides[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t axes[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t beginMasks[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t endMasks[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t shrinkAxisMasks[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t newAxisMasks[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t inputStride[MNN_MAX_TENSOR_DIM];
|
|
|
|
{
|
|
int stride = 1;
|
|
for (int i = input->buffer().dimensions - 1; i >= 0; --i) {
|
|
inputShape[i] = input->buffer().dim[i].extent;
|
|
inputStride[i] = stride;
|
|
stride *= inputShape[i];
|
|
if (inputShape[i] == 0) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < inputDim; i++) {
|
|
inputShape[i] = input->length(i);
|
|
}
|
|
for (int i = 0; i < strideSize; i++) {
|
|
beginMasks[i] = beginMask & (1 << i);
|
|
}
|
|
for (int i = 0; i < strideSize; i++) {
|
|
endMasks[i] = endMask & (1 << i);
|
|
}
|
|
for (int i = 0; i < strideSize; i++) {
|
|
shrinkAxisMasks[i] = shrinkAxisMask & (1 << i);
|
|
}
|
|
for (int i = 0; i < strideSize; i++) {
|
|
newAxisMasks[i] = newAxisMask & (1 << i);
|
|
}
|
|
|
|
// broadcast begin end stride axis param
|
|
if (fromType == 1) {
|
|
|
|
Tensor *axis = nullptr;
|
|
if(inputs.size() >= 4) {
|
|
axis = inputs[3];
|
|
}
|
|
|
|
Tensor *step = nullptr;
|
|
if(inputs.size() == 5) {
|
|
step = inputs[4];
|
|
}
|
|
|
|
for(int i = 0; i < inputDim; i++) {
|
|
begins[i] = 0;
|
|
ends[i] = inputShape[i];
|
|
strides[i] = 1;
|
|
}
|
|
|
|
for (int i = 0; i < strideSize; i++) {
|
|
auto temp_axis = i;
|
|
if(axis != nullptr) {
|
|
temp_axis = axis->host<int>()[i];
|
|
temp_axis = temp_axis < 0 ? (temp_axis + inputDim) : temp_axis;
|
|
MNN_ASSERT(temp_axis < MNN_MAX_TENSOR_DIM);
|
|
}
|
|
if(step != nullptr) {
|
|
strides[temp_axis] = step->host<int>()[i];
|
|
}
|
|
|
|
auto shape = inputShape[temp_axis];
|
|
auto temp_value = begin->host<int>()[i];
|
|
temp_value = temp_value < 0 ? (temp_value + shape) : temp_value;
|
|
begins[temp_axis] = temp_value;
|
|
|
|
temp_value = end->host<int>()[i];
|
|
temp_value = temp_value < 0 ? (temp_value + shape) : temp_value;
|
|
ends[temp_axis] = temp_value;
|
|
}
|
|
strideSize = inputDim;
|
|
|
|
} else if(fromType == 0) {
|
|
|
|
Tensor *strided = nullptr;
|
|
if(inputs.size() >= 4) {
|
|
strided = inputs[3];
|
|
MNN_ASSERT(begin->buffer().dimensions == strided->buffer().dimensions);
|
|
}
|
|
// deal ellipsis, expand strides info
|
|
if (ellipsisMask > 0) {
|
|
int32_t beginMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t endMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t shrinkAxisMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t newAxisMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
// expand stride info
|
|
int ellipsisPos = -1;
|
|
for (int i = 0; i < strideSize; i++) {
|
|
int temp = ellipsisMask & (1 << i);
|
|
if (temp != 0) {
|
|
ellipsisPos = i;
|
|
break;
|
|
}
|
|
}
|
|
MNN_ASSERT(ellipsisPos >= 0 && ellipsisPos < strideSize);
|
|
/*
|
|
Example: foo's dim is [2, 3, 4, 5, 6, 7], foo[0:2, :, 3:5, 3:6]:
|
|
1. strideSize = 4, inputDim = 6, ellipsis = 2(0010)
|
|
2. left part: 0:2, right part: 3:5, 3:6
|
|
3. expand: foo[0:2, 0:3, 0:4, 3:5, 3:6]
|
|
*/
|
|
int ellpsisSize = inputDim - strideSize, strideIdx = 0;
|
|
for (int i = 0; i < inputDim; i++) {
|
|
if (i == ellipsisPos) {
|
|
strideIdx++;
|
|
}
|
|
if (i >= ellipsisPos && i <= ellipsisPos + ellpsisSize) {
|
|
begins[i] = 0;
|
|
ends[i] = inputShape[i];
|
|
strides[i] = 1;
|
|
beginMasksTmp[i] = 0;
|
|
endMasksTmp[i] = 0;
|
|
shrinkAxisMasksTmp[i] = 0;
|
|
} else {
|
|
begins[i] = begin->host<int32_t>()[strideIdx];
|
|
ends[i] = end->host<int32_t>()[strideIdx];
|
|
if(strided != nullptr) {
|
|
strides[i] = strided->host<int32_t>()[strideIdx];
|
|
}
|
|
beginMasksTmp[i] = beginMasks[strideIdx];
|
|
endMasksTmp[i] = endMasks[strideIdx];
|
|
shrinkAxisMasksTmp[i] = shrinkAxisMasks[strideIdx];
|
|
newAxisMasksTmp[i] = newAxisMasks[strideIdx++];
|
|
}
|
|
}
|
|
for (int i = 0; i < inputDim; i++) {
|
|
beginMasks[i] = beginMasksTmp[i];
|
|
endMasks[i] = endMasksTmp[i];
|
|
shrinkAxisMasks[i] = shrinkAxisMasksTmp[i];
|
|
newAxisMasks[i] = newAxisMasksTmp[i];
|
|
}
|
|
strideSize = inputDim;
|
|
} else {
|
|
for (int i = 0; i < strideSize; i++) {
|
|
begins[i] = begin->host<int>()[i];
|
|
ends[i] = end->host<int>()[i];
|
|
strides[i] = strided->host<int>()[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
int32_t beginShape[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t endShape[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t stridedShape[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t outputShape[MNN_MAX_TENSOR_DIM] = { 0 };
|
|
int32_t reverseDim = -1;
|
|
int32_t shapeNum = 0;
|
|
|
|
auto beginAndEndShapeLimit = [](int shape, int dimSize, bool exclusive) -> int {
|
|
int maxShape = dimSize - 1, minShape = -dimSize;
|
|
if (exclusive) {
|
|
++maxShape;
|
|
--minShape;
|
|
}
|
|
shape = (shape > maxShape ? maxShape : shape);
|
|
shape = (shape < minShape ? minShape : shape);
|
|
if (shape < 0) {
|
|
shape += dimSize;
|
|
}
|
|
return shape;
|
|
};
|
|
|
|
for (int i = 0; i < strideSize; i++) {
|
|
if (newAxisMasks[i] > 0) {
|
|
// ignore newAxis beacuse it is 1
|
|
continue;
|
|
}
|
|
stridedShape[shapeNum] = (shrinkAxisMasks[i] > 0 ? 1 : strides[i]);
|
|
if (stridedShape[shapeNum] < 0) {
|
|
reverseDim = i;
|
|
}
|
|
if (beginMasks[i] > 0) {
|
|
beginShape[shapeNum] = stridedShape[shapeNum] < 0 ? inputShape[shapeNum] - 1 : 0;
|
|
} else {
|
|
beginShape[shapeNum] = stridedShape[shapeNum] < 0 ? beginAndEndShapeLimit(begins[i], inputShape[shapeNum], false) :
|
|
std::min(inputShape[shapeNum], begins[i]);
|
|
}
|
|
if (beginShape[shapeNum] < 0) {
|
|
auto temp = -beginShape[shapeNum];
|
|
beginShape[shapeNum] = UP_DIV(temp, input->buffer().dim[i].extent) * input->buffer().dim[i].extent + beginShape[shapeNum];
|
|
}
|
|
if (endMasks[i] > 0) {
|
|
endShape[shapeNum] = stridedShape[shapeNum] < 0 ? -1 : inputShape[shapeNum];
|
|
} else {
|
|
endShape[shapeNum] = stridedShape[shapeNum] < 0 ? std::max(-1, std::min(inputDim, ends[i])) :
|
|
beginAndEndShapeLimit(ends[i], inputShape[shapeNum], true);
|
|
}
|
|
|
|
if (shrinkAxisMasks[i] == 0) {
|
|
if (stridedShape[shapeNum] > 0) {
|
|
int size = (endShape[shapeNum] - beginShape[shapeNum] - 1) / stridedShape[shapeNum] + 1;
|
|
outputShape[shapeNum] = size;
|
|
} else {
|
|
int size = (endShape[shapeNum] - beginShape[shapeNum] + 1) / stridedShape[shapeNum] + 1;
|
|
outputShape[shapeNum] = size;
|
|
}
|
|
} else {
|
|
outputShape[shapeNum] = 1;
|
|
}
|
|
shapeNum++;
|
|
}
|
|
int dealDims = shapeNum;
|
|
int dimensionRemained = input->dimensions() - dealDims;
|
|
for (int i = 0; i < dimensionRemained; i++) {
|
|
outputShape[shapeNum] = input->length(dealDims + i);
|
|
stridedShape[shapeNum] = 1;
|
|
beginShape[shapeNum] = 0;
|
|
shapeNum++;
|
|
}
|
|
int remainSize = 1;
|
|
int remainDims[MNN_MAX_TENSOR_DIM];
|
|
int remainDimSize = shapeNum - 3;
|
|
for (int i = 0; i < (int)shapeNum - 3; ++i) {
|
|
remainSize *= outputShape[i];
|
|
remainDims[i] = outputShape[i];
|
|
}
|
|
outputDes->regions.resize(remainSize);
|
|
int regionSize = shapeNum < 3 ? shapeNum : 3;
|
|
if (reverseDim >= 0) {
|
|
remainDimSize = reverseDim;
|
|
for (int i = 0; i < reverseDim; ++i) {
|
|
remainSize *= outputShape[i];
|
|
remainDims[i] = outputShape[i];
|
|
}
|
|
outputDes->regions.resize(remainSize);
|
|
regionSize = shapeNum - reverseDim;
|
|
MNN_ASSERT(regionSize <= 3);
|
|
}
|
|
int mod[MNN_MAX_TENSOR_DIM];
|
|
OpCommonUtils::computeStride(mod, remainDims, (int)remainDimSize);
|
|
int outputStrideTotal = 1;
|
|
int basicInputOffset = 0;
|
|
for (int i = 0; i < shapeNum - regionSize; ++i) {
|
|
basicInputOffset += inputStride[i] * beginShape[i];
|
|
}
|
|
for (int i = 0; i < regionSize; ++i) {
|
|
int pos = shapeNum - i - 1;
|
|
auto len = outputShape[pos];
|
|
basicInputOffset += inputStride[pos] * beginShape[pos];
|
|
outputStrideTotal *= len;
|
|
}
|
|
int coordinates[MNN_MAX_TENSOR_DIM];
|
|
for (int r = 0; r < remainSize; ++r) {
|
|
OpCommonUtils::unravelIndexHelper(coordinates, mod, remainDimSize, r);
|
|
int inputOffset = basicInputOffset;
|
|
for (int i = 0; i < remainDimSize; ++i) {
|
|
inputOffset += coordinates[i] * inputStride[i] * stridedShape[i];
|
|
}
|
|
|
|
auto& reg = outputDes->regions[r];
|
|
reg.dst.offset = r * outputStrideTotal;
|
|
reg.src.offset = inputOffset;
|
|
reg.origin = input;
|
|
for (int i = 0; i < regionSize; ++i) {
|
|
int pos = shapeNum - i - 1;
|
|
reg.size[3 - i - 1] = outputShape[pos];
|
|
reg.src.stride[3 - i - 1] = inputStride[pos] * stridedShape[pos];
|
|
}
|
|
reg.dst.stride[0] = reg.size[1] * reg.size[2];
|
|
reg.dst.stride[1] = reg.size[2];
|
|
reg.dst.stride[2] = 1;
|
|
}
|
|
|
|
if (fromType == 0 && inputs.size() == 5) { // stride slice write
|
|
auto write = inputs[4]; // Data that is not the same in the input and output.
|
|
std::vector<int> shape(outputShape, outputShape + shapeNum);
|
|
if (write->shape() != shape) {
|
|
std::shared_ptr<Tensor> newTensor(new Tensor);
|
|
newTensor->buffer().type = write->buffer().type;
|
|
newTensor->buffer().dimensions = shapeNum;
|
|
for (int i = 0; i < shapeNum; i++) {
|
|
newTensor->setLength(i, outputShape[i]);
|
|
}
|
|
ConvertUtils::broadcastto(write, newTensor.get());
|
|
write = newTensor.get();
|
|
res.extras.emplace_back(newTensor);
|
|
}
|
|
|
|
// Add regions to replicate data that is the same between output and input.
|
|
// We should copy 'A','B','C','D' from 'input', the shaded area is decided by 'write'
|
|
/*
|
|
+--------------+
|
|
| |
|
|
| A |
|
|
| |
|
|
+--------------+
|
|
| //////////// |
|
|
| //////////// |
|
|
| //////////// |
|
|
+--------------+
|
|
| |
|
|
| B |
|
|
| |
|
|
+--------------+
|
|
| |
|
|
| C |
|
|
| |
|
|
+--------------+
|
|
| //////////// |
|
|
| //////////// |
|
|
| //////////// |
|
|
+--------------+
|
|
| |
|
|
| D |
|
|
| |
|
|
+--------------+
|
|
*/
|
|
|
|
std::vector< std::vector<int>> shadedInfos;
|
|
for (auto& regionShaded: outputDes->regions) {
|
|
// Swap the contents of src and dst in each region.
|
|
auto tmp = regionShaded.dst;
|
|
regionShaded.dst = regionShaded.src;
|
|
regionShaded.src = tmp;
|
|
regionShaded.origin = write;
|
|
|
|
int outterSize = regionShaded.size[0] * regionShaded.size[1];
|
|
int baseOffset = regionShaded.dst.offset;
|
|
int stride0 = regionShaded.dst.stride[0], size0 = regionShaded.size[0];
|
|
int stride1 = regionShaded.dst.stride[1], size1 = regionShaded.size[1];
|
|
int stride2 = regionShaded.dst.stride[2], size2 = regionShaded.size[2];
|
|
std::vector<int> tmpInfo = {outterSize, baseOffset, stride0, stride1, stride2, size0, size1, size2};
|
|
shadedInfos.emplace_back(tmpInfo);
|
|
}
|
|
|
|
int currentShadedInfoStart = 0;
|
|
int unitSize = input->length(inputDim - 1) * input->length(inputDim - 2) * input->length(inputDim - 3);
|
|
// shadedInfo size = output->elementSize() / unitSize
|
|
for (auto shadedInfo: shadedInfos) {
|
|
std::vector<Block> occupiedBlocks; // contains all shadow regions' start and end index.
|
|
occupiedBlocks.reserve(shadedInfo[0]);
|
|
int baseOffset = shadedInfo[1];
|
|
int stride0 = shadedInfo[2], size0 = shadedInfo[5];
|
|
int stride1 = shadedInfo[3], size1 = shadedInfo[6];
|
|
int stride2 = shadedInfo[4], size2 = shadedInfo[7];
|
|
int insideSize = output->length(inputDim - 1);
|
|
|
|
/*
|
|
occupiedBlock.start=x0, .end=x1
|
|
when stride2>1, the innermost axis (size2) is not continuous
|
|
x0 x1
|
|
+-------+-------+-------+-------+-------+-------+-------+-------+
|
|
|///////| |///////| |///////| |///////| |
|
|
+-------+-------+-------+-------+-------+-------+-------+-------+
|
|
*/
|
|
|
|
for (int i = 0; i < size0; ++i) {
|
|
for (int j = 0; j < size1; ++j) {
|
|
int blockStart = baseOffset + i * stride0 + j * stride1;
|
|
occupiedBlocks.push_back({blockStart - (blockStart % insideSize), blockStart - (blockStart % insideSize) + insideSize });
|
|
}
|
|
}
|
|
std::sort(occupiedBlocks.begin(), occupiedBlocks.end());
|
|
|
|
int currentFillPos = currentShadedInfoStart;
|
|
// 1. build region for A,B,C
|
|
for (const auto& block : occupiedBlocks) {
|
|
int gapStart = currentFillPos;
|
|
int gapEnd = block.start;
|
|
|
|
if (gapStart < gapEnd) {
|
|
// copy A, B, C
|
|
Tensor::InsideDescribe::Region fillRegion;
|
|
fillRegion.origin = input;
|
|
fillRegion.size[2] = gapEnd - gapStart;
|
|
fillRegion.src.offset = gapStart;
|
|
fillRegion.dst.offset = gapStart;
|
|
|
|
outputDes->regions.push_back(fillRegion);
|
|
}
|
|
|
|
currentFillPos = std::max(currentFillPos, block.end);
|
|
}
|
|
|
|
// last copied block D
|
|
const int totalSize = currentShadedInfoStart + unitSize;
|
|
int lastGapStart = currentFillPos;
|
|
if (lastGapStart < totalSize) {
|
|
Tensor::InsideDescribe::Region lastFillRegion;
|
|
lastFillRegion.origin = input;
|
|
|
|
lastFillRegion.size[2] = totalSize - lastGapStart;
|
|
lastFillRegion.src.offset = lastGapStart;
|
|
lastFillRegion.dst.offset = lastGapStart;
|
|
|
|
outputDes->regions.push_back(lastFillRegion);
|
|
}
|
|
|
|
// copied block between the innermost size2
|
|
const int headGapSize = baseOffset % insideSize;
|
|
|
|
if (headGapSize > 0) {
|
|
Tensor::InsideDescribe::Region headGapRegion;
|
|
headGapRegion.origin = input;
|
|
|
|
headGapRegion.size[0] = size0;
|
|
headGapRegion.size[1] = size1;
|
|
headGapRegion.size[2] = headGapSize;
|
|
|
|
headGapRegion.dst.stride[0] = stride0;
|
|
headGapRegion.dst.stride[1] = stride1;
|
|
headGapRegion.dst.stride[2] = 1;
|
|
|
|
int headGapStartOffset = baseOffset - headGapSize;
|
|
headGapRegion.dst.offset = headGapStartOffset;
|
|
|
|
headGapRegion.src = headGapRegion.dst;
|
|
|
|
outputDes->regions.push_back(headGapRegion);
|
|
}
|
|
|
|
// --- 2. inter the inside row
|
|
const int interPointGapSize = stride2 - 1;
|
|
if (interPointGapSize > 0 && size2 > 1) {
|
|
for (int k = 0; k < size2 - 1; ++k) {
|
|
Tensor::InsideDescribe::Region gapRegion;
|
|
gapRegion.origin = input;
|
|
|
|
gapRegion.size[0] = size0;
|
|
gapRegion.size[1] = size1;
|
|
gapRegion.size[2] = interPointGapSize;
|
|
|
|
gapRegion.dst.stride[0] = stride0;
|
|
gapRegion.dst.stride[1] = stride1;
|
|
gapRegion.dst.stride[2] = 1;
|
|
|
|
int gapStartOffset = baseOffset + k * stride2 + 1;
|
|
gapRegion.dst.offset = gapStartOffset;
|
|
|
|
gapRegion.src = gapRegion.dst;
|
|
|
|
outputDes->regions.push_back(gapRegion);
|
|
}
|
|
}
|
|
|
|
// Tail region of the row
|
|
const int lastOccupiedRelativePos = (baseOffset % insideSize) + (size2 - 1) * stride2;
|
|
const int tailGapSize = insideSize - lastOccupiedRelativePos - 1;
|
|
|
|
if (tailGapSize > 0) {
|
|
Tensor::InsideDescribe::Region tailGapRegion;
|
|
tailGapRegion.origin = input;
|
|
|
|
tailGapRegion.size[0] = size0;
|
|
tailGapRegion.size[1] = size1;
|
|
tailGapRegion.size[2] = tailGapSize;
|
|
|
|
tailGapRegion.dst.stride[0] = stride0;
|
|
tailGapRegion.dst.stride[1] = stride1;
|
|
tailGapRegion.dst.stride[2] = 1;
|
|
|
|
int tailGapStartOffset = baseOffset + (size2 - 1) * stride2 + 1;
|
|
tailGapRegion.dst.offset = tailGapStartOffset;
|
|
|
|
tailGapRegion.src = tailGapRegion.dst;
|
|
|
|
outputDes->regions.push_back(tailGapRegion);
|
|
}
|
|
|
|
currentShadedInfoStart += unitSize;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
|
|
static void _create() {
|
|
std::shared_ptr<GeometryComputer> comp(new GeometryStridedSlice);
|
|
GeometryComputer::registerGeometryComputer(comp, {OpType_StridedSlice});
|
|
}
|
|
|
|
REGISTER_GEOMETRY(GeometryStridedSlice, _create);
|
|
|
|
} // namespace MNN
|