// File: MNN/source/geometry/GeometryStridedSlice.cpp
// (Repository listing metadata: 535 lines, 22 KiB, C++)

//
// GeometryStridedSlice.cpp
// MNN
//
// Created by MNN on 2020/04/17.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "core/Macro.h"
#include "ConvertUtils.hpp"
namespace MNN {
// Half-open interval [start, end) of flat element offsets.
// Kept as a plain aggregate so it can be brace-initialized as {start, end}.
struct Block {
    int start; // first offset covered by the block (inclusive)
    int end;   // one past the last offset covered by the block (exclusive)

    // Orders blocks by their starting offset only; `end` does not take part in the comparison.
    bool operator<(const Block& rhs) const {
        return rhs.start > start;
    }
};
/**
 * Geometry computer registered for OpType_StridedSlice.
 *
 * onCompute() does not move any data itself. It marks the output as MEMORY_VIRTUAL and describes it
 * as a list of regions (offset / stride / size triples) that reference other tensors.
 *
 * Inputs as read by this code:
 *   inputs[0]  data tensor being sliced
 *   inputs[1]  begin values (int)
 *   inputs[2]  end values (int)
 *   fromType == 1:  inputs[3] = axes (optional), inputs[4] = steps (optional)
 *   fromType == 0:  inputs[3] = strides (optional, treated as 1 when absent),
 *                   inputs[4] = data to write into the sliced positions ("strided slice write")
 */
class GeometryStridedSlice : public GeometryComputer {
public:
    /**
     * Builds the region description of the strided slice for outputs[0].
     *
     * @param op       the StridedSlice op; its StridedSliceParam supplies the masks and fromType.
     * @param inputs   see the class comment for the layout (3 to 5 tensors).
     * @param outputs  outputs[0] receives the virtual regions.
     * @param context  unused by this implementation.
     * @param res      receives, in res.extras, the broadcast copy of the write tensor when one is needed.
     * @return false only when ellipsisMask has more than one bit set; true otherwise (including the
     *         early exit taken when any input extent is 0, in which case no regions are produced).
     */
    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        Tensor* input = inputs[0];
        // The input has not been realized yet; the output only refers to it through regions.
        auto output = outputs[0];
        auto outputDes = TensorUtils::getDescribe(output);
        outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
        outputDes->regions.clear();
        const int inputDim = input->buffer().dimensions;

        auto parameter = op->main_as_StridedSliceParam();
        int32_t beginMask = parameter->beginMask();
        int32_t endMask = parameter->endMask();
        int32_t shrinkAxisMask = parameter->shrinkAxisMask();
        int32_t ellipsisMask = parameter->ellipsisMask();
        int32_t newAxisMask = parameter->newAxisMask();
        int32_t fromType = parameter->fromType();
        // x & (x - 1) clears the lowest set bit, so a non-zero result means two or more bits are set.
        if (ellipsisMask && (ellipsisMask & (ellipsisMask - 1))) {
            MNN_ERROR("only one non-zero bit is allowed in ellipsisMask\n");
            return false;
        }
        MNN_ASSERT(inputs.size() >= 3 && inputs.size() <= 5);
        Tensor *begin = inputs[1];
        Tensor *end = inputs[2];
        int32_t strideSize = begin->length(0);
        MNN_ASSERT(begin->buffer().dimensions == end->buffer().dimensions);

        int32_t inputShape[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t begins[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t ends[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t strides[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t beginMasks[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t endMasks[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t shrinkAxisMasks[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t newAxisMasks[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t inputStride[MNN_MAX_TENSOR_DIM];
        {
            // Element strides of the input, computed from the innermost axis outwards.
            int stride = 1;
            for (int i = input->buffer().dimensions - 1; i >= 0; --i) {
                inputShape[i] = input->buffer().dim[i].extent;
                inputStride[i] = stride;
                stride *= inputShape[i];
                if (inputShape[i] == 0) {
                    // Empty input: nothing to describe.
                    return true;
                }
            }
        }
        for (int i = 0; i < inputDim; i++) {
            inputShape[i] = input->length(i);
        }
        // Split every bit mask into one entry per slice spec (non-zero means the bit is set).
        for (int i = 0; i < strideSize; i++) {
            beginMasks[i] = beginMask & (1 << i);
        }
        for (int i = 0; i < strideSize; i++) {
            endMasks[i] = endMask & (1 << i);
        }
        for (int i = 0; i < strideSize; i++) {
            shrinkAxisMasks[i] = shrinkAxisMask & (1 << i);
        }
        for (int i = 0; i < strideSize; i++) {
            newAxisMasks[i] = newAxisMask & (1 << i);
        }

        // Expand begin / end / stride (and axis) parameters into per-axis arrays.
        if (fromType == 1) {
            Tensor *axis = nullptr;
            if (inputs.size() >= 4) {
                axis = inputs[3];
            }
            Tensor *step = nullptr;
            if (inputs.size() == 5) {
                step = inputs[4];
            }
            // Axes that are not mentioned keep their full range with step 1.
            for (int i = 0; i < inputDim; i++) {
                begins[i] = 0;
                ends[i] = inputShape[i];
                strides[i] = 1;
            }
            for (int i = 0; i < strideSize; i++) {
                auto temp_axis = i;
                if (axis != nullptr) {
                    temp_axis = axis->host<int>()[i];
                    temp_axis = temp_axis < 0 ? (temp_axis + inputDim) : temp_axis;
                    MNN_ASSERT(temp_axis < MNN_MAX_TENSOR_DIM);
                }
                if (step != nullptr) {
                    strides[temp_axis] = step->host<int>()[i];
                }
                auto shape = inputShape[temp_axis];
                // Negative begin / end values count from the end of the axis.
                auto temp_value = begin->host<int>()[i];
                temp_value = temp_value < 0 ? (temp_value + shape) : temp_value;
                begins[temp_axis] = temp_value;
                temp_value = end->host<int>()[i];
                temp_value = temp_value < 0 ? (temp_value + shape) : temp_value;
                ends[temp_axis] = temp_value;
            }
            strideSize = inputDim;
        } else if (fromType == 0) {
            Tensor *strided = nullptr;
            if (inputs.size() >= 4) {
                strided = inputs[3];
                MNN_ASSERT(begin->buffer().dimensions == strided->buffer().dimensions);
            }
            // Deal with the ellipsis: expand the slice specs to one entry per input axis.
            if (ellipsisMask > 0) {
                int32_t beginMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
                int32_t endMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
                int32_t shrinkAxisMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
                int32_t newAxisMasksTmp[MNN_MAX_TENSOR_DIM] = { 0 };
                // Locate the (single) spec that carries the ellipsis bit.
                int ellipsisPos = -1;
                for (int i = 0; i < strideSize; i++) {
                    int temp = ellipsisMask & (1 << i);
                    if (temp != 0) {
                        ellipsisPos = i;
                        break;
                    }
                }
                MNN_ASSERT(ellipsisPos >= 0 && ellipsisPos < strideSize);
                /*
                 Example: foo's dim is [2, 3, 4, 5, 6, 7], foo[0:2, ..., 3:5, 3:6]:
                    1. strideSize = 4, inputDim = 6, ellipsis = 2(0010)
                    2. left part: 0:2, right part: 3:5, 3:6
                    3. expand: foo[0:2, 0:3, 0:4, 0:5, 3:5, 3:6]
                 */
                int ellipsisSize = inputDim - strideSize, strideIdx = 0;
                for (int i = 0; i < inputDim; i++) {
                    if (i == ellipsisPos) {
                        // Skip over the ellipsis spec itself.
                        strideIdx++;
                    }
                    if (i >= ellipsisPos && i <= ellipsisPos + ellipsisSize) {
                        // Axis covered by the ellipsis: take it whole.
                        begins[i] = 0;
                        ends[i] = inputShape[i];
                        strides[i] = 1;
                        beginMasksTmp[i] = 0;
                        endMasksTmp[i] = 0;
                        shrinkAxisMasksTmp[i] = 0;
                    } else {
                        begins[i] = begin->host<int32_t>()[strideIdx];
                        ends[i] = end->host<int32_t>()[strideIdx];
                        // Without a strides input use step 1; a step of 0 would divide by zero below.
                        strides[i] = (strided != nullptr) ? strided->host<int32_t>()[strideIdx] : 1;
                        beginMasksTmp[i] = beginMasks[strideIdx];
                        endMasksTmp[i] = endMasks[strideIdx];
                        shrinkAxisMasksTmp[i] = shrinkAxisMasks[strideIdx];
                        newAxisMasksTmp[i] = newAxisMasks[strideIdx++];
                    }
                }
                for (int i = 0; i < inputDim; i++) {
                    beginMasks[i] = beginMasksTmp[i];
                    endMasks[i] = endMasksTmp[i];
                    shrinkAxisMasks[i] = shrinkAxisMasksTmp[i];
                    newAxisMasks[i] = newAxisMasksTmp[i];
                }
                strideSize = inputDim;
            } else {
                for (int i = 0; i < strideSize; i++) {
                    begins[i] = begin->host<int>()[i];
                    ends[i] = end->host<int>()[i];
                    // The strides input is optional (only set when inputs.size() >= 4): default to 1
                    // instead of dereferencing a null tensor.
                    strides[i] = (strided != nullptr) ? strided->host<int>()[i] : 1;
                }
            }
        }

        int32_t beginShape[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t endShape[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t stridedShape[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t outputShape[MNN_MAX_TENSOR_DIM] = { 0 };
        int32_t reverseDim = -1;
        int32_t shapeNum = 0;
        // Clamps a begin/end value to the valid range of an axis of length dimSize and wraps negative
        // values around. `exclusive` widens the range by one on both sides (used for end values).
        auto beginAndEndShapeLimit = [](int shape, int dimSize, bool exclusive) -> int {
            int maxShape = dimSize - 1, minShape = -dimSize;
            if (exclusive) {
                ++maxShape;
                --minShape;
            }
            shape = (shape > maxShape ? maxShape : shape);
            shape = (shape < minShape ? minShape : shape);
            if (shape < 0) {
                shape += dimSize;
            }
            return shape;
        };
        for (int i = 0; i < strideSize; i++) {
            if (newAxisMasks[i] > 0) {
                // Ignore a new axis because its extent is 1.
                continue;
            }
            // A shrunk axis always selects exactly one element, so its step is forced to 1.
            stridedShape[shapeNum] = (shrinkAxisMasks[i] > 0 ? 1 : strides[i]);
            if (stridedShape[shapeNum] < 0) {
                reverseDim = i;
            }
            if (beginMasks[i] > 0) {
                // Masked begin: start from the first element in iteration order.
                beginShape[shapeNum] = stridedShape[shapeNum] < 0 ? inputShape[shapeNum] - 1 : 0;
            } else {
                beginShape[shapeNum] = stridedShape[shapeNum] < 0 ? beginAndEndShapeLimit(begins[i], inputShape[shapeNum], false) :
                                       std::min(inputShape[shapeNum], begins[i]);
            }
            if (beginShape[shapeNum] < 0) {
                // Wrap a still-negative begin into [0, extent) by adding a multiple of the extent.
                auto temp = -beginShape[shapeNum];
                beginShape[shapeNum] = UP_DIV(temp, input->buffer().dim[i].extent) * input->buffer().dim[i].extent + beginShape[shapeNum];
            }
            if (endMasks[i] > 0) {
                // Masked end: run to the last element in iteration order.
                endShape[shapeNum] = stridedShape[shapeNum] < 0 ? -1 : inputShape[shapeNum];
            } else {
                // For a negative step the end is limited by the extent of this axis (not by the rank
                // of the tensor) from above and by -1 (one before index 0) from below.
                endShape[shapeNum] = stridedShape[shapeNum] < 0 ? std::max(-1, std::min(inputShape[shapeNum], ends[i])) :
                                     beginAndEndShapeLimit(ends[i], inputShape[shapeNum], true);
            }
            if (shrinkAxisMasks[i] == 0) {
                if (stridedShape[shapeNum] > 0) {
                    int size = (endShape[shapeNum] - beginShape[shapeNum] - 1) / stridedShape[shapeNum] + 1;
                    outputShape[shapeNum] = size;
                } else {
                    int size = (endShape[shapeNum] - beginShape[shapeNum] + 1) / stridedShape[shapeNum] + 1;
                    outputShape[shapeNum] = size;
                }
            } else {
                outputShape[shapeNum] = 1;
            }
            shapeNum++;
        }
        // Trailing input axes without a slice spec are taken whole.
        int dealDims = shapeNum;
        int dimensionRemained = input->dimensions() - dealDims;
        for (int i = 0; i < dimensionRemained; i++) {
            outputShape[shapeNum] = input->length(dealDims + i);
            stridedShape[shapeNum] = 1;
            beginShape[shapeNum] = 0;
            shapeNum++;
        }

        // A region addresses at most three axes. The innermost (up to) three output axes go into each
        // region; the remaining outer axes are enumerated, one region per combination.
        int remainSize = 1;
        int remainDims[MNN_MAX_TENSOR_DIM];
        int remainDimSize = shapeNum - 3;
        for (int i = 0; i < (int)shapeNum - 3; ++i) {
            remainSize *= outputShape[i];
            remainDims[i] = outputShape[i];
        }
        outputDes->regions.resize(remainSize);
        int regionSize = shapeNum < 3 ? shapeNum : 3;
        if (reverseDim >= 0) {
            // With a reversed axis the region starts at that axis, so the outer iteration count has
            // to be recomputed from scratch rather than multiplied onto the previous product.
            remainSize = 1;
            remainDimSize = reverseDim;
            for (int i = 0; i < reverseDim; ++i) {
                remainSize *= outputShape[i];
                remainDims[i] = outputShape[i];
            }
            outputDes->regions.resize(remainSize);
            regionSize = shapeNum - reverseDim;
            MNN_ASSERT(regionSize <= 3);
        }
        int mod[MNN_MAX_TENSOR_DIM];
        OpCommonUtils::computeStride(mod, remainDims, (int)remainDimSize);
        int outputStrideTotal = 1;
        int basicInputOffset = 0;
        // Input offset contributed by the begin position of every axis.
        for (int i = 0; i < shapeNum - regionSize; ++i) {
            basicInputOffset += inputStride[i] * beginShape[i];
        }
        for (int i = 0; i < regionSize; ++i) {
            int pos = shapeNum - i - 1;
            auto len = outputShape[pos];
            basicInputOffset += inputStride[pos] * beginShape[pos];
            outputStrideTotal *= len;
        }
        int coordinates[MNN_MAX_TENSOR_DIM];
        for (int r = 0; r < remainSize; ++r) {
            // Turn the flat region index into coordinates over the outer axes.
            OpCommonUtils::unravelIndexHelper(coordinates, mod, remainDimSize, r);
            int inputOffset = basicInputOffset;
            for (int i = 0; i < remainDimSize; ++i) {
                inputOffset += coordinates[i] * inputStride[i] * stridedShape[i];
            }
            auto& reg = outputDes->regions[r];
            reg.dst.offset = r * outputStrideTotal;
            reg.src.offset = inputOffset;
            reg.origin = input;
            // Fill size / source stride from the innermost axis outwards.
            for (int i = 0; i < regionSize; ++i) {
                int pos = shapeNum - i - 1;
                reg.size[3 - i - 1] = outputShape[pos];
                reg.src.stride[3 - i - 1] = inputStride[pos] * stridedShape[pos];
            }
            // The destination is written densely.
            reg.dst.stride[0] = reg.size[1] * reg.size[2];
            reg.dst.stride[1] = reg.size[2];
            reg.dst.stride[2] = 1;
        }

        if (fromType == 0 && inputs.size() == 5) { // strided slice write
            auto write = inputs[4]; // Data that is not the same in the input and output.
            std::vector<int> shape(outputShape, outputShape + shapeNum);
            if (write->shape() != shape) {
                // Broadcast the write data to the shape of the slice; the temporary tensor is kept
                // alive through res.extras.
                std::shared_ptr<Tensor> newTensor(new Tensor);
                newTensor->buffer().type = write->buffer().type;
                newTensor->buffer().dimensions = shapeNum;
                for (int i = 0; i < shapeNum; i++) {
                    newTensor->setLength(i, outputShape[i]);
                }
                ConvertUtils::broadcastto(write, newTensor.get());
                write = newTensor.get();
                res.extras.emplace_back(newTensor);
            }
            // Add regions to replicate data that is the same between output and input.
            // We should copy 'A','B','C','D' from 'input', the shaded area is decided by 'write'
            /*
             +--------------+
             |              |
             |      A       |
             |              |
             +--------------+
             | //////////// |
             | //////////// |
             | //////////// |
             +--------------+
             |              |
             |      B       |
             |              |
             +--------------+
             |              |
             |      C       |
             |              |
             +--------------+
             | //////////// |
             | //////////// |
             | //////////// |
             +--------------+
             |              |
             |      D       |
             |              |
             +--------------+
             */
            // The slice regions are captured first because the loop further down appends to
            // outputDes->regions, which must not happen while iterating over that same vector.
            // Layout of each entry: {outerSize, baseOffset, stride0, stride1, stride2, size0, size1, size2}.
            std::vector< std::vector<int>> shadedInfos;
            shadedInfos.reserve(outputDes->regions.size());
            for (auto& regionShaded: outputDes->regions) {
                // Swap the contents of src and dst in each region: the slice positions now become the
                // destination and are fed from 'write'.
                auto tmp = regionShaded.dst;
                regionShaded.dst = regionShaded.src;
                regionShaded.src = tmp;
                regionShaded.origin = write;
                int outerSize = regionShaded.size[0] * regionShaded.size[1];
                int baseOffset = regionShaded.dst.offset;
                int stride0 = regionShaded.dst.stride[0], size0 = regionShaded.size[0];
                int stride1 = regionShaded.dst.stride[1], size1 = regionShaded.size[1];
                int stride2 = regionShaded.dst.stride[2], size2 = regionShaded.size[2];
                std::vector<int> tmpInfo = {outerSize, baseOffset, stride0, stride1, stride2, size0, size1, size2};
                shadedInfos.emplace_back(tmpInfo);
            }
            int currentShadedInfoStart = 0;
            // Number of input elements spanned by the innermost (up to) three axes. Only axes that
            // exist are multiplied, so tensors with fewer than three dimensions do not read
            // out-of-range extents.
            int unitSize = 1;
            for (int i = std::max(0, inputDim - 3); i < inputDim; ++i) {
                unitSize *= input->length(i);
            }
            // shadedInfo size = output->elementSize() / unitSize
            for (const auto& shadedInfo: shadedInfos) {
                std::vector<Block> occupiedBlocks; // contains all shadow regions' start and end index.
                occupiedBlocks.reserve(shadedInfo[0]);
                int baseOffset = shadedInfo[1];
                int stride0 = shadedInfo[2], size0 = shadedInfo[5];
                int stride1 = shadedInfo[3], size1 = shadedInfo[6];
                int stride2 = shadedInfo[4], size2 = shadedInfo[7];
                int insideSize = output->length(inputDim - 1);
                /*
                 occupiedBlock.start=x0, .end=x1
                 when stride2>1, the innermost axis (size2) is not continuous
                 x0                                                              x1
                 +-------+-------+-------+-------+-------+-------+-------+-------+
                 |///////|       |///////|       |///////|       |///////|       |
                 +-------+-------+-------+-------+-------+-------+-------+-------+
                 */
                // Every written row occupies the whole innermost row that contains it.
                for (int i = 0; i < size0; ++i) {
                    for (int j = 0; j < size1; ++j) {
                        int blockStart = baseOffset + i * stride0 + j * stride1;
                        occupiedBlocks.push_back({blockStart - (blockStart % insideSize), blockStart - (blockStart % insideSize) + insideSize });
                    }
                }
                std::sort(occupiedBlocks.begin(), occupiedBlocks.end());
                int currentFillPos = currentShadedInfoStart;
                // 1. build region for A,B,C
                for (const auto& block : occupiedBlocks) {
                    int gapStart = currentFillPos;
                    int gapEnd = block.start;
                    if (gapStart < gapEnd) {
                        // copy A, B, C
                        Tensor::InsideDescribe::Region fillRegion;
                        fillRegion.origin = input;
                        fillRegion.size[2] = gapEnd - gapStart;
                        fillRegion.src.offset = gapStart;
                        fillRegion.dst.offset = gapStart;
                        outputDes->regions.push_back(fillRegion);
                    }
                    currentFillPos = std::max(currentFillPos, block.end);
                }
                // last copied block D
                const int totalSize = currentShadedInfoStart + unitSize;
                int lastGapStart = currentFillPos;
                if (lastGapStart < totalSize) {
                    Tensor::InsideDescribe::Region lastFillRegion;
                    lastFillRegion.origin = input;
                    lastFillRegion.size[2] = totalSize - lastGapStart;
                    lastFillRegion.src.offset = lastGapStart;
                    lastFillRegion.dst.offset = lastGapStart;
                    outputDes->regions.push_back(lastFillRegion);
                }
                // Head of every occupied row: elements before the first written position.
                const int headGapSize = baseOffset % insideSize;
                if (headGapSize > 0) {
                    Tensor::InsideDescribe::Region headGapRegion;
                    headGapRegion.origin = input;
                    headGapRegion.size[0] = size0;
                    headGapRegion.size[1] = size1;
                    headGapRegion.size[2] = headGapSize;
                    headGapRegion.dst.stride[0] = stride0;
                    headGapRegion.dst.stride[1] = stride1;
                    headGapRegion.dst.stride[2] = 1;
                    int headGapStartOffset = baseOffset - headGapSize;
                    headGapRegion.dst.offset = headGapStartOffset;
                    // Copied in place: source and destination views are identical.
                    headGapRegion.src = headGapRegion.dst;
                    outputDes->regions.push_back(headGapRegion);
                }
                // --- 2. gaps between written positions inside a row (only when stride2 > 1)
                const int interPointGapSize = stride2 - 1;
                if (interPointGapSize > 0 && size2 > 1) {
                    for (int k = 0; k < size2 - 1; ++k) {
                        Tensor::InsideDescribe::Region gapRegion;
                        gapRegion.origin = input;
                        gapRegion.size[0] = size0;
                        gapRegion.size[1] = size1;
                        gapRegion.size[2] = interPointGapSize;
                        gapRegion.dst.stride[0] = stride0;
                        gapRegion.dst.stride[1] = stride1;
                        gapRegion.dst.stride[2] = 1;
                        int gapStartOffset = baseOffset + k * stride2 + 1;
                        gapRegion.dst.offset = gapStartOffset;
                        gapRegion.src = gapRegion.dst;
                        outputDes->regions.push_back(gapRegion);
                    }
                }
                // Tail region of the row: elements after the last written position.
                const int lastOccupiedRelativePos = (baseOffset % insideSize) + (size2 - 1) * stride2;
                const int tailGapSize = insideSize - lastOccupiedRelativePos - 1;
                if (tailGapSize > 0) {
                    Tensor::InsideDescribe::Region tailGapRegion;
                    tailGapRegion.origin = input;
                    tailGapRegion.size[0] = size0;
                    tailGapRegion.size[1] = size1;
                    tailGapRegion.size[2] = tailGapSize;
                    tailGapRegion.dst.stride[0] = stride0;
                    tailGapRegion.dst.stride[1] = stride1;
                    tailGapRegion.dst.stride[2] = 1;
                    int tailGapStartOffset = baseOffset + (size2 - 1) * stride2 + 1;
                    tailGapRegion.dst.offset = tailGapStartOffset;
                    tailGapRegion.src = tailGapRegion.dst;
                    outputDes->regions.push_back(tailGapRegion);
                }
                currentShadedInfoStart += unitSize;
            }
        }
        return true;
    }
};
static void _create() {
std::shared_ptr<GeometryComputer> comp(new GeometryStridedSlice);
GeometryComputer::registerGeometryComputer(comp, {OpType_StridedSlice});
}
REGISTER_GEOMETRY(GeometryStridedSlice, _create);
} // namespace MNN