//
// CPUTensorConvert.cpp
// MNN
//
// Created by MNN on 2018/08/04.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUTensorConvert.hpp"
#include "backend/cpu/CPUBackend.hpp"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/Concurrency.h"
namespace MNN {
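// Layout helpers. NC4HW4 packs channels in groups of 4 (padded to
// ALIGN_UP4(c)), so one batch occupies ALIGN_UP4(c) * area elements, while
// NHWC and NCHW occupy c * area elements per batch. Each helper walks the
// batches and delegates one slice at a time to the optimized
// MNNPack*/MNNUnpack* kernels.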
static void _NC4HW42NHWCUint8(const uint8_t* source, uint8_t* dest, int b, int c, int area) {
int sourceBatchsize = ALIGN_UP4(c) * area;
int destBatchSize = c * area;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
MNNPackTransposeUint8(dstBatch, srcBatch, area, c);
}
}
static void _NC4HW42NHWCInt16(const int16_t* source, int16_t* dest, int b, int c, int area) {
int sourceBatchsize = ALIGN_UP4(c) * area;
int destBatchSize = c * area;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
MNNPackTransposeInt16(dstBatch, srcBatch, area, c);
}
}
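// NHWC -> NC4HW4: the transpose-and-pack direction, again batch by batch.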
static void _NHWC2NC4HW4Uint8(const uint8_t* source, uint8_t* dest, int b, int c, int area) {
int sourceBatchsize = c * area;
int destBatchSize = ALIGN_UP4(c) * area;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
MNNUnpackTransposeUint8(dstBatch, srcBatch, area, c);
}
}
static void _NHWC2NC4HW4Int16(const int16_t* source, int16_t* dest, int b, int c, int area) {
int sourceBatchsize = c * area;
int destBatchSize = ALIGN_UP4(c) * area;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
MNNUnpackTransposeInt16(dstBatch, srcBatch, area, c);
}
}
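// Float variants of the same NC4HW4 <-> NHWC conversions.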
static void NC4HW42NHWC(const float* source, float* dest, int b, int c, int area) {
int sourceBatchsize = ALIGN_UP4(c) * area;
int destBatchSize = c * area;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
MNNPackTranspose(dstBatch, srcBatch, area, c);
}
}
static void NHWC2NC4HW4(const float* source, float* dest, int b, int c, int area) {
int sourceBatchsize = c * area;
int destBatchSize = ALIGN_UP4(c) * area;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
MNNUnpackTranspose(dstBatch, srcBatch, area, c);
}
}
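// Naive scalar transposes between NCHW and NHWC, templated on the element
// type so one implementation covers 1-, 2- and 4-byte data:
// dest[b][hw][c] = source[b][c][hw].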
template<typename T>
void NCHW2NHWC(const T* source, T* dest, int b, int c, int area) {
int sourceBatchsize = c * area;
int destBatchSize = sourceBatchsize;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
for (int i = 0; i < area; ++i) {
auto srcArea = srcBatch + i;
auto dstArea = dstBatch + i * c;
for (int ci = 0; ci < c; ++ci) {
dstArea[ci] = srcArea[ci * area];
}
}
}
}
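// Inverse mapping: dest[b][c][hw] = source[b][hw][c].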
template<typename T>
void NHWC2NCHW(const T* source, T* dest, int b, int c, int area) {
int sourceBatchsize = c * area;
int destBatchSize = sourceBatchsize;
for (int bi = 0; bi < b; ++bi) {
auto srcBatch = source + bi * sourceBatchsize;
auto dstBatch = dest + bi * destBatchSize;
for (int i = 0; i < area; ++i) {
auto srcArea = srcBatch + i * c;
auto dstArea = dstBatch + i;
for (int ci = 0; ci < c; ++ci) {
dstArea[ci * area] = srcArea[ci];
}
}
}
}
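// Raw layout conversion between NCHW, NHWC and NC4HW4. `bitLength` is the
// element size in bytes (1, 2 or 4, taken from the tensor's data type);
// `core` supplies the packing unit (core->pack) and the pack/unpack kernels
// used on the float path.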
ErrorCode CPUTensorConverter::convert(const void* inputRaw, void* outputRaw, MNN_DATA_FORMAT source, MNN_DATA_FORMAT dest, int batch, int area, int channel, int bitLength, const CoreFunctions* core) {
auto channelC4 = UP_DIV(channel, core->pack);
auto batchStrideC4 = channelC4 * area * core->pack;
auto batchStride = area * channel;
// Fast path: source and dest share the same data layout.
// This case occurs in the BackendTest for BF16 data.
if (source == dest) {
::memcpy(outputRaw, inputRaw, batch * area * channel * bitLength);
return NO_ERROR;
}
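// NC4HW4 -> NCHW: unpack the channel groups back to planar layout,
// dispatching on element size.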
if (MNN_DATA_FORMAT_NC4HW4 == source && MNN_DATA_FORMAT_NCHW == dest) {
if (bitLength == 1) {
for (int i = 0; i < batch; ++i) {
MNNUnpackC4Uint8((uint8_t*)outputRaw + batchStride * i,
(const uint8_t*)inputRaw + batchStrideC4 * i, area, channel);
}
return NO_ERROR;
}
if (bitLength == 2) {
for (int i = 0; i < batch; ++i) {
MNNUnpackC4Int16((int16_t*)outputRaw + batchStride * i,
(const int16_t*)inputRaw + batchStrideC4 * i, area, channel);
}
return NO_ERROR;
}
for (int i = 0; i < batch; ++i) {
core->MNNUnpackCUnit((float*)outputRaw + batchStride * i, (const float*)inputRaw + batchStrideC4 * i, area, channel);
}
return NO_ERROR;
}
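// NCHW -> NC4HW4: the packing direction (fixed groups of 4 for 8/16-bit
// data, core->pack for the float kernel).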
if (MNN_DATA_FORMAT_NCHW == source && MNN_DATA_FORMAT_NC4HW4 == dest) {
if (bitLength == 1) {
for (int i = 0; i < batch; ++i) {
MNNPackC4Uint8((uint8_t*)outputRaw + batchStrideC4 * i, (const uint8_t*)inputRaw + batchStride * i, area, channel);
2019-09-02 11:13:14 +08:00
}
return NO_ERROR;
}
if (bitLength == 2) {
for (int i = 0; i < batch; ++i) {
MNNPackC4Int16((int16_t*)outputRaw + batchStrideC4 * i, (const int16_t*)inputRaw + batchStride * i, area, channel);
}
return NO_ERROR;
}
for (int i = 0; i < batch; ++i) {
core->MNNPackCUnit((float*)outputRaw + batchStrideC4 * i, (const float*)inputRaw + batchStride * i, area, channel);
}
return NO_ERROR;
}
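// NHWC <-> NC4HW4 needs a transpose in addition to (un)packing.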
if (MNN_DATA_FORMAT_NHWC == source && MNN_DATA_FORMAT_NC4HW4 == dest) {
if (bitLength == 1) {
_NHWC2NC4HW4Uint8((uint8_t*)inputRaw, (uint8_t*)outputRaw, batch, channel, area);
} else if (bitLength == 2) {
_NHWC2NC4HW4Int16((int16_t*)inputRaw, (int16_t*)outputRaw, batch, channel, area);
} else {
for (int i = 0; i < batch; ++i) {
core->MNNPackCUnitTranspose((float*)outputRaw + batchStrideC4 * i, (const float*)inputRaw + batchStride * i, area, channel);
}
}
} else if (MNN_DATA_FORMAT_NC4HW4 == source && MNN_DATA_FORMAT_NHWC == dest) {
if (bitLength == 1) {
_NC4HW42NHWCUint8((uint8_t*)inputRaw, (uint8_t*)outputRaw, batch, channel, area);
} else if (bitLength == 2) {
_NC4HW42NHWCInt16((int16_t*)inputRaw, (int16_t*)outputRaw, batch, channel, area);
} else {
for (int i = 0; i < batch; ++i) {
core->MNNUnpackCUnitTranspose((float*)outputRaw + batchStride * i, (const float*)inputRaw + batchStrideC4 * i, area, channel);
}
}
} else if (MNN_DATA_FORMAT_NHWC == source && MNN_DATA_FORMAT_NCHW == dest) {
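// Plain NHWC -> NCHW transpose, no channel padding; pick the template
// instantiation matching the element width.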
switch (bitLength) {
case 1:
NHWC2NCHW((int8_t*)inputRaw, (int8_t*)outputRaw, batch, channel, area);
break;
case 2:
NHWC2NCHW((int16_t*)inputRaw, (int16_t*)outputRaw, batch, channel, area);
break;
case 4:
NHWC2NCHW((float*)inputRaw, (float*)outputRaw, batch, channel, area);
break;
default:
break;
}
} else if (MNN_DATA_FORMAT_NCHW == source && MNN_DATA_FORMAT_NHWC == dest) {
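// Inverse direction: planar NCHW back to interleaved NHWC.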
switch (bitLength) {
case 1:
NCHW2NHWC((int8_t*)inputRaw, (int8_t*)outputRaw, batch, channel, area);
break;
case 2:
NCHW2NHWC((int16_t*)inputRaw, (int16_t*)outputRaw, batch, channel, area);
break;
case 4:
NCHW2NHWC((float*)inputRaw, (float*)outputRaw, batch, channel, area);
break;
default:
break;
}
} else {
return NOT_SUPPORT;
}
return NO_ERROR;
}
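// Collapse an N-dimensional shape into (batch, area, channel) for the given
// layout: channel is dim 1 for NCHW/NC4HW4 and the last dim for NHWC; every
// remaining axis is folded into `area`.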
std::tuple<int, int, int> CPUTensorConverter::splitDimensions(const halide_buffer_t& ib, MNN_DATA_FORMAT source) {
int area = 1, batch = ib.dim[0].extent, channel;
if (source == MNN_DATA_FORMAT_NC4HW4 || source == MNN_DATA_FORMAT_NCHW) {
channel = ib.dim[1].extent;
for (int axis = 2; axis < ib.dimensions; ++axis) {
area *= ib.dim[axis].extent;
}
} else {
channel = ib.dim[ib.dimensions - 1].extent;
for (int axis = 1; axis < ib.dimensions - 1; ++axis) {
area *= ib.dim[axis].extent;
}
}
return std::make_tuple(batch, area, channel);
}
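// Tensor-level entry point: reads the dimension formats recorded by
// TensorUtils and forwards to the raw converter above. Passing nullptr for
// `core` falls back to MNNGetCoreFunctions(). A minimal usage sketch
// (hypothetical caller; both tensors assumed allocated with matching shapes):
//   // auto code = CPUTensorConverter::convert(src, dst, nullptr);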
ErrorCode CPUTensorConverter::convert(const Tensor* input, const Tensor* output, const CoreFunctions* core) {
auto ib = input->buffer();
auto ob = output->buffer();
auto source = TensorUtils::getDescribe(input)->dimensionFormat;
auto dest = TensorUtils::getDescribe(output)->dimensionFormat;
if (ib.dimensions <= 1 || source == dest) {
::memcpy(ob.host, ib.host, input->size());
return NO_ERROR;
}
if (nullptr == core) {
core = MNNGetCoreFunctions();
}
if (source == MNN_DATA_FORMAT_UNKNOWN || dest == MNN_DATA_FORMAT_UNKNOWN) {
MNN_ERROR("unknown data format!\nsrc: %s, dst: %s\n", EnumNameMNN_DATA_FORMAT(source), EnumNameMNN_DATA_FORMAT(dest));
return INVALID_VALUE;
}
auto tup = splitDimensions(ib, source);
int area = std::get<1>(tup), batch = std::get<0>(tup), channel = std::get<2>(tup);
const int bitLength = ib.type.bytes();
auto code = convert(ib.host, ob.host, source, dest, batch, area, channel, bitLength, core);
if (NO_ERROR != code) {
MNN_ERROR("Error in CPUTensorConvert\n");
return code;
}
return NO_ERROR;
}
} // namespace MNN