GitHub release 1.1.0

This commit is contained in:
Hui Shu 2020-11-05 16:41:56 +08:00
parent 939a80dba8
commit d6795ad031
1296 changed files with 98954 additions and 55065 deletions

8
.gitignore vendored
View File

@ -330,7 +330,6 @@ project/android/.idea/caches/build_file_checksums.ser
# FIXME(haijing): Xcode pre-build stage breaks compilation of flatbuffers by setting envs that do cmake cross-compilation for iOS
# schema/current
schema/private
schema/current
tools/converter/source/IR
benchmark/benchmark.txt
@ -345,18 +344,13 @@ pymnn/android/.idea/modules.xml
pymnn/android/.idea/runConfigurations.xml
pymnn/android/.idea/vcs.xml
pymnn/android/.idea/caches/build_file_checksums.ser
pymnn/src/pybind_private/
buildios
build*/
include/MNN/VCS.h
source/backend/opencl/execution/cl/codegen/opencl_program.cc
source/backend/opencl/execution/cl/opencl_program.cc
# FIXME(haijing): MTL issues.....
# source/backend/metal/MetalOPRegister.mm
source/backend/opengl/AllShader.cpp
include/MNN/backend/opengl/shaders/AllShader.h
source/backend/vulkan/compiler/AllShader.cpp
include/MNN/backend/vulkan/shaders/AllShader.h
.idea
project/ios/ios_64
project/ios/ios_32

View File

@ -49,6 +49,7 @@ include(FindPythonInterp REQUIRED)
option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen" OFF)
option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF)
option(MNN_BUILD_SHARED_LIBS "MNN build shared or static lib" ON)
option(MNN_WIN_RUNTIME_MT "MNN use /MT on Windows dll" OFF)
option(MNN_FORBID_MULTI_THREAD "Disable Multi Thread" OFF)
option(MNN_OPENMP "Use OpenMP's thread pool implementation. Does not work on iOS or Mac OS" OFF)
option(MNN_USE_THREAD_POOL "Use MNN's own thread pool implementation" ON)
@ -62,14 +63,14 @@ option(MNN_SUPPORT_TFLITE_QUAN "Enable MNN's tflite quantized op" ON)
option(MNN_DEBUG_MEMORY "MNN Debug Memory Access" OFF)
option(MNN_DEBUG_TENSOR_SIZE "Enable Tensor Size" OFF)
option(MNN_GPU_TRACE "Enable MNN Gpu Debug" OFF)
option(MNN_OPENCL_LWS_TUNE "Enable MNN OpenCL Lws Tuning" ON)
option(MNN_PORTABLE_BUILD "Link the static version of third party libraries where possible to improve the portability of built executables" OFF)
option(MNN_SEP_BUILD "Build MNN Backends and expression separately. Only works with MNN_BUILD_SHARED_LIBS=ON" ON)
option(NATIVE_LIBRARY_OUTPUT "Native Library Path" OFF)
option(NATIVE_INCLUDE_OUTPUT "Native Include Path" OFF)
option(MNN_AAPL_FMWK "Build MNN.framework instead of traditional .a/.dylib" OFF)
option(MNN_FMA_ENABLE "x86 routine use fma extension" OFF)
option(MNN_WITH_PLUGIN "Build with plugin op support." OFF)
option(MNN_BUILD_MINI "Build MNN-MINI that just supports fixed shape models." OFF)
option(MNN_USE_SSE "Use SSE optimization for x86 if possible" ON)
IF(NOT MNN_BUILD_SHARED_LIBS)
message(WARNING "Close MNN_SEP_BUILD for static library")
@ -79,13 +80,14 @@ IF(APPLE AND MNN_AAPL_FMWK AND MNN_SEP_BUILD)
message(WARNING "MNN_SEP_BUILD AND MNN_AAPL_FMWK can't coexist. Turning off MNN_SEP_BUILD")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF()
IF(MSVC OR WIN32)
IF(WIN32)
IF(MNN_SEP_BUILD)
message(WARNING "MNN_SEP_BUILD IS TROUBLESOME ON Windows. Forcing OFF...")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF()
SET(MNN_USE_SYSTEM_LIB ON CACHE BOOL "<docstring>" FORCE)
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
IF(MSVC)
# generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946
SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
@ -95,11 +97,12 @@ IF(MSVC OR WIN32)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
ENDIF()
ENDIF()
include(${CMAKE_CURRENT_LIST_DIR}/cmake/macros.cmake)
IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT MNN_BUILD_SHARED_LIBS AND NOT (MSVC OR WIN32))
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
IF(MNN_BUILD_CONVERTER)
SET(MNN_PORTABLE_BUILD ON CACHE BOOL "<docstring>" FORCE)
@ -117,6 +120,9 @@ endif()
if(MNN_SUPPORT_TFLITE_QUAN)
add_definitions(-DMNN_SUPPORT_TFLITE_QUAN)
endif()
if(MNN_BUILD_MINI)
add_definitions(-DMNN_BUILD_MINI)
endif()
# debug options
if(MNN_DEBUG_MEMORY)
@ -128,9 +134,6 @@ endif()
if(MNN_GPU_TRACE)
add_definitions(-DMNN_GPU_FORCE_FINISH)
endif()
if(MNN_OPENCL_LWS_TUNE)
add_definitions(-DMNN_OPENCL_LWS_TUNE)
endif()
# backend options
option(MNN_METAL "Enable Metal" OFF)
@ -138,11 +141,8 @@ option(MNN_OPENCL "Enable OpenCL" OFF)
option(MNN_OPENGL "Enable OpenGL" OFF)
option(MNN_VULKAN "Enable Vulkan" OFF)
option(MNN_ARM82 "Enable ARM82" OFF)
# codegen register ops
if (MNN_METAL)
add_definitions(-DMNN_CODEGEN_REGISTER)
endif()
option(MNN_CUDA "Enable CUDA" OFF)
option(MNN_TENSORRT "Enable TensorRT" OFF)
# target options
option(MNN_BUILD_BENCHMARK "Build benchmark or not" OFF)
@ -165,11 +165,13 @@ message(STATUS "\tOpenCL: ${MNN_OPENCL}")
message(STATUS "\tOpenGL: ${MNN_OPENGL}")
message(STATUS "\tVulkan: ${MNN_VULKAN}")
message(STATUS "\tARM82: ${MNN_ARM82}")
message(STATUS "\tTensorRT: ${MNN_TENSORRT}")
message(STATUS "\tCUDA: ${MNN_CUDA}")
message(STATUS "\tOpenMP: ${MNN_OPENMP}")
message(STATUS "\tHidden: ${MNN_HIDDEN}")
message(STATUS "\tBuild Path: ${CMAKE_CURRENT_BINARY_DIR}")
if(WIN32)
if(MSVC)
if(${CMAKE_VERSION} VERSION_LESS "3.14.0")
message(FATAL_ERROR "MNN requires CMake 3.14+ to build on Windows!")
endif()
@ -178,14 +180,14 @@ if(WIN32)
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if (MNN_BUILD_SHARED_LIBS)
if(${flag_var} MATCHES "/MT")
string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
endif()
else ()
if (MNN_WIN_RUNTIME_MT)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif()
else ()
if(${flag_var} MATCHES "/MT")
string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
endif()
endif ()
endforeach()
elseif(CMAKE_SYSTEM_NAME MATCHES "^Android" OR CMAKE_SYSTEM_NAME MATCHES "^Linux")
@ -270,6 +272,8 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "^Linux")
endif()
include_directories(${CMAKE_CURRENT_LIST_DIR}/include/
${CMAKE_CURRENT_LIST_DIR}/source/
${CMAKE_CURRENT_LIST_DIR}/express/
${CMAKE_CURRENT_LIST_DIR}/tools/
${CMAKE_CURRENT_LIST_DIR}/schema/current/
${CMAKE_CURRENT_LIST_DIR}/3rd_party/
${CMAKE_CURRENT_LIST_DIR}/3rd_party/flatbuffers/include
@ -293,14 +297,14 @@ FILE(GLOB MNN_CV_SRC ${CMAKE_CURRENT_LIST_DIR}/source/cv/*)
add_library(MNNCV OBJECT ${MNN_CV_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCV>)
list(APPEND MNN_TARGETS MNNCV)
if (MNN_USE_SSE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
if(WIN32 OR MSVC)
target_compile_options(MNNCV PRIVATE /arch:AVX)
else()
if (NOT MSVC)
target_compile_options(MNNCV PRIVATE -msse3)
target_compile_options(MNNCV PRIVATE -mavx)
endif()
endif()
endif()
# Math
FILE(GLOB MNN_Math_SRC ${CMAKE_CURRENT_LIST_DIR}/source/math/*)
@ -308,11 +312,19 @@ add_library(MNNMath OBJECT ${MNN_Math_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMath>)
list(APPEND MNN_TARGETS MNNMath)
# Shape
FILE(GLOB MNN_Shape_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/*)
add_library(MNNShape OBJECT ${MNN_Shape_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNShape>)
list(APPEND MNN_TARGETS MNNShape)
# Transform
FILE(GLOB MNN_Transform_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/* ${CMAKE_CURRENT_LIST_DIR}/source/geometry/*)
add_library(MNNTransform OBJECT ${MNN_Transform_SRC})
IF (NOT MNN_BUILD_MINI)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTransform>)
ENDIF()
list(APPEND MNN_TARGETS MNNTransform)
# Utils
FILE(GLOB MNN_Utils_SRC ${CMAKE_CURRENT_LIST_DIR}/source/utils/*)
add_library(MNNUtils OBJECT ${MNN_Utils_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNUtils>)
list(APPEND MNN_TARGETS MNNUtils)
# Compute
FILE(GLOB MNN_Compute_SRC ${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/compute/*)
@ -327,7 +339,9 @@ list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCPU>)
list(APPEND MNN_TARGETS MNNCPU)
# X86_64 AVX/SSE
if (MNN_USE_SSE)
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/x86_x64/CMakeLists.txt)
endif()
# AArch32/64 Assemblies
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/arm/CMakeLists.txt)
@ -377,7 +391,7 @@ if (NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
if (WIN32)
if (MSVC)
set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}")
set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}")
endif()
@ -387,20 +401,22 @@ endif()
set(CMAKE_CXX_FLAGS_ORIGIN ${CMAKE_CXX_FLAGS})
set(CMAKE_C_FLAGS_ORIGIN ${CMAKE_C_FLAGS})
if ((NOT (MSVC OR WIN32)) AND MNN_HIDDEN)
if ((NOT MSVC) AND MNN_HIDDEN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden -fvisibility=hidden")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
if (NOT APPLE)
# Omitting the frame pointer may make debugging harder
if ((NOT APPLE) AND (NOT WIN32))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer")
endif()
endif()
if (NOT (MSVC OR WIN32))
if (NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions ")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif()
# Metal
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
set(MNN_DEPS "")
set(MNN_EXTRA_DEPENDS "")
list(APPEND MNN_DEPS MNN)
# Plugin
@ -409,6 +425,14 @@ if(MNN_WITH_PLUGIN)
include(${CMAKE_CURRENT_LIST_DIR}/source/plugin/CMakeLists.txt)
endif()
# Metal
if(MNN_METAL AND APPLE)
add_definitions(-DMNN_METAL_ENABLED=1)
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
list(APPEND MNN_TARGETS MNNMetal)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMetal>)
endif()
# Vulkan
IF(MNN_VULKAN)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/vulkan/)
@ -446,22 +470,34 @@ IF(MNN_OPENGL)
ENDIF()
ENDIF()
# CUDA
IF(MNN_CUDA)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/cuda/)
list(APPEND MNN_TARGETS MNN_CUDA)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_CUDA>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_CUDA_LIBS})
ENDIF()
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR IOS_ARCH STREQUAL "arm64")
# ARM82 Assemblies
IF(MNN_ARM82)
add_definitions(-DENABLE_ARMV82)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/arm82/)
IF(MNN_SEP_BUILD)
list(APPEND MNN_DEPS MNN_Arm82)
ELSE()
list(APPEND MNN_TARGETS MNN_Arm82)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_Arm82>)
ENDIF()
ENDIF()
ENDIF()
# Express
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/express/)
# TensorRT
IF(MNN_TENSORRT)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/tensorrt/)
list(APPEND MNN_TARGETS MNN_TRT)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_TRT>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_TRT_LIBS})
ENDIF()
IF(MNN_SEP_BUILD)
add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
@ -471,7 +507,7 @@ ELSE()
list(APPEND MNN_TARGETS MNNExpress)
IF(MNN_BUILD_SHARED_LIBS)
add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
if (MSVC OR WIN32)
if (WIN32)
foreach(TARGET ${MNN_TARGETS})
target_compile_definitions(${TARGET} PRIVATE "-DBUILDING_MNN_DLL")
target_compile_definitions(${TARGET} INTERFACE "-DUSING_MNN_DLL")
@ -484,7 +520,7 @@ ELSE()
ENDIF()
target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
ENDIF()
if (MSVC OR WIN32)
if (MSVC)
target_link_options(MNN PRIVATE "/IGNORE:4049,4217")
endif()
@ -504,9 +540,11 @@ if(APPLE)
target_link_libraries(MNN PUBLIC ${FOUNDATION})
find_library(METAL Metal REQUIRED)
target_link_libraries(MNN PUBLIC ${METAL})
find_library(GRAPHIC CoreGraphics)
target_link_libraries(MNN PUBLIC ${GRAPHIC})
ENDIF()
endif()
add_dependencies(MNN MNNCore MNNCV MNNShape MNNMath MNNCompute MNNCPU GenVCSHDR)
add_dependencies(MNN MNNCore MNNCV MNNTransform MNNMath MNNCompute MNNCPU GenVCSHDR)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/converter)
if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
@ -532,12 +570,6 @@ if (NOT MNN_BUILD_SHARED_LIBS)
endif()
endif()
list(APPEND MNN_TARGETS MNN)
FOREACH(TARGET ${MNN_TARGETS})
IF((NOT MSVC) AND (NOT WIN32))
else()
target_compile_definitions(${TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS)
endif()
ENDFOREACH()
list(REMOVE_ITEM MNN_TARGETS MNN)
IF(MNN_BUILD_DEMO)
include(${CMAKE_CURRENT_LIST_DIR}/demo/exec/CMakeLists.txt)

View File

@ -46,6 +46,7 @@ Pod::Spec.new do |s|
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/geometry/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\

View File

@ -66,7 +66,7 @@ The Interpreter consists of the Engine and Backends. The former handles model loading and the computation graph
Group 3:
<img src="doc/DingTalkQR3.png" height="256"/>
<img src="doc/DingTalkQR23.png" height="256"/>
## License
Apache 2.0

View File

@ -0,0 +1,89 @@
//
// CPUBatchMatMul.cpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUBatchMatMul.hpp"
#include "backend/cpu/CPUBackend.hpp"
#include "math/Matrix.hpp"
namespace MNN {
CPUBatchMatMul::CPUBatchMatMul(Backend* backend, bool adjX, bool adjY) : Execution(backend) {
mMatMul.reset(new CPUMatMul(backend, adjX, adjY, true));
}
ErrorCode CPUBatchMatMul::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
// Fill the output with zeros if one of the inputs is empty.
if (input0->elementSize() == 0 || input1->elementSize() == 0) {
return NO_ERROR;
}
auto dimensions = input0->dimensions();
mMatrixA.reset(Tensor::createDevice<float>({input0->length(input0->dimensions()-2), input0->length(input0->dimensions()-1)}));
mMatrixB.reset(Tensor::createDevice<float>({input1->length(input1->dimensions()-2), input1->length(input0->dimensions()-1)}));
mMatrixC.reset(Tensor::createDevice<float>({output->length(output->dimensions()-2), output->length(output->dimensions()-1)}));
mTempInputs = {mMatrixA.get(), mMatrixB.get()};
mTempOutputs = {mMatrixC.get()};
auto res = backend()->onAcquireBuffer(mMatrixA.get(), Backend::DYNAMIC);
res = res && backend()->onAcquireBuffer(mMatrixB.get(), Backend::DYNAMIC);
res = res && backend()->onAcquireBuffer(mMatrixC.get(), Backend::DYNAMIC);
if (!res) {
return OUT_OF_MEMORY;
}
int batch = 1;
for (int i = 0; i < dimensions - 2; ++i) {
batch *= input0->length(i);
}
mBatch = batch;
auto code = mMatMul->onResize(mTempInputs, mTempOutputs);
backend()->onReleaseBuffer(mMatrixA.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mMatrixB.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mMatrixC.get(), Backend::DYNAMIC);
return code;
}
ErrorCode CPUBatchMatMul::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
// Fill the output with zeros if one of the inputs is empty.
if (input0->elementSize() == 0 || input1->elementSize() == 0) {
::memset(output->host<float>(), 0, output->size());
return NO_ERROR;
}
const int dimensions = input0->dimensions();
MNN_ASSERT(dimensions >= 3);
const int input0Stride = input0->length(dimensions - 1) * input0->length(dimensions - 2);
const int input1Stride = input1->length(dimensions - 1) * input1->length(dimensions - 2);
const int outputStride = output->length(dimensions - 1) * output->length(dimensions - 2);
const auto input0Ptr = input0->host<float>();
const auto input1Ptr = input1->host<float>();
float* const outputPtr = output->host<float>();
for (int i = 0; i < mBatch; ++i) {
::memcpy(mMatrixA->host<float>(), input0Ptr + i * input0Stride, input0Stride * sizeof(float));
::memcpy(mMatrixB->host<float>(), input1Ptr + i * input1Stride, input1Stride * sizeof(float));
mMatMul->onExecute(mTempInputs, mTempOutputs);
::memcpy(outputPtr + i * outputStride, mMatrixC->host<float>(), outputStride * sizeof(float));
}
return NO_ERROR;
}
class CPUBatchMatMulCreator : public CPUBackend::Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const override {
return new CPUBatchMatMul(backend, op->main_as_BatchMatMulParam()->adjX(), op->main_as_BatchMatMulParam()->adjY());
}
};
REGISTER_CPU_OP_CREATOR(CPUBatchMatMulCreator, OpType_BatchMatMul);
} // namespace MNN
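For reference, the execute loop above runs the shared 2-D CPUMatMul once per batch slice; in other words (a sketch of the intended semantics only, with adjX/adjY understood as optional transposes of the left/right operand):

$$ C_b = \mathrm{op}_{\text{adjX}}(A_b)\,\mathrm{op}_{\text{adjY}}(B_b), \qquad b = 0, \dots, \text{batch}-1 $$

where op transposes its argument when the corresponding adj flag is set and is the identity otherwise.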

View File

@ -0,0 +1,35 @@
//
// CPUBatchMatMul.hpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUBatchMatMul_hpp
#define CPUBatchMatMul_hpp
#include "backend/cpu/CPUMatMul.hpp"
namespace MNN {
class CPUBatchMatMul : public Execution {
public:
CPUBatchMatMul(Backend *backend, bool adjX, bool adjY);
virtual ~CPUBatchMatMul() = default;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
int mBatch;
std::shared_ptr<Execution> mMatMul;
std::vector<Tensor*> mTempInputs;
std::vector<Tensor*> mTempOutputs;
std::shared_ptr<Tensor> mMatrixA;
std::shared_ptr<Tensor> mMatrixB;
std::shared_ptr<Tensor> mMatrixC;
};
} // namespace MNN
#endif /* CPUBatchMatMul_hpp */

View File

@ -18,7 +18,6 @@
#include "backend/cpu/compute/ConvOpt.h"
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/ConvolutionFloatFactory.h"
#include "math/Vec4.hpp"
#define MIN_CON_PLANESIZE 256

View File

@ -10,7 +10,9 @@
#include <math.h>
#include "backend/cpu/CPUBackend.hpp"
#include "core/Macro.h"
#include "math/Vec4.hpp"
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
namespace MNN {
@ -39,12 +41,12 @@ ErrorCode CPUCosineSimilarity::onExecute(const std::vector<Tensor*>& inputs, con
const auto x1ChannelPtr = x1DataBatchPtr + j;
const auto x2ChannelPtr = x2DataBatchPtr + j;
Math::Vec4 innerProduct(.0f);
Math::Vec4 x1Square(.0f);
Math::Vec4 x2Square(.0f);
Vec4 innerProduct(.0f);
Vec4 x1Square(.0f);
Vec4 x2Square(.0f);
for (int c = 0; c < channel; ++c) {
Math::Vec4 x1Data = Math::Vec4::load(x1ChannelPtr + c * channleStride);
Math::Vec4 x2Data = Math::Vec4::load(x2ChannelPtr + c * channleStride);
Vec4 x1Data = Vec4::load(x1ChannelPtr + c * channleStride);
Vec4 x2Data = Vec4::load(x2ChannelPtr + c * channleStride);
auto x1Xx2 = x1Data * x2Data;
innerProduct = innerProduct + x1Xx2;
x1Square = x1Square + x1Data * x1Data;
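The three Vec4 accumulators above (innerProduct, x1Square, x2Square) build up the standard cosine similarity per output position; as a reference formula only (the final lane reduction and division happen in code outside this hunk):

$$ \cos(x_1, x_2) = \frac{\langle x_1, x_2 \rangle}{\sqrt{\langle x_1, x_1 \rangle}\,\sqrt{\langle x_2, x_2 \rangle}} $$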

View File

@ -12,8 +12,8 @@
#include "core/Concurrency.h"
#include "core/Macro.h"
#include "math/Vec4.hpp"
using MNN::Math::Vec4;
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
namespace MNN {

View File

@ -21,7 +21,7 @@ public:
auto parameter = op->main_as_InnerProduct();
int outputCount = parameter->outputCount();
int srcCount = parameter->weight()->size() / outputCount;
mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4));
mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4, 4));
if (mWeight.get() == nullptr) {
mValid = false;
return;

View File

@ -180,6 +180,14 @@ ErrorCode CPULSTM::onResize(const std::vector<Tensor *> &inputs, const std::vect
::memcpy(mBiasC->host<float>(), mLSTM->bias()->float32s()->data(), mBiasC->size());
::memcpy(mWeightH->host<float>(), mLSTM->weightH()->float32s()->data(), mWeightH->size());
}
if (mGateHaveBias) {
// Merge bias
auto biasPtr = mBiasC->host<float>();
auto biasPtr2 = biasPtr + 4 * numUnits;
for (int i=0; i<4*numUnits; ++i) {
biasPtr[i] = biasPtr[i] + biasPtr2[i];
}
}
}
if (inputs.size() > 1) {
@ -260,16 +268,8 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
MNN_CONCURRENCY_END();
float* biasStartPtr = mBiasC->host<float>();
if(!mGateHaveBias){
biasStartPtr = nullptr;
}
mRetriveOutputFunction(mGates.host<float>(), biasStartPtr);
float* recurrenceBiasStartPtr = mBiasC->host<float>();
if(mGateHaveBias){
recurrenceBiasStartPtr += 4 * numUnits;
}
// transform
const float *contData = nullptr;
if (inputs.size() > 1) {
@ -330,14 +330,11 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
}
// add bias
auto biasPtr = recurrenceBiasStartPtr + oc;
I = sigmoid(*biasPtr + I);
biasPtr = biasPtr + numUnits;
F = sigmoid(*biasPtr + F);
biasPtr = biasPtr + numUnits;
O = sigmoid(*biasPtr + O);
biasPtr = biasPtr + numUnits;
G = tanhf(*biasPtr + G);
//MNN_PRINT("%f, %f, %f, %f\n", I, O, F, G);
I = sigmoid(I);
F = sigmoid(F);
O = sigmoid(O);
G = tanhf(G);
auto newCell = F * cellData[oc] + I * G;
cellData[oc] = newCell;
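With both bias halves folded into a single vector at resize time (the "Merge bias" block above), the gate math at execute time is the usual LSTM cell update already visible in this hunk; written out as a reference (symbols mirror the I/F/O/G variables in the code; the hidden-state output is computed outside this hunk):

$$ i = \sigma(\hat{i}),\quad f = \sigma(\hat{f}),\quad o = \sigma(\hat{o}),\quad g = \tanh(\hat{g}), \qquad c_{\text{new}} = f \cdot c_{\text{old}} + i \cdot g $$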

View File

@ -0,0 +1,311 @@
//
// CPUSoftmax.cpp
// MNN
//
// Created by MNN on 2018/07/16.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUSoftmax.hpp"
#include <math.h>
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/Concurrency.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#ifdef MNN_USE_NEON
#include <arm_neon.h>
#endif
namespace MNN {
int CPUSoftmax::_softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum) {
// Max and sub
MNN_CONCURRENCY_BEGIN(tId, threadNum)
{
const float *srcY = srcData + tId * channel;
float *dstY = dstData + tId * channel;
for (int y = (int)tId; y < outside; y += threadNum, srcY += channel * threadNum, dstY += channel * threadNum) {
float maxValue = srcY[0];
{
int c = 1;
#ifdef MNN_USE_NEON
#if !(defined(__ARM_FEATURE_FMA) && defined(__aarch64__))
#define vmaxvq_f32(v) \
({ \
float __m = v[0]; \
for (int i = 1; i < 4; i++) { \
if (v[i] > __m) \
__m = v[i]; \
} \
__m; \
})
#endif
if (c + 3 < channel) {
float32x4_t maxx4 = vld1q_f32(srcY + c);
c += 4;
for (; c + 3 < channel; c += 4) {
maxx4 = vmaxq_f32(maxx4, vld1q_f32(srcY + c));
}
float value = vmaxvq_f32(maxx4);
if (value > maxValue)
maxValue = value;
}
#endif
for (; c < channel; ++c) {
float value = srcY[c];
if (value > maxValue)
maxValue = value;
}
}
for (int c = 0; c < channel; ++c) {
dstY[c] = -srcY[c] + maxValue;
}
}
}
MNN_CONCURRENCY_END();
//Exp
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(channel * outside);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber -1 ) {
realSize = channel * outside - start;
}
if (realSize > 0) {
MNNExp(dstData + start, dstData + start, realSize);
}
}
MNN_CONCURRENCY_END();
// Sum and div
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
float *dstY = dstData + tId * channel;
for (int y = (int)tId; y < outside; y += threadNum, dstY += channel * threadNum) {
// sum
float sumValue = 0;
for (int c = 0; c < channel; ++c) {
sumValue += dstY[c];
}
// div
{
int c = 0;
#ifdef MNN_USE_NEON
float div = 1.f / sumValue;
for (; c + 3 < channel; c += 4) {
vst1q_f32(dstY + c, vmulq_n_f32(vld1q_f32(dstY + c), div));
}
#endif
for (; c < channel; ++c) {
dstY[c] /= sumValue;
}
}
}
}
MNN_CONCURRENCY_END();
return 0;
}
int CPUSoftmax::_softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel,
float *maxValue, float *sumValue, int threadNum) {
if (inside == 1)
return _softmax1(srcData, dstData, outside, channel, threadNum);
const int stepY = inside * channel;
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
const float *srcY = srcData + tId * stepY;
float *dstY = dstData + tId * stepY;
float *maxValueSub = maxValue + tId * inside;
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
memcpy(maxValueSub, srcY, sizeof(float) * inside);
const float *src = srcY + inside;
for (int c = 1; c < channel; ++c, src += inside) {
for (int x = 0; x < inside; ++x) {
if (src[x] > maxValueSub[x])
maxValueSub[x] = src[x];
}
}
src = srcY;
float *dst = dstY;
for (int c = 0; c < channel; ++c, src += inside, dst += inside) {
for (int x = 0; x < inside; ++x) {
dst[x] = -src[x] + maxValueSub[x];
}
}
}
}
MNN_CONCURRENCY_END();
auto totalSize = channel * inside * outside;
//Exp
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(totalSize);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber -1 ) {
realSize = totalSize - start;
}
if (realSize > 0) {
MNNExp(dstData + start, dstData + start, realSize);
}
}
MNN_CONCURRENCY_END();
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
const float *srcY = srcData + tId * stepY;
float *dstY = dstData + tId * stepY;
float *sumValueSub = sumValue + tId * inside;
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
memset(sumValueSub, 0, sizeof(float) * inside);
float *dst = dstY;
for (int c = 0; c < channel; ++c, dst += inside) {
for (int x = 0; x < inside; ++x) {
sumValueSub[x] += dst[x];
}
}
dst = dstY;
for (int c = 0; c < channel; ++c, dst += inside) {
for (int x = 0; x < inside; ++x) {
dst[x] /= sumValueSub[x];
}
}
}
}
MNN_CONCURRENCY_END();
return 0;
}
ErrorCode CPUSoftmax::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
auto input = inputs[0];
const int dimensions = input->buffer().dimensions;
const auto layout = TensorUtils::getDescribe(input)->dimensionFormat;
mNeedUnpackC4 = layout == MNN_DATA_FORMAT_NC4HW4;
if (mNeedUnpackC4) {
int totalSize = 1;
for (int i = 1; i < dimensions; ++i) {
totalSize *= input->length(i);
}
mStorage.buffer().dim[0].extent = input->length(0);
mStorage.buffer().dim[1].extent = totalSize;
TensorUtils::getDescribe(&mStorage)->dimensionFormat = MNN_DATA_FORMAT_NHWC;
mStorage.buffer().dimensions = 2;
mStorage.buffer().type = input->getType();
backend()->onAcquireBuffer(&mStorage, Backend::DYNAMIC);
}
int inside = 1;
int dims = input->buffer().dimensions;
for (int i = mAxis + 1; i < dims; ++i) {
inside *= input->length(i);
}
if (inside != 1) { // _softmax1 will not be used, so we need the maxValue and sumValue tensors.
int threadNum = ((CPUBackend *)backend())->threadNumber();
mMaxValue.buffer().dim[0].extent = inside * threadNum;
mMaxValue.buffer().dimensions = 1;
mMaxValue.setType(DataType_DT_FLOAT);
backend()->onAcquireBuffer(&mMaxValue, Backend::DYNAMIC);
mSumValue.buffer().dim[0].extent = inside * threadNum;
mSumValue.buffer().dimensions = 1;
mSumValue.setType(DataType_DT_FLOAT);
backend()->onAcquireBuffer(&mSumValue, Backend::DYNAMIC);
backend()->onReleaseBuffer(&mMaxValue, Backend::DYNAMIC);
backend()->onReleaseBuffer(&mSumValue, Backend::DYNAMIC);
}
if (mNeedUnpackC4) {
backend()->onReleaseBuffer(&mStorage, Backend::DYNAMIC);
}
return NO_ERROR;
}
ErrorCode CPUSoftmax::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
MNN_ASSERT(1 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto inputTensor = inputs[0];
auto outputTensor = outputs[0];
const auto inputDataPtr = inputTensor->host<float>();
auto outputDataPtr = outputTensor->host<float>();
const int batch = inputTensor->batch();
const auto dims = inputTensor->buffer().dimensions;
float *tempData = nullptr;
if (mNeedUnpackC4) {
tempData = mStorage.host<float>();
}
int areaInput = 1;
for (int i = 2; i < dims; ++i) {
areaInput *= inputTensor->length(i);
}
int inside = 1;
int outside = 1;
int channel = 1;
for (int i = 0; i < mAxis; ++i) {
outside *= inputTensor->length(i);
}
channel = inputTensor->length(mAxis);
for (int i = mAxis + 1; i < dims; ++i) {
inside *= inputTensor->length(i);
}
int threadNum = ((CPUBackend *)backend())->threadNumber();
if (!mNeedUnpackC4) {
_softmaxCommon(inputDataPtr, outputDataPtr, inside, outside, channel, mMaxValue.host<float>(),
mSumValue.host<float>(), threadNum);
return NO_ERROR;
}
auto outputSize = outputTensor->elementSize();
int batchSize = outputSize / batch;
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
auto inputData = inputDataPtr + batchIndex * batchSize;
MNNUnpackC4(outputDataPtr + batchIndex * mStorage.length(1), inputData, areaInput, inputTensor->channel());
}
_softmaxCommon(outputDataPtr, tempData, inside, outside, channel, mMaxValue.host<float>(), mSumValue.host<float>(), threadNum);
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
auto outputData = outputDataPtr + batchIndex * batchSize;
auto tempPtr = tempData + batchIndex * mStorage.length(1);
MNNPackC4(outputData, tempPtr, areaInput, outputTensor->channel());
}
return NO_ERROR;
}
CPUSoftmax::CPUSoftmax(Backend *b, int axis) : MNN::Execution(b), mAxis(axis), mStorage(2), mNeedUnpackC4(false) {
// nothing to do
}
class CPUSoftmaxCreator : public CPUBackend::Creator {
public:
virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
const MNN::Op *op, Backend *backend) const override {
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
return new CPUSoftmax(backend, axis);
}
};
REGISTER_CPU_OP_CREATOR(CPUSoftmaxCreator, OpType_Softmax);
} // namespace MNN
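Both _softmax1 and _softmaxCommon follow the same three passes (max-subtract, exponentiate, sum-normalize), producing the numerically stable softmax along the chosen axis:

$$ \mathrm{softmax}(x)_c = \frac{e^{\,x_c - \max_j x_j}}{\sum_k e^{\,x_k - \max_j x_j}} $$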

View File

@ -0,0 +1,35 @@
//
// CPUSoftmax.hpp
// MNN
//
// Created by MNN on 2018/07/16.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUSoftmax_hpp
#define CPUSoftmax_hpp
#include "core/Execution.hpp"
namespace MNN {
class CPUSoftmax : public Execution {
public:
CPUSoftmax(Backend *b, int axis);
virtual ~CPUSoftmax() = default;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
int _softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel, float *maxValue,
float *sumValue, int threadNum);
int _softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum);
int mAxis;
Tensor mStorage;
Tensor mMaxValue;
Tensor mSumValue;
bool mNeedUnpackC4;
};
} // namespace MNN
#endif /* CPUSoftmax_hpp */

View File

@ -13,10 +13,8 @@
#include "backend/cpu/compute/ConvOpt.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "math/Vec4.hpp"
using namespace MNN::Math;
typedef Vec4 float4;
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
#define SOURCE_BLOCK 64
#define WEIGHT_BLOCK 256

View File

@ -0,0 +1,128 @@
//
// GeometryCropAndResize.cpp
// MNN
//
// Created by MNN on 2020/08/5.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "ConvertUtils.hpp"
namespace MNN {
class GeometryCropAndResize : public GeometryComputer {
public:
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
MNN_ASSERT(4 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto img = inputs[0];
auto boxes = inputs[1];
auto box_ind = inputs[2];
auto crop_size = inputs[3];
auto output = outputs[0];
auto extrapolation = op->main_as_CropAndResize()->extrapolationValue();
auto method = op->main_as_CropAndResize()->method();
// resizeType of Interp : 1-NEAREST, 2-BILINEAR
const int resizeType = method == CropAndResizeMethod_BILINEAR ? 2 : 1;
int batch = img->length(0), ih = img->length(1), iw = img->length(2),
depth = img->length(3), boxNum = boxes->length(0);
const int cropHeight = crop_size->host<uint32_t>()[0],
cropWidth = crop_size->host<uint32_t>()[1];
auto des = TensorUtils::getDescribe(output);
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
des->regions.clear();
des->regions.reserve(boxNum);
for (int i = 0; i < boxNum; i++) {
const float y1 = boxes->host<float>()[i*4];
const float x1 = boxes->host<float>()[i*4+1];
const float y2 = boxes->host<float>()[i*4+2];
const float x2 = boxes->host<float>()[i*4+3];
const int ind = box_ind->host<uint32_t>()[i];
const float ch = (y2 - y1) * (ih - 1), cw = (x2 - x1) * (iw - 1);
const float yScale = ch / static_cast<float>(cropHeight - 1);
const float xScale = cw / static_cast<float>(cropWidth - 1);
const float yOffset = y1 * (ih - 1), xOffset = x1 * (iw - 1);
// select the cropped image from the batch and convert its format from NHWC to NC4HW4
std::shared_ptr<Tensor> cropValue(new Tensor);
{
cropValue->buffer().type = halide_type_of<float>();
cropValue->buffer().dimensions = 4;
cropValue->setLength(0, 1);
cropValue->setLength(1, depth);
cropValue->setLength(2, ih);
cropValue->setLength(3, iw);
auto des = TensorUtils::getDescribe(cropValue.get());
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
des->regions.clear();
Tensor::InsideDescribe::Region region;
region.origin = img;
region.size[1] = depth;
region.size[2] = ih * iw;
region.src.offset = ind * ih * iw * depth;
region.dst.offset = 0;
region.src.stride[1] = 1;
region.src.stride[2] = depth;
region.dst.stride[1] = ih * iw;
region.dst.stride[2] = 1;
des->regions.emplace_back(std::move(region));
res.extras.emplace_back(cropValue);
}
// use the Interp op to crop and resize the selected image
std::shared_ptr<Tensor> resizeValue;
{
resizeValue.reset(Tensor::createDevice<float>({1, depth, cropHeight, cropWidth}));
auto des = TensorUtils::getDescribe(resizeValue.get());
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
std::unique_ptr<OpT> interp(new OpT);
interp->type = OpType_Interp;
interp->main.type = OpParameter_Interp;
interp->main.value = new InterpT;
interp->main.AsInterp()->widthScale = xScale;
interp->main.AsInterp()->heightScale = yScale;
interp->main.AsInterp()->widthOffset = xOffset;
interp->main.AsInterp()->heightOffset = yOffset;
interp->main.AsInterp()->alignCorners = false;
interp->main.AsInterp()->resizeType = resizeType;
auto cmd = GeometryComputerUtils::makeCommand(interp.get(), {cropValue.get()}, {resizeValue.get()});
res.extras.emplace_back(resizeValue);
res.command.emplace_back(cmd);
}
// convert the resized image's format from NC4HW4 back to NHWC and add it to the output batch
{
Tensor::InsideDescribe::Region region;
region.origin = resizeValue.get();
region.size[1] = cropHeight * cropWidth;
region.size[2] = depth;
region.src.offset = 0;
region.dst.offset = i * cropHeight * cropWidth * depth;
region.src.stride[1] = 1;
region.src.stride[2] = cropHeight * cropWidth;
region.dst.stride[1] = depth;
region.dst.stride[2] = 1;
des->regions.emplace_back(std::move(region));
}
}
return true;
}
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
//return {false};
return {true};
}
};
static void _create() {
std::shared_ptr<GeometryComputer> comp(new GeometryCropAndResize);
// GeometryComputer::registerGeometryComputer(comp, {OpType_CropAndResize});
}
REGISTER_GEOMETRY(GeometryCropAndResize, _create);
} // namespace MNN
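The scale/offset pairs handed to the Interp op encode the usual crop_and_resize coordinate mapping. Assuming Interp maps a destination index d to the source coordinate offset + d * scale (an assumption about Interp's convention, not shown in this file), output pixel (r, c) of crop box (y1, x1, y2, x2) samples the source image at:

$$ y = y_1(ih-1) + r\,\frac{(y_2-y_1)(ih-1)}{\text{cropHeight}-1}, \qquad x = x_1(iw-1) + c\,\frac{(x_2-x_1)(iw-1)}{\text{cropWidth}-1} $$

which is exactly the yOffset/yScale and xOffset/xScale computed above (valid for cropHeight, cropWidth > 1).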

View File

@ -0,0 +1,304 @@
//
// GeometryGather.cpp
// MNN
//
// Created by MNN on 2020/06/09.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
namespace MNN {
class GeometryGather : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() == 2);
MNN_ASSERT(1 == outputs.size());
auto embedding = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
const int firstDimStride = embedding->buffer().dim[0].stride;
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && firstDimStride != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
}
return std::vector<bool>(outputs.size(), false);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(2 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto embedding = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
const int firstDimStride = embedding->buffer().dim[0].stride;
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || firstDimStride == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
auto bytes = embedding->buffer().type.bytes();
const size_t indicesCount = indices->elementSize();
const auto limit = embedding->length(0);
const int* indicesData = indices->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int i = 0; i < indicesCount; i++) {
if (indicesData[i] < 0 || indicesData[i] > limit) {
MNN_PRINT("Gather indice error\n");
return false;
}
Tensor::InsideDescribe::Region slice;
slice.origin = embedding;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = firstDimStride;
slice.src.offset = firstDimStride * indicesData[i];
slice.dst.offset = i * firstDimStride;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
return true;
}
};
class GeometryGatherND : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() == 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int mSliceN = 1;
int mSliceSize = 1;
for (int i = 0; i < indices->dimensions() - 1; ++i) {
mSliceN *= indices->length(i);
}
auto indiceNd = indices->length(indices->dimensions() - 1);
std::vector<int> mDimsToCount;
mDimsToCount.resize(indiceNd);
for (int i = indiceNd; i < params->dimensions(); ++i) {
mSliceSize *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && mSliceSize != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
} else {
std::vector<bool> res(outputs.size(), false);
return res;
}
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(2 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indice = inputs[1];
auto output = outputs[0];
int mSliceN = 1;
int mSliceSize = 1;
for (int i = 0; i < indice->dimensions() - 1; ++i) {
mSliceN *= indice->length(i);
}
auto indiceNd = indice->length(indice->dimensions() - 1);
std::vector<int> mDimsToCount;
mDimsToCount.resize(indiceNd);
for (int i = indiceNd; i < params->dimensions(); ++i) {
mSliceSize *= params->length(i);
}
if (TensorUtils::getDescribe(indice)->usage != MNN::Tensor::InsideDescribe::CONSTANT || mSliceSize == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
auto paramSize = params->elementSize();
for (int i = 0; i < indiceNd; ++i) {
mDimsToCount[i] = paramSize / params->length(i);
paramSize = mDimsToCount[i];
}
mDimsToCount.resize(indiceNd);
auto indiceData = indice->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int i = 0; i < mSliceN; i++) {
int fromPos = 0;
for (int j = 0; j < indiceNd; ++j) {
fromPos += mDimsToCount[j] * indiceData[i * indiceNd + j];
}
Tensor::InsideDescribe::Region slice;
slice.origin = params;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = mSliceSize;
slice.src.offset = fromPos;
slice.dst.offset = i * mSliceSize;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
return true;
}
};
class GeometryGatherV2 : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() >= 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int axis = 0;
if (inputs.size() == 3) {
const Tensor* axisTensor = inputs[2];
axis = axisTensor->host<int32_t>()[0];
}
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
if (axis < 0) {
axis = params->buffer().dimensions + axis;
}
const int gatherDimSize = params->buffer().dim[axis].extent;
const int N = indices->elementSize();
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
int inside = 1;
for (int i = axis + 1; i < params->dimensions(); ++i) {
inside *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && inside != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
}
return std::vector<bool>(outputs.size(), false);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(inputs.size() >= 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int axis = 0;
if (inputs.size() == 3) {
const Tensor* axisTensor = inputs[2];
axis = axisTensor->host<int32_t>()[0];
}
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
if (axis < 0) {
axis = params->buffer().dimensions + axis;
}
const int gatherDimSize = params->buffer().dim[axis].extent;
const int N = indices->elementSize();
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
int inside = 1;
int outside = 1;
for (int i = 0; i < axis; ++i) {
outside *= params->length(i);
}
for (int i = axis + 1; i < params->dimensions(); ++i) {
inside *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || inside == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
const int limit = params->length(axis);
auto bytes = output->buffer().type.bytes();
const int insideStride = inside;
const int outputOutsideStride = inside * N;
const int inputOutsideStride = inside * inputs[0]->length(axis);
const int* indicesPtr = indices->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int o = 0; o < outside; ++o) {
for (int i = 0; i < N; i++) {
if (indicesPtr[i] < 0 || indicesPtr[i] > limit) {
continue;
}
Tensor::InsideDescribe::Region slice;
slice.origin = params;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = insideStride;
slice.src.offset = inputOutsideStride * o + insideStride * indicesPtr[i];
slice.dst.offset = outputOutsideStride * o + i * insideStride;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
}
return true;
}
};
static void _create() {
// std::shared_ptr<GeometryComputer> comp(new GeometryGather);
// GeometryComputer::registerGeometryComputer(comp, {OpType_Gather});
//
// std::shared_ptr<GeometryComputer> comp2(new GeometryGatherND);
// GeometryComputer::registerGeometryComputer(comp2, {OpType_GatherND});
//
// std::shared_ptr<GeometryComputer> comp3(new GeometryGatherV2);
// GeometryComputer::registerGeometryComputer(comp3, {OpType_GatherV2});
}
REGISTER_GEOMETRY(GeometryGather, _create);
} // namespace MNN
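As a plain reference for what the regions above describe, a gather along the first axis copies one table row per index; a minimal standalone C++ sketch (illustrative names only, independent of MNN's Tensor/Region types):

```cpp
#include <cstddef>
#include <cstring>

// Reference gather: out row i <- table row indices[i], each row holding rowSize floats.
// Callers are expected to validate that every index lies in [0, tableRows).
void gatherRows(const float* table, const int* indices, std::size_t indexCount,
                std::size_t rowSize, float* out) {
    for (std::size_t i = 0; i < indexCount; ++i) {
        std::memcpy(out + i * rowSize,
                    table + static_cast<std::size_t>(indices[i]) * rowSize,
                    rowSize * sizeof(float));
    }
}
```

In the geometry version above, rowSize corresponds to firstDimStride and the copies are expressed as virtual regions instead of actual memcpy calls.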

View File

@ -0,0 +1,214 @@
//
// GeometrySoftmax.cpp
// MNN
//
// Created by MNN on 2020/06/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"
namespace MNN {
class GeometrySoftmax : public GeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
if (axis == 1) {
return std::vector<bool>(outputs.size(), false);
}
return std::vector<bool>(outputs.size(), true);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
MNN_ASSERT(1 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto input = inputs[0];
auto output = outputs[0];
auto dims = input->buffer().dimensions;
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
if (axis == 1) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
int inside = 1;
int outside = 1;
int channel = 1;
for (int i = 0; i < axis; ++i) {
outside *= input->length(i);
}
channel = input->length(axis);
for (int i = axis + 1; i < dims; ++i) {
inside *= input->length(i);
}
//transform the input to NCHW format
std::shared_ptr<Tensor> tmpInput;
{
tmpInput.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(tmpInput.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = channel*inside;
desReg.src.stride[1] = inside;
desReg.src.stride[2] = 1;
desReg.origin = input;
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(tmpInput);
}
//reduction max, axis=1
std::shared_ptr<Tensor> maxValue;
{
maxValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
res.extras.emplace_back(maxValue);
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_MAXIMUM, tmpInput.get(), maxValue.get()));
}
//broadcast reduction axis dim
std::shared_ptr<Tensor> maxBroadValue;
{
maxBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(maxBroadValue.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = inside;
desReg.src.stride[1] = 0;
desReg.src.stride[2] = 1;
desReg.origin = maxValue.get();
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(maxBroadValue);
}
//sub
std::shared_ptr<Tensor> subMaxValue;
{
subMaxValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_SUB, tmpInput.get(), maxBroadValue.get(), subMaxValue.get());
res.extras.emplace_back(subMaxValue);
res.command.emplace_back(std::move(cmd));
}
//exp
std::shared_ptr<Tensor> expValue;
{
expValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeUnary(UnaryOpOperation_EXP, subMaxValue.get(), expValue.get());
res.extras.emplace_back(expValue);
res.command.emplace_back(std::move(cmd));
}
//reduction sum, axis=2, only support NCHW
std::shared_ptr<Tensor> sumValue;
{
sumValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
res.extras.emplace_back(sumValue);
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_SUM, expValue.get(), sumValue.get()));
}
//broadcast reduction axis dim
std::shared_ptr<Tensor> sumBroadValue;
{
sumBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(sumBroadValue.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = inside;
desReg.src.stride[1] = 0;
desReg.src.stride[2] = 1;
desReg.origin = sumValue.get();
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(sumBroadValue);
}
//div
std::shared_ptr<Tensor> tmpOutput;
{
tmpOutput.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_REALDIV, expValue.get(), sumBroadValue.get(), tmpOutput.get());
res.extras.emplace_back(tmpOutput);
res.command.emplace_back(std::move(cmd));
}
//transform to output
{
auto outputDes = TensorUtils::getDescribe(output);
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = channel*inside;
desReg.src.stride[1] = inside;
desReg.src.stride[2] = 1;
desReg.origin = tmpOutput.get();
outputDes->regions.emplace_back(std::move(desReg));
}
return true;
}
};
static void _create() {
// std::shared_ptr<GeometryComputer> comp(new GeometrySoftmax);
// GeometryComputer::registerGeometryComputer(comp, {OpType_Softmax});
}
REGISTER_GEOMETRY(GeometrySoftmax, _create);
} // namespace MNN
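The command sequence above (reduce-max, broadcast, subtract, exp, reduce-sum, broadcast, divide) is the hand-written stable-softmax decomposition over an (outside, channel, inside) layout; a minimal standalone C++ sketch of that step order (illustrative only, independent of MNN's Tensor/Command machinery):

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// Softmax over the "channel" axis of data laid out as (outside, channel, inside).
void softmaxDecomposed(std::vector<float>& data, int outside, int channel, int inside) {
    for (int o = 0; o < outside; ++o) {
        float* slice = data.data() + o * channel * inside;
        for (int i = 0; i < inside; ++i) {
            // 1) reduce max over the channel axis
            float maxVal = slice[i];
            for (int c = 1; c < channel; ++c) {
                maxVal = std::max(maxVal, slice[c * inside + i]);
            }
            // 2) subtract the (broadcast) max, 3) exponentiate, 4) reduce sum
            float sum = 0.f;
            for (int c = 0; c < channel; ++c) {
                slice[c * inside + i] = std::exp(slice[c * inside + i] - maxVal);
                sum += slice[c * inside + i];
            }
            // 5) divide by the (broadcast) sum
            for (int c = 0; c < channel; ++c) {
                slice[c * inside + i] /= sum;
            }
        }
    }
}
```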

View File

@ -7,7 +7,7 @@ add_executable(benchmarkExprModels.out ${CMAKE_CURRENT_LIST_DIR}/benchmarkExprMo
target_include_directories(benchmarkExprModels.out PRIVATE "${CMAKE_CURRENT_LIST_DIR}/exprModels" ${CMAKE_CURRENT_SOURCE_DIR}/)
target_link_libraries(benchmarkExprModels.out ${MNN_DEPS})
if ((MSVC OR WIN32) AND NOT MNN_BUILD_SHARED_LIBS)
if (MSVC AND NOT MNN_BUILD_SHARED_LIBS)
foreach (DEPEND ${MNN_DEPS})
target_link_options(benchmark.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)
target_link_options(benchmarkExprModels.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)

View File

@ -124,6 +124,7 @@ std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward
const auto bufferSize = revertor->getBufferSize();
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
revertor.reset();
net->setSessionMode(MNN::Interpreter::Session_Release);
MNN::ScheduleConfig config;
config.numThread = numberThread;
config.type = static_cast<MNNForwardType>(forward);

View File

@ -90,6 +90,7 @@ static std::vector<float> runNet(VARP netOutput, const ScheduleConfig& config, i
const void* buf = builder.GetBufferPointer();
size_t size = builder.GetSize();
std::unique_ptr<Interpreter> net(Interpreter::createFromBuffer(buf, size));
net->setSessionMode(MNN::Interpreter::Session_Release);
auto session = net->createSession(config);
net->releaseModel();
auto inputTensor = net->getSessionInput(session, NULL);

View File

@ -1,84 +0,0 @@
import os
import sys
major_py_ver = sys.version_info.major
def convert_string_to_hex_list(code_str):
hex_list = []
for i in range(len(code_str)):
hex_ = hex(ord(code_str[i]))
hex_list.append(hex_)
return hex_list
def opencl_codegen():
cl_kernel_dir = sys.argv[1]
output_path = sys.argv[2]
print("Generating OpenCL Kernels in "+cl_kernel_dir+" to "+output_path)
if not os.path.exists(cl_kernel_dir):
print(cl_kernel_dir + " doesn't exist!")
#common.h
common_header_code = ""
#quantized_common.h
quantized_common_header_code = ""
#activation_common.h
activation_common_header_code = ""
for file_name in os.listdir(cl_kernel_dir):
file_path = os.path.join(cl_kernel_dir, file_name)
if file_path[-2:] == ".h" and file_name[:-2] == "quantized_common":
with open(file_path, "r") as f:
quantized_common_header_code += f.read()
elif file_path[-2:] == ".h" and file_name[:-2] == "activation_common":
with open(file_path, "r") as f:
activation_common_header_code += f.read()
opencl_code_maps = {}
for file_name in os.listdir(cl_kernel_dir):
file_path = os.path.join(cl_kernel_dir, file_name)
if file_path[-3:] == ".cl":
with open(file_path, "r") as f:
code_str = ""
for line in f.readlines():
if "#include <activation_common.h>" in line:
code_str += common_header_code
code_str += activation_common_header_code
elif "#include <quantized_common.h>" in line:
code_str += common_header_code
code_str += quantized_common_header_code
elif "#include <common.h>" in line:
code_str += common_header_code
else:
code_str += line
opencl_code_maps[file_name[:-3]] = convert_string_to_hex_list(code_str)
#source model
opencl_source_map = "#include <map> \n"
opencl_source_map += "#include <string> \n"
opencl_source_map += "#include <vector> \n"
opencl_source_map += "namespace MNN { \n"
opencl_source_map += "extern const std::map<std::string, std::vector<unsigned char>> OpenCLProgramMap = \n { \n"
if major_py_ver == 2:
items = opencl_code_maps.iteritems()
else:
items = opencl_code_maps.items()
for file_name, file_source in items:
opencl_source_map += "{\n \""
opencl_source_map += file_name
opencl_source_map += "\", \n"
opencl_source_map += " { "
for source_hex in file_source:
opencl_source_map += source_hex
opencl_source_map += ","
opencl_source_map += " } "
opencl_source_map += "\n }, \n"
opencl_source_map += " }; \n"
opencl_source_map += "} \n"
with open(output_path, "w") as w_file:
w_file.write(opencl_source_map)
print("Generate OpenCL Source done !!! \n")
if __name__ == '__main__':
opencl_codegen()

140
ciscripts/build.sh Normal file
View File

@ -0,0 +1,140 @@
#!/usr/bin/env bash
. ./parse_options.sh || exit 1;
CMAKE=cmake
MAKE=make
ANDROID_NDK=/home/android-ndk-r18b
BUILD_ROOT=`pwd`
# Clean the existing directory instead of removing it, in order to avoid
# the problem "Current working directory cannot be established".
function make_or_clean_dir {
if [ -d $1 ]; then
rm -rf $1/*
else
mkdir $1
fi
}
function build_arm_android_32 {
make_or_clean_dir build_arm_android_32 && cd build_arm_android_32
$CMAKE ../.. \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="armeabi-v7a" \
-DANDROID_STL=c++_static \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_NATIVE_API_LEVEL=android-21 \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=true \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. \
-DNATIVE_INCLUDE_OUTPUT=. \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_OPENGL=$USE_OPENGL \
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_arm_android_64 {
make_or_clean_dir build_arm_android_64 && cd build_arm_android_64
$CMAKE ../.. \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_STL=c++_static \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_NATIVE_API_LEVEL=android-21 \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=true \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. \
-DNATIVE_INCLUDE_OUTPUT=. \
-DMNN_ARM82=ON \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_OPENGL=$USE_OPENGL \
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_arm_linux_32 {
cd $BUILD_ROOT; true;
}
function build_arm_linux_64 {
cd $BUILD_ROOT; true;
}
function build_x86_linux {
make_or_clean_dir build_x86_linux && cd build_x86_linux
$CMAKE ../.. \
-DCMAKE_BUILD_TYPE=Release \
-DMNN_BUILD_TRAIN=ON \
-DMNN_SEP_BUILD=OFF \
-DMNN_BUILD_DEMO=ON \
-DMNN_BUILD_QUANTOOLS=ON \
-DMNN_EVALUATION=ON \
-DMNN_BUILD_CONVERTER=ON \
-DMNN_SUPPORT_TFLITE_QUAN=ON \
-DMNN_BUILD_TEST=ON \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENMP=$USE_OPENMP \
-DMNN_USE_THREAD_POOL=OFF \
-DMNN_BUILD_BENCHMARK=ON || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_all {
build_arm_android_32 || exit 1;
build_arm_android_64 || exit 1;
build_arm_linux_32 || exit 1;
build_arm_linux_64 || exit 1;
build_x86_linux || exit 1;
true;
}
function clean {
rm -rf build_arm_android_32
rm -rf build_arm_android_64
rm -rf build_arm_linux_32
rm -rf build_arm_linux_64
rm -rf build_x86_linux
}
function build {
case $platform in
"arm_linux_32")
build_arm_linux_32 || exit 1;
;;
"arm_linux_64")
build_arm_linux_64 || exit 1;
;;
"x86_linux")
build_x86_linux || exit 1;
;;
"arm_android_32")
build_arm_android_32 || exit 1;
;;
"arm_android_64")
build_arm_android_64 || exit 1;
;;
"all")
build_all || exit 1;
;;
*) echo "Invalid platform: $platform" && exit 1;
esac
}
if [ $clean == 1 ]; then
clean
else
build $@
fi
true;

113
ciscripts/parse_options.sh Normal file
View File

@ -0,0 +1,113 @@
#!/usr/bin/env bash
# Valid platform:
# - arm_android_32
# - arm_android_64
# - arm_linux_32
# - arm_linux_64
# - x86_linux
platform="all"
# Option to build with opencl.
use_opencl=0
# Option to build with opengl.
use_opengl=0
# Option to build with vulkan.
use_vulkan=0
# Option to build with openmp multithreads library.
use_openmp=0
build_threads=1
# Option to clear the build history.
clean=0
USE_OPENCL=OFF
USE_VULKAN=OFF
USE_OPENGL=OFF
USE_OPENMP=OFF
USE_THREAD_POOL=ON
function print_usage {
echo -e "Usgae: ./build.sh"
echo -e " --platform=x: Specify build platform x. "
echo -e " All valid platforms are \"arm_android_32\", \"arm_android_64\",
\"arm_linux_32\", \"arm_linux_64\", \"x86_linux\", \"all\"."
echo -e " The default is \"all\"."
echo -e " --use_openmp=true|false: Build with openmp or not."
echo -e " The default is false."
echo -e " --use_opencl=true|false: Build with opencl or not."
echo -e " The default is false."
echo -e " --use_opengl=true|false: Build with opengl or not."
echo -e " The default is false."
echo -e " --use_vulkan=true|false: Build with vulkan or not."
echo -e " The default is false."
echo -e " --job=n: Build with n threads. Default is 1."
}
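# Example invocations (illustrative only; the platform and flag values below are
# assumptions, not defaults baked into this script):
#   ./build.sh --platform=arm_android_64 --use_vulkan=true --use_opencl=true --job=8
#   ./build.sh --platform=x86_linux --use_openmp=true --job=4
#   ./build.sh clean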
function parse_platform {
platform=`echo "$1" | awk -F '=' '{print $2}'`
}
function parse_nthreads {
build_threads=`echo "$1" | awk -F '=' '{print $2}'`
}
function parse_bool {
val=`echo "$1" | awk -F '=' '{print $2}'`
if [ $val == "true" ] || [ $val == "1" ]; then
return 1;
else
return 0;
fi
}
[ -z "${1:-}" ] && print_usage && exit 1;
while true; do
[ -z "${1:-}" ] && break;
case "$1" in
--platform=*) parse_platform "$1"; shift 1;
;;
--use_openmp=*) parse_bool "$1"; use_openmp=$?; shift 1;
;;
--use_openmp) use_openmp=1; shift 1;
;;
--use_opencl=*) parse_bool "$1"; use_opencl=$?; shift 1;
;;
--use_opencl) use_opencl=1; shift 1;
;;
--use_opengl=*) parse_bool "$1"; use_opengl=$?; shift 1;
;;
--use_opengl) use_opengl=1; shift 1;
;;
--use_vulkan=*) parse_bool "$1"; use_vulkan=$?; shift 1;
;;
--use_vulkan) use_vulkan=1; shift 1;
;;
--job=*) parse_nthreads "$1"; shift 1;
;;
clean) clean=1; shift 1;
;;
*) break;
esac
done
if [ $use_opencl == 1 ]; then
USE_OPENCL=ON
fi
if [ $use_opengl == 1 ]; then
USE_OPENGL=ON
fi
if [ $use_vulkan == 1 ]; then
USE_VULKAN=ON
fi
if [ $use_openmp == 1 ]; then
USE_OPENMP=ON
USE_THREAD_POOL=OFF
fi
true;

View File

@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars64.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja

View File

@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars32.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja

View File

@ -12,3 +12,9 @@ target_link_libraries(segment.out ${MNN_DEPS})
add_executable(expressDemo.out ${CMAKE_CURRENT_LIST_DIR}/expressDemo.cpp)
target_link_libraries(expressDemo.out ${MNN_DEPS})
add_executable(transformerDemo.out ${CMAKE_CURRENT_LIST_DIR}/transformerDemo.cpp)
target_link_libraries(transformerDemo.out ${MNN_DEPS})
add_executable(rasterDemo.out ${CMAKE_CURRENT_LIST_DIR}/rasterDemo.cpp)
target_link_libraries(rasterDemo.out ${MNN_DEPS})

View File

@ -53,7 +53,6 @@ int main(int argc, const char* argv[]) {
MNN_ERROR("Output Not valid\n");
return 0;
}
auto size = outputInfo->size;
//Test Speed
if (testTime > 0){
//Let the frequence up
@ -82,6 +81,7 @@ int main(int argc, const char* argv[]) {
}
{
auto size = outputInfo->size;
auto outputPtr = output->readMap<float>();
if (nullptr == outputPtr) {
MNN_ERROR("Output Not valid read error\n");

251
demo/exec/rasterDemo.cpp Normal file
View File

@ -0,0 +1,251 @@
//
// rasterDemo.cpp
// MNN
//
// Created by MNN on 2020/10/14.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <fstream>
#include <sstream>
#include <iostream>
#include <chrono>
#include <MNN/MNNDefine.h>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/Execution.hpp"
#include "core/Backend.hpp"
#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
using namespace MNN;
/*
1. Raster does the index mapping shown below:
for (region : regions)
src = region.src, dst = region.dst;
for (i = 0 -> size[0])
for (j = 0 -> size[1])
for (k = 0 -> size[2])
output[dst.offset + i * dst.stride[0] + j * dst.stride[1] + k * dst.stride[2]] =
region.origin[src.offset + i * src.stride[0] + j * src.stride[1] + k * src.stride[2]];
2. The Raster Op has one input and one output, but the input is not a real input tensor; it is a
middle tensor with VIRTUAL memory type whose regions point to the real input tensors, like below.
input_0 --> region_0 --\
\
input_1 --> region_1 ---- middle ----> output
/
input_2 --> region_2 --/
3. This example reads a json file, constructs some Rasters and computes them.
The input json file format is as follows:
{
"inputs" : [
{
"id" : int,
"type" : "type_name", // float or int
"dims" : [int],
"data" : [int/float] // if null, fill with random number
}
],
"outputs" : [
// same as inputs
],
"regions" : [
{
"id" : int, // points to outputs
"size" : [int],
"src" : {
"offset" : int,
"stride" : [int]
},
"dst" : { // same with src },
"origin" : int // point to inputs
}
]
}
*/
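/*
A minimal example input json (a sketch with hypothetical values, not shipped with this demo):
it transposes a 1x1x2x3 tensor into 1x1x3x2 through a single region, so output[j][k] = input[k][j].
{
    "inputs"  : [ { "id" : 0, "type" : "float", "dims" : [1, 1, 2, 3],
                    "data" : [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] } ],
    "outputs" : [ { "id" : 0, "type" : "float", "dims" : [1, 1, 3, 2], "data" : [] } ],
    "regions" : [ { "id" : 0, "size" : [1, 3, 2],
                    "src" : { "offset" : 0, "stride" : [6, 1, 3] },
                    "dst" : { "offset" : 0, "stride" : [6, 2, 1] },
                    "origin" : 0 } ]
}
With this region, dst index = j * 2 + k and src index = j + k * 3, so the output
reads back as [0, 3, 1, 4, 2, 5], i.e. the transpose of [[0, 1, 2], [3, 4, 5]].
*/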
static std::string runRaster(std::string jsonString, int runNum) {
srand(0);
rapidjson::Document document;
document.Parse(jsonString.c_str());
if (document.HasParseError()) {
MNN_ERROR("Invalid Json Format!\n");
return "";
}
// prepare CPU backend
ScheduleConfig config;
config.type = MNN_FORWARD_CPU;
BackendConfig backendConfig;
backendConfig.precision = BackendConfig::Precision_High;
config.backendConfig = &backendConfig;
Backend::Info compute;
compute.type = config.type;
compute.numThread = config.numThread;
compute.user = config.backendConfig;
const RuntimeCreator* runtimeCreator(MNNGetExtraRuntimeCreator(compute.type));
std::unique_ptr<Runtime> runtime(runtimeCreator->onCreate(compute));
std::unique_ptr<Backend> backend(runtime->onCreate());
// build Op
std::unique_ptr<OpT> opt(new OpT);
opt->type = OpType_Raster;
flatbuffers::FlatBufferBuilder builder(1024);
builder.ForceDefaults(true);
auto len = Op::Pack(builder, opt.get());
builder.Finish(len);
auto buffer = builder.GetBufferPointer();
const Op* op = flatbuffers::GetMutableRoot<Op>(buffer);
// build tensors (NCHW) from json
std::vector<std::unique_ptr<Tensor>> inputs;
std::vector<std::unique_ptr<Tensor>> outputs;
auto readTensors = [&document, &backend](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
if (document.HasMember(type)) {
auto info = document[type].GetArray();
tensors.resize(info.Size());
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
tensors[id].reset(new Tensor(4));
auto tensor = tensors[id].get();
auto dataType = obj["type"].GetString();
bool isFloat = !strcmp(dataType, "float");
tensor->setType(isFloat ? DataType_DT_FLOAT : DataType_DT_INT32);
auto dims = obj["dims"].GetArray();
for (auto d = dims.begin(); d != dims.end(); d++) {
tensor->setLength(d - dims.begin(), d->GetInt());
}
TensorUtils::setLinearLayout(tensor);
backend->onAcquireBuffer(tensor, Backend::STATIC);
TensorUtils::getDescribe(tensor)->backend = backend.get();
auto data = obj["data"].GetArray();
if (!strcmp(type, "inputs")) {
bool hasData = data.Size() == tensor->elementSize();
auto dataIter = data.begin();
for (int i = 0; i < tensor->elementSize(); i++, dataIter++) {
if (isFloat) {
tensor->host<float>()[i] = hasData ? dataIter->GetFloat() : rand() % 10 / 10.0;
} else {
tensor->host<int>()[i] = hasData ? dataIter->GetInt() : rand() % 10;
}
}
}
}
}
};
readTensors(inputs, "inputs");
readTensors(outputs, "outputs");
// build middle tensors' region info from json
std::vector<std::unique_ptr<Tensor>> middles;
middles.resize(outputs.size());
if (document.HasMember("regions")) {
auto info = document["regions"].GetArray();
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
if (middles[id] == nullptr) {
middles[id].reset(new Tensor(4));
}
auto des = TensorUtils::getDescribe(middles[id].get());
des->memoryType = MNN::Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region region;
int origin = obj["origin"].GetInt();
region.origin = inputs[origin].get();
auto size = obj["size"].GetArray();
auto src = obj["src"].GetObject();
auto dst = obj["dst"].GetObject();
auto srcStride = src["stride"].GetArray();
auto dstStride = dst["stride"].GetArray();
for (int i = 0; i < 3; i++) {
region.size[i] = size[i].GetInt();
region.src.stride[i] = srcStride[i].GetInt();
region.dst.stride[i] = dstStride[i].GetInt();
}
region.src.offset = src["offset"].GetInt();
region.dst.offset = dst["offset"].GetInt();
des->regions.push_back(region);
}
}
// build execution of Raster and run them
for (int i = 0; i < outputs.size(); i++) {
std::vector<Tensor*> ins = {middles[i].get()}, outs = {outputs[i].get()};
std::unique_ptr<Execution> exe(backend->onCreate(ins, outs, op));
exe->onResize(ins, outs);
auto t1 = std::chrono::high_resolution_clock::now();
for (int j = 0; j < runNum; j++) {
exe->onExecute(ins, outs);
}
auto t2 = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
double time = time_span.count() * 1000.0 / runNum;
printf("For output_id = %d, run %d times, the average time is %f ms.\n", i, runNum, time);
}
auto writeTensors = [&document](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
auto info = document[type].GetArray();
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
auto data = obj["data"].GetArray();
if (data.Size() == tensors[id]->elementSize()) {
// has data, don't overwrite it; go on to the next tensor
continue;
}
bool isFloat = !strcmp(obj["type"].GetString(), "float");
data.Reserve(tensors[id]->elementSize(), document.GetAllocator());
for (int i = 0; i < tensors[id]->elementSize(); i++) {
if (isFloat) {
data.PushBack(tensors[id]->host<float>()[i], document.GetAllocator());
} else {
data.PushBack(tensors[id]->host<int>()[i], document.GetAllocator());
}
}
}
};
writeTensors(inputs, "inputs");
writeTensors(outputs, "outputs");
rapidjson::StringBuffer stringBuffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(stringBuffer);
document.Accept(writer);
return stringBuffer.GetString();
}
int main(int argc, const char* argv[]) {
if (argc < 2) {
printf("Usage: ./rasterDemo.out input.json [output.json] [runNum]\ndefault output is input, and default runNum is 100.\n");
return 0;
}
const char* inputFile = argv[1];
const char* outputFile = argv[1];
int runNum = 100;
if (argc >= 3) {
outputFile = argv[2];
}
if (argc >= 4) {
runNum = ::atoi(argv[3]);
}
std::ifstream in(inputFile);
if (in.fail()) {
printf("Invalid input Json File!\n");
return 0;
}
std::ofstream out(outputFile);
if (out.fail()) {
printf("Invalid output Json File!\n");
return 0;
}
std::stringstream ss;
ss << in.rdbuf();
out << runRaster(ss.str(), runNum);
out.close();
printf("Run Raster Done!\n");
return 0;
}

View File

@ -0,0 +1,60 @@
#include <MNN/expr/Module.hpp>
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Executor.hpp>
#include <fstream>
#include <sstream>
#include <stdio.h>
#include <string.h>
using namespace MNN::Express;
using namespace MNN;
using namespace std;
int main(int argc, const char* argv[]) {
if (argc < 2) {
MNN_ERROR("Don't has model name\n");
return 0;
}
BackendConfig config;
//Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 4);
auto modelName = argv[1];
std::shared_ptr<Module> model;
model.reset(Module::load({"NmtModel/Placeholder", "NmtModel/Placeholder_1"}, {"NmtModel/transpose_2"}, modelName));
std::vector<int> input0 = {32,16,234,3215,61,135,29,10,24317,4661,4,0};
std::vector<int> input1 = {1,1,1,1,1,1,1,1,1,1,1,1};
auto first = _Input({1, (int)input0.size()}, NHWC, halide_type_of<int>());
::memcpy(first->writeMap<int>(), input0.data(), input0.size() * sizeof(int));
auto second = _Input({1, (int)input1.size()}, NHWC, halide_type_of<int>());
::memcpy(second->writeMap<int>(), input1.data(), input1.size() * sizeof(int));
std::vector<VARP> outputs;
for (int i = 0; i < 2; ++i) {
{
AUTOTIME;
Executor::getGlobalExecutor()->resetProfile();
outputs = model->onForward({first, second});
Executor::getGlobalExecutor()->dumpProfile();
}
std::ostringstream fileNameOs;
std::ostringstream dimInfo;
fileNameOs << i << "_output.txt";
auto info = outputs[0]->getInfo();
for (int d=0; d<info->dim.size(); ++d) {
dimInfo << info->dim[d] << "_";
}
auto fileName = fileNameOs.str();
MNN_PRINT("Output Name: %s, Dim: %s\n", fileName.c_str(), dimInfo.str().c_str());
auto ptr = outputs[0]->readMap<int>();
std::ofstream outputOs(fileName.c_str());
for (int i=0; i<info->size; ++i) {
outputOs << ptr[i] << "\n";
}
}
for (int i = 0; i < 10; ++i) {
AUTOTIME;
outputs = model->onForward({first, second});
}
return 0;
}

View File

@ -53,27 +53,23 @@ static int CompareElements(const LabeledElement *a, const LabeledElement *b) {
if (!_net || !_session) {
return nil;
}
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
MNN::Tensor copy(output);
auto input = _net->getSessionInput(_session, nullptr);
MNN::Tensor tensorCache(input);
input->copyToHostTensor(&tensorCache);
// run
NSTimeInterval begin = NSDate.timeIntervalSinceReferenceDate;
// you should set input data for each inference
if (cycles == 1) {
_net->runSession(_session);
} else {
auto input = _net->getSessionInput(_session, nullptr);
MNN::Tensor tensorCache(input);
input->copyToHostTensor(&tensorCache);
for (int i = 0; i < cycles; i++) {
input->copyFromHostTensor(&tensorCache);
_net->runSession(_session);
}
output->copyToHostTensor(&copy);
}
NSTimeInterval cost = NSDate.timeIntervalSinceReferenceDate - begin;
// result
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
MNN::Tensor copy(output);
output->copyToHostTensor(&copy);
float *data = copy.host<float>();
LabeledElement objects[1000];
for (int i = 0; i < 1000; i++) {

View File

@ -1,14 +1,21 @@
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.*")
option(MNN_EXPR_ENABLE_PROFILER "Support profile Expr's op cost" OFF)
option(MNN_EXPR_SHAPE_EAGER "Force compute Expr's shape directly cost" OFF)
IF (MNN_EXPR_ENABLE_PROFILER)
add_definitions(-DMNN_EXPR_ENABLE_PROFILER)
ENDIF()
IF (MNN_EXPR_SHAPE_EAGER)
add_definitions(-DMNN_EXPR_SHAPE_EAGER)
ENDIF()
IF(MNN_SEP_BUILD)
if (MNN_BUILD_FOR_ANDROID_COMMAND)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../")
endif()
add_library(MNN_Express SHARED ${MNN_EXPR_SRCS})
target_link_libraries(MNN_Express MNN)
if (MNN_BUILD_MINI)
target_link_libraries(MNN_Express $<TARGET_OBJECTS:MNNTransform>)
endif()
ELSE()
add_library(MNNExpress OBJECT ${MNN_EXPR_SRCS})
ENDIF()

30
express/Distributions.cpp Normal file
View File

@ -0,0 +1,30 @@
//
// Distributions.cpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "Distributions.hpp"
#include <cmath>
namespace MNN {
namespace Express {
void Distributions::uniform(const int count, const float min, const float max, float *r, std::mt19937 gen) {
std::uniform_real_distribution<float> dis(min, std::nextafter(max, std::numeric_limits<float>::max()));
for (int i = 0; i < count; i++) {
r[i] = dis(gen);
}
}
void Distributions::gaussian(const int count, const float mu, const float sigma, float *r, std::mt19937 gen) {
std::normal_distribution<float> dis(mu, sigma);
for (int i = 0; i < count; i++) {
r[i] = dis(gen);
}
}
} // namespace Express
} // namespace MNN

27
express/Distributions.hpp Normal file
View File

@ -0,0 +1,27 @@
//
// Distributions.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef Distributions_hpp
#define Distributions_hpp
#include <MNN/MNNDefine.h>
#include <random>
namespace MNN {
namespace Express {
class Distributions {
public:
static void uniform(const int count, const float min, const float max, float* r, std::mt19937 gen);
static void gaussian(const int count, const float mu, const float sigma, float* r, std::mt19937 gen);
};
} // namespace Express
} // namespace MNN
#endif // Distributions_hpp

File diff suppressed because it is too large Load Diff

45
express/ExecutorScope.cpp Normal file
View File

@ -0,0 +1,45 @@
//
// ExecutorScope.cpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <thread>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/Scope.hpp>
#include <MNN/expr/ExecutorScope.hpp>
namespace MNN {
namespace Express {
typedef std::shared_ptr<Express::Executor> ExecutorRef;
#if !defined(__APPLE__)
thread_local static Scope<ExecutorRef> g_executor_scope;
#else
static Scope<ExecutorRef> g_executor_scope;
#endif
ExecutorScope::ExecutorScope(const std::shared_ptr<Executor>& current) {
g_executor_scope.EnterScope(current);
}
ExecutorScope::ExecutorScope(const std::string& scope_name,
const std::shared_ptr<Executor>& current) {
g_executor_scope.EnterScope(scope_name, current);
}
ExecutorScope::~ExecutorScope() {
g_executor_scope.ExitScope();
}
const std::shared_ptr<Executor> ExecutorScope::Current() {
if (g_executor_scope.ScopedLevel() > 0) {
return g_executor_scope.Current().content;
}
return Executor::getGlobalExecutor();
}
} // namespace Express
} // namespace MNN

View File

@ -8,23 +8,33 @@
#define FLATBUFFERS_PREFER_PRINTF
#include <MNN/expr/Expr.hpp>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <map>
#include "core/MNNMemoryUtils.h"
#include "Utils.hpp"
#include <map>
#include "core/FileLoader.hpp"
#include <MNN/expr/Executor.hpp>
#include "core/TensorUtils.hpp"
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include "MNN/AutoTime.hpp"
#include "MNN/expr/ExecutorScope.hpp"
//#define MNN_EXPRESS_ERROR_REPORT
static inline std::string numberToString(int index) {
char s[10];
snprintf(s, 10, "%d", index);
return std::string(s);
}
static bool HasUnknownDim(const std::vector<int>& dims) {
for (const int& dim : dims) {
if (dim < 0) {
return true;
}
}
return false;
}
namespace MNN {
namespace Express {
void Variable::Info::syncSize() {
@ -87,8 +97,7 @@ bool VARP::fix(VARP::InputType type) const {
}
Expr::Expr(int outputSize) {
mInside.reset(new Inside);
mInside->mOutputInfos.resize(outputSize);
mInside.reset(new Inside(outputSize));
mOutputNames.resize(outputSize);
}
@ -117,27 +126,46 @@ void Expr::_addLinkForInputs(EXPRP expr) {
}
}
}
EXPRP Expr::create(Variable::Info&& info) {
EXPRP Expr::create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy) {
EXPRP expr(new Expr(1));
expr->mOp = nullptr;
auto originPtr = info.ptr;
auto originPtr = ptr;
expr->mInside->mOutputInfos[0] = std::move(info);
auto& dstInfo = expr->mInside->mOutputInfos[0];
dstInfo.syncSize();
if (dstInfo.size > 0) {
expr->mExtraBuffer.reset(new char[dstInfo.size * dstInfo.type.bytes()], std::default_delete<char[]>());
expr->mInside->mOutputInfos[0].ptr = expr->mExtraBuffer.get();
expr->mInside->mInfoDirty = false;
dstInfo.syncSize();
Utils::copyInfoToTensor(expr->mInside->mOutputTensors[0], expr->mInside->mOutputInfos.data());
expr->mType = type;
if (type == VARP::CONSTANT) {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::CONSTANT;
} else if (type == VARP::INPUT) {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::INPUT;
} else {
expr->mInside->mOutputInfos[0].ptr = nullptr;
expr->mInside->mInfoDirty = true;
// VARP::TRAINABLE
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::TRAINABLE;
}
if (dstInfo.size > 0 && copy) {
auto res = Utils::allocMemoryForHostTensor(expr->mInside->mOutputTensors[0]);
if (!res) {
MNN_ASSERT(false);
return nullptr;
}
} else {
expr->mInside->mOutputTensors[0]->buffer().host = nullptr;
}
if (nullptr == originPtr) {
expr->mType = VARP::INPUT;
if (type == VARP::INPUT && dstInfo.size > 0) {
expr->mInside->mContentDirty = true;
}
return expr;
}
expr->mType = VARP::CONSTANT;
::memcpy(expr->mInside->mOutputInfos[0].ptr, originPtr, dstInfo.size * dstInfo.type.bytes());
expr->mInside->mContentDirty = false;
if (copy) {
::memcpy(expr->mInside->mOutputTensors[0]->buffer().host, originPtr, dstInfo.size * dstInfo.type.bytes());
} else {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->memoryType = Tensor::InsideDescribe::MEMORY_OUTSIDE;
expr->mInside->mOutputTensors[0]->buffer().host = (uint8_t*)originPtr;
}
return expr;
}
EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize) {
@ -147,8 +175,7 @@ EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP
expr->mOp = flatbuffers::GetMutableRoot<Op>(extra.first.get());
expr->mOpBufferSize = extra.second;
expr->mInputs = std::move(inputs);
expr->mInside->mInputInfos.resize(expr->mInputs.size());
expr->mInside->mReq = Executor::getGlobalExecutor()->getRequirement(expr.get());
expr->mInside->mReq = ExecutorScope::Current()->getRequirement(expr.get());
_addLinkForInputs(expr);
return expr;
}
@ -161,34 +188,34 @@ EXPRP Expr::create(const OpT* op, std::vector<VARP> inputs, int outputSize) {
info.dim[0] = 1;
}
info.order = Utils::revertFormat(op->main.AsInput()->dformat);
info.ptr = nullptr;
info.type = Utils::revertDataType(op->main.AsInput()->dtype);
return create(std::move(info));
return create(std::move(info), nullptr, VARP::INPUT);
}
if (OpType_Const == op->type || OpType_TrainableParam == op->type) {
Variable::Info info;
info.dim = op->main.AsBlob()->dims;
info.order = Utils::revertFormat(op->main.AsBlob()->dataFormat);
info.ptr = nullptr;
void* ptr = nullptr;
info.type = Utils::revertDataType(op->main.AsBlob()->dataType);
switch (op->main.AsBlob()->dataType) {
case DataType_DT_INT8:
info.ptr = (void*)op->main.AsBlob()->int8s.data();
ptr = (void*)op->main.AsBlob()->int8s.data();
break;
case DataType_DT_INT32:
info.ptr = (void*)op->main.AsBlob()->int32s.data();
ptr = (void*)op->main.AsBlob()->int32s.data();
break;
case DataType_DT_UINT8:
info.ptr = (void*)op->main.AsBlob()->uint8s.data();
ptr = (void*)op->main.AsBlob()->uint8s.data();
break;
case DataType_DT_FLOAT:
info.ptr = (void*)op->main.AsBlob()->float32s.data();
ptr = (void*)op->main.AsBlob()->float32s.data();
break;
default:
break;
}
auto expr = create(std::move(info));
if (OpType_TrainableParam == op->type) {
//MNN_ASSERT(nullptr != ptr);
auto expr = create(std::move(info), ptr, VARP::CONSTANT);
if (OpType_TrainableParam == op->type && nullptr != ptr) {
expr->mType = VARP::TRAINABLE;
}
return expr;
@ -213,7 +240,7 @@ bool Expr::requireInfo() {
return false;
}
if (nullptr == mOp) {
return mInside->mOutputInfos[0].size > 0;
return !HasUnknownDim(mInside->mOutputInfos[0].dim);
}
bool ready = true;
for (int i = 0; i < mInputs.size(); ++i) {
@ -221,8 +248,8 @@ bool Expr::requireInfo() {
// The Variable is set nullptr by api
return false;
}
mInside->mInputInfos[i] = mInputs[i]->getInfo();
if (nullptr == mInside->mInputInfos[i] && (!mInside->mReq.supportError[i])) {
auto inputInfo = mInputs[i]->getInfo();
if (nullptr == inputInfo) {
#ifdef MNN_EXPRESS_ERROR_REPORT
MNN_ERROR("%s, %d input not ready\n", mName.c_str(), i);
#endif
@ -233,15 +260,19 @@ bool Expr::requireInfo() {
for (int i = 0; i < mInputs.size(); ++i) {
auto& v = mInputs[i];
if (mInside->mReq.shapeNeedContent[i]) {
// `readInternal` maybe return nullptr if element count is 0.
v->readInternal(true);
// For shape need content, the content must not be nullptr
auto ptr = v->readInternal(true);
if (nullptr == ptr) {
ready = false;
break;
}
}
}
if (!ready) {
return false;
}
//MNN_PRINT("Info %s, %p Start\n", mName.c_str(), this);
auto res = Executor::getGlobalExecutor()->computeInfo(this);
auto res = ExecutorScope::Current()->computeInfo(this);
//MNN_PRINT("Info Compute %s\n", mName.c_str());
if (NO_ERROR == res) {
@ -261,6 +292,14 @@ const std::vector<WeakEXPRP>& Variable::toExprs() const {
VARP Variable::create(EXPRP expr, int index) {
VARP res(new Variable(expr, index));
#ifdef MNN_EXPR_SHAPE_EAGER
auto info = expr->requireInfo();
if (!info) {
#ifdef MNN_EXPRESS_ERROR_REPORT
MNN_ERROR("Can't compute shape\n");
#endif
}
#endif
return res;
}
void Expr::replace(EXPRP old, EXPRP from) {
@ -307,16 +346,22 @@ void Expr::replace(EXPRP old, EXPRP from) {
old->mValid = from->mValid;
old->mInside = from->mInside;
old->mInputs = from->mInputs;
std::vector<Expr*> visited;
old->visitOutputs([&](EXPRP expr, int index) {
if (expr->mInside->mInfoDirty && expr->mValid && !expr->mInside->mLinkCache) {
if (expr->visited()) {
return false;
}
visited.emplace_back(expr.get());
expr->setVisited(true);
expr->mInside->mCache.reset();
expr->mInside->mCacheOffset = 0;
expr->mValid = true;
expr->mInside->mInfoDirty = true;
return true;
});
for (auto e : visited) {
e->setVisited(false);
}
}
void Variable::setName(const std::string& name) {
@ -351,7 +396,7 @@ bool Variable::input(VARP src) {
info = tempInfo.get();
}
auto dstInfo = getInfo();
bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size();
bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size() || info->type != dstInfo->type;
if (!needChange) {
for (int i=0; i<info->dim.size(); ++i) {
if (dstInfo->dim[i] != info->dim[i]) {
@ -362,22 +407,19 @@ bool Variable::input(VARP src) {
}
if (!mFrom->mInside->mCache) {
Executor::getGlobalExecutor()->makeCache({mFrom}, false);
ExecutorScope::Current()->makeCache({mFrom}, false);
}
if (needChange) {
bool needAlloc = info->size * info->type.bytes() > mFrom->mInside->mOutputInfos[0].size * mFrom->mInside->mOutputInfos[0].type.bytes();
mFrom->mInside->mOutputInfos[0] = *info;
if (needAlloc) {
mFrom->mExtraBuffer.reset(new char[info->size * info->type.bytes()], std::default_delete<char[]>());
}
mFrom->mInside->mOutputInfos[0].ptr = mFrom->mExtraBuffer.get();
mFrom->mInside->mCache->setShapeDirty(0, mFrom->outputInfo(0));
Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
}
if (info->size) {
auto dstPtr = writeInternal(false);
auto srcPtr = src->readMap<void>();
if (nullptr == dstPtr || nullptr == srcPtr) {
MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
//MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
return false;
}
::memcpy(dstPtr, srcPtr, info->size * info->type.bytes());
@ -387,7 +429,7 @@ bool Variable::input(VARP src) {
} else {
informDirty();
}
mFrom->mInside->mCache->setContentReady();
mFrom->mInside->mContentDirty = false;
return true;
}
@ -396,23 +438,44 @@ void Variable::replace(VARP dst, VARP src) {
dst->setExpr(nullptr, 0);
return;
}
if (nullptr == dst) {
dst.mContent = src.mContent;
return;
}
if (src->mFrom.get() == dst->mFrom.get()) {
dst->mFromIndex = src->mFromIndex;
return;
}
if (src->mFrom->outputSize() != dst->mFrom->outputSize()) {
// Can't replace Expr, Just replace VARP
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
src->mFrom->mTo.emplace_back(expr);
std::vector<Expr*> visited;
dst->mFrom->visitOutputs([src, dst, &visited](EXPRP expr, int index) {
if (expr->visited()) {
return false;
});
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
}
expr->setVisited(true);
visited.emplace_back(expr.get());
expr->mInside->mCache.reset();
expr->mInside->mCacheOffset = 0;
expr->mValid = true;
expr->mInside->mInfoDirty = true;
expr->mInside->mContentDirty = true;
return true;
});
for (auto v : visited) {
v->setVisited(false);
}
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
for (int i =0; i< expr->inputs().size(); ++i) {
auto input = expr->inputs()[i];
if (input == dst) {
expr->mInputs[i] = src;
}
}
src->mFrom->mTo.emplace_back(expr);
return false;
});
dst->mFrom = src->mFrom;
dst->mFromIndex = src->mFromIndex;
return;
@ -452,15 +515,19 @@ bool Variable::resize(INTS dims) {
}
info.dim = dims;
info.syncSize();
mFrom->mExtraBuffer.reset(new char[info.size * info.type.bytes()], std::default_delete<char[]>());
info.ptr = mFrom->mExtraBuffer.get();
Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
if (0 >= info.size) {
return false;
}
bool res = Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
if (!res) {
return false;
}
mFrom->mValid = true;
mFrom->mInside->mInputInfos.clear();
auto cache = mFrom->mInside->mCache;
if (nullptr != cache) {
cache->setShapeDirty(0, mFrom->outputInfo(0));
}
mFrom->inside()->mInfoDirty = false;
mFrom->inside()->mContentDirty = true;
mFrom->visitOutputs([](EXPRP expr, int index) { return expr->setInfoDirty(); });
return true;
}
@ -478,11 +545,12 @@ void Expr::visit(EXPRP expr, const std::function<bool(EXPRP)>& before, const std
void* Variable::readInternal(bool forShape) {
if (nullptr == mFrom->get()) {
if (VARP::INPUT == mFrom->mType) {
if (nullptr == mFrom->mInside->mCache) {
if (mFrom->mInside->mContentDirty) {
return nullptr;
}
}
return mFrom->outputInfo(mFromIndex)->ptr;
//MNN_ASSERT(nullptr != mFrom->inside()->mOutputTensors[0]->buffer().host);
return mFrom->inside()->mOutputTensors[0]->buffer().host;
}
auto res = mFrom->requireInfo();
if (false == res) {
@ -490,21 +558,26 @@ void* Variable::readInternal(bool forShape) {
}
auto cache = mFrom->inside()->mCache;
if (nullptr == cache) {
Executor::getGlobalExecutor()->makeCache({mFrom}, forShape);
ExecutorScope::Current()->makeCache({mFrom}, forShape);
cache = mFrom->inside()->mCache;
}
if (nullptr == cache) {
return nullptr;
}
if (NO_ERROR != Executor::getGlobalExecutor()->runCache(cache)) {
if (NO_ERROR != ExecutorScope::Current()->runCache(cache)) {
return nullptr;
}
cache->syncOutput(mFrom->mInside->mCacheOffset + mFromIndex, mFrom->outputInfo(mFromIndex));
return mFrom->outputInfo(mFromIndex)->ptr;
return Executor::mapOutput(cache.get(), mFrom->mInside->mCacheOffset + mFromIndex, mFrom->mInside->mOutputTensors[mFromIndex]);
}
void Variable::informDirty() {
mFrom->visitOutputs([](EXPRP expr, int index) {
std::vector<Expr*> visited;
mFrom->visitOutputs([&visited](EXPRP expr, int index) {
if (expr->visited()) {
return false;
}
visited.emplace_back(expr.get());
expr->setVisited(true);
if (expr->inside()->mReq.shapeNeedContent.empty()) {
// Not init
return false;
@ -514,28 +587,32 @@ void Variable::informDirty() {
expr->visitOutputs([](EXPRP e, int index) { return e->setInfoDirty(); });
return false;
}
if (expr->inside()->mContentDirty) {
return false;
}
expr->inside()->mContentDirty = true;
if (expr->inside()->mReq.contentNeedContent[index]) {
if (expr->inside()->mCache != nullptr) {
expr->inside()->mCache->setContentDirty();
Executor::setContentDirty(expr->inside()->mCache.get());
}
return true;
}
return false;
});
for (auto e : visited) {
e->setVisited(false);
}
}
void Variable::prepareCompute(const std::vector<VARP>& vars, bool forceCpu) {
std::vector<EXPRP> exprs;
for (auto v : vars) {
if (v->expr().first->inside()->mCache == nullptr) {
if (!v->expr().first->visited()) {
v->expr().first->inside()->mCache = nullptr;
v->expr().first->requireInfo();
v->expr().first->setVisited(true);
exprs.emplace_back(v->expr().first);
}
}
Executor::getGlobalExecutor()->makeCache(std::move(exprs), forceCpu);
for (auto v : vars) {
v->expr().first->setVisited(false);
}
ExecutorScope::Current()->makeCache(std::move(exprs), forceCpu);
}
void* Variable::writeInternal(bool inform) {
@ -545,16 +622,8 @@ void* Variable::writeInternal(bool inform) {
if (inform) {
informDirty();
}
auto cache = mFrom->mInside->mCache;
if (nullptr == cache) {
Executor::getGlobalExecutor()->makeCache({mFrom});
cache = mFrom->mInside->mCache;
}
if (nullptr == cache) {
return nullptr;
}
mFrom->mInside->mCache->setContentReady();
return mFrom->mInside->mOutputInfos[0].ptr;
mFrom->mInside->mContentDirty = false;
return mFrom->inside()->mOutputTensors[0]->host<void>();
}
void Variable::unMap() {
@ -591,12 +660,17 @@ bool Expr::setInfoDirty() {
mInside->mContentDirty = true;
mValid = true;
if (mInside->mCache != nullptr) {
mInside->mCache->setShapeDirty(0, nullptr);
Executor::setShapeDirty(mInside->mCache.get());
}
for (auto o : mInside->mOutputTensors) {
Utils::releaseMemoryForHostTensor(o);
}
return true;
}
std::vector<VARP> Variable::load(const char* fileName) {
AutoStorage<uint8_t> buffer;
{
FileLoader loader(fileName);
if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
@ -606,11 +680,11 @@ std::vector<VARP> Variable::load(const char* fileName) {
if (!loader.valid()) {
return {};
}
AutoStorage<uint8_t> buffer;
loader.merge(buffer);
if (buffer.get() == nullptr) {
return {};
}
}
return load(buffer.get(), buffer.size());
}
std::vector<VARP> Variable::load(const uint8_t* buffer, size_t length) {
@ -722,6 +796,7 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
} else {
MNN_ASSERT(1 == expr->outputSize());
auto& info = expr->mInside->mOutputInfos[0];
auto ptr = expr->mInside->mOutputTensors[0]->host<void>();
op.reset(new OpT);
if (expr->mType != VARP::INPUT) {
auto blob = new BlobT;
@ -730,16 +805,20 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
if (info.type.code == halide_type_float) {
blob->dataType = DataType_DT_FLOAT;
blob->float32s.resize(info.size);
::memcpy(blob->float32s.data(), info.ptr, info.size * sizeof(float));
} else if (info.type.code == halide_type_int) {
::memcpy(blob->float32s.data(), ptr, info.size * sizeof(float));
} else if (info.type.code == halide_type_int && info.type.bits == 32) {
blob->dataType = DataType_DT_INT32;
blob->int32s.resize(info.size);
::memcpy(blob->int32s.data(), info.ptr, info.size * sizeof(int));
}
else if (info.type.code == halide_type_uint && info.type.bits == 8) {
::memcpy(blob->int32s.data(), ptr, info.size * sizeof(int));
} else if (info.type.code == halide_type_int && info.type.bits == 8) {
blob->dataType = DataType_DT_INT8;
blob->int8s.resize(info.size);
auto pptr = (int8_t *)ptr;
::memcpy(blob->int8s.data(), ptr, info.size * sizeof(int8_t));
} else if (info.type.code == halide_type_uint && info.type.bits == 8) {
blob->dataType = DataType_DT_UINT8;
blob->uint8s.resize(info.size);
::memcpy(blob->uint8s.data(), info.ptr, info.size * sizeof(uint8_t));
::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(uint8_t));
}
op->type = OpType_Const;
if (expr->mType == VARP::TRAINABLE) {
@ -781,12 +860,12 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
auto op = dest->oplists[index].get();
auto tensorIndexOffset = varIndexInfo[expr];
for (int v=0; v<expr->outputSize(); ++v) {
auto const tensorIndex = tensorIndexOffset + v;
if (dest->tensorName[tensorIndex].empty()) {
auto subindex = tensorIndexOffset + v;
if (dest->tensorName[subindex].empty()) {
if (v == 0) {
dest->tensorName[tensorIndex] = op->name;
dest->tensorName[subindex] = op->name;
} else {
dest->tensorName[tensorIndex] = op->name + numberToString(v);
dest->tensorName[subindex] = op->name + numberToString(v);
}
}
}

210
express/Initializer.cpp Normal file
View File

@ -0,0 +1,210 @@
//
// Initializer.cpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "Initializer.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <cmath>
#include <vector>
#include "Distributions.hpp"
#include "RandomGenerator.hpp"
namespace MNN {
namespace Express {
Express::VARP Initializer::createConstVar(Express::INTS dim, Express::Dimensionformat format) {
auto res = Express::_Input(dim, format, halide_type_of<float>());
this->onExecute(res);
res.fix(Express::VARP::CONSTANT);
return res;
}
class ConstantInitializer : public Initializer {
public:
ConstantInitializer(float value) : mConstant(value) {
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
auto ptr = p->writeMap<float>();
for (int i = 0; i < count; i++) {
ptr[i] = mConstant;
}
}
private:
float mConstant;
};
Initializer* Initializer::constValue(float value) {
return new ConstantInitializer(value);
}
class UniformInitializer : public Initializer {
public:
UniformInitializer(float min = 0, float max = 1) {
mMin = min;
mMax = max;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
Distributions::uniform(count, mMin, mMax, p->writeMap<float>(), RandomGenerator::generator());
}
private:
float mMin;
float mMax;
};
Initializer* Initializer::uniform(float minValue, float maxValue) {
return new UniformInitializer(minValue, maxValue);
}
class XavierInitializer : public Initializer {
public:
XavierInitializer(VarianceNorm norm = FANIN) {
mNorm = norm;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int fanIn = count / dims[0];
int fanOut = dims.size() > 1 ? count / dims[1] : count;
float n = fanIn; // default: FANIN
if (mNorm == VarianceNorm::AVERAGE) {
n = (fanIn + fanOut) / 2.0f;
} else if (mNorm == VarianceNorm::FANOUT) {
n = fanOut;
}
float scale = sqrtf(3.0f / n);
Distributions::uniform(count, -scale, scale, p->writeMap<float>(), RandomGenerator::generator());
}
private:
VarianceNorm mNorm;
};
Initializer* Initializer::xavier(VarianceNorm norm) {
return new XavierInitializer(norm);
}
class GaussianInitializer : public Initializer {
public:
GaussianInitializer(float mean = 0, float std = 1) {
mMean = mean;
mStd = std;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
Distributions::gaussian(count, mMean, mStd, p->writeMap<float>(), RandomGenerator::generator());
}
private:
float mMean;
float mStd;
};
Initializer* Initializer::gauss(float mean, float std) {
return new GaussianInitializer(mean, std);
}
class MSRAInitializer : public Initializer {
public:
MSRAInitializer(VarianceNorm norm = FANIN) {
mNorm = norm;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int fanIn = count / dims[0];
int fanOut = dims.size() > 1 ? count / dims[1] : count;
float n = fanIn; // default: FANIN
if (mNorm == VarianceNorm::AVERAGE) {
n = (fanIn + fanOut) / 2.0f;
} else if (mNorm == VarianceNorm::FANOUT) {
n = fanOut;
}
float std = sqrtf(2.0f / n);
Distributions::gaussian(count, 0.0f, std, p->writeMap<float>(), RandomGenerator::generator());
}
private:
VarianceNorm mNorm;
};
Initializer* Initializer::MSRA(VarianceNorm norm) {
return new MSRAInitializer(norm);
}
class BilinearInitializer : public Initializer {
public:
BilinearInitializer() = default;
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
MNN_ASSERT(dims.size() == 4);
MNN_ASSERT(dims[2] == dims[3]); // NCHW, H == W
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int f = ceilf(dims[3] / 2.0f);
float c = (dims[3] - 1) / (2.0f * f);
auto ptr = p->writeMap<float>();
for (int i = 0; i < count; i++) {
float x = i % dims[3];
float y = (i / dims[3]) % dims[2];
ptr[i] = (1 - std::fabs(x / f - c)) * (1 - std::fabs(y / f - c));
}
}
};
Initializer* Initializer::bilinear() {
return new BilinearInitializer();
}
class PositiveUnitball : public Initializer {
public:
PositiveUnitball() = default;
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
auto ptr = p->writeMap<float>();
Distributions::uniform(count, 0, 1, ptr, RandomGenerator::generator());
int dim = count / dims[0];
for (int i = 0; i < dims[0]; i++) {
float sum = 0;
for (int j = 0; j < dim; j++) {
sum += ptr[i * dim + j];
}
for (int j = 0; j < dim; j++) {
ptr[i * dim + j] = ptr[i * dim + j] / sum;
}
}
}
};
Initializer* Initializer::positiveUnitball() {
return new PositiveUnitball();
}
} // namespace Express
} // namespace MNN

43
express/Initializer.hpp Normal file
View File

@ -0,0 +1,43 @@
//
// Initializer.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef Initializer_hpp
#define Initializer_hpp
#include <MNN/expr/Expr.hpp>
namespace MNN {
namespace Express {
class RandomGenerator;
class MNN_PUBLIC Initializer {
public:
Initializer() = default;
virtual ~Initializer() = default;
Express::VARP createConstVar(Express::INTS dim, Express::Dimensionformat format = Express::NCHW);
virtual void onExecute(Express::VARP p) = 0;
static Initializer* constValue(float value);
static Initializer* uniform(float minValue = 0.0f, float maxValue = 1.0f);
enum VarianceNorm {
FANIN,
FANOUT,
AVERAGE,
};
static Initializer* xavier(VarianceNorm norm = FANIN);
static Initializer* gauss(float mean = 0.0f, float std = 1.0f);
static Initializer* MSRA(VarianceNorm norm = FANIN);
static Initializer* bilinear();
static Initializer* positiveUnitball();
};
} // namespace Express
} // namespace MNN
#endif // Initializer_hpp

View File

@ -30,7 +30,18 @@ static DataType _convertDataType(halide_type_t type) {
}
return DataType_DT_INVALID;
}
static VARP _checkNC4HW4(VARP x) {
#ifdef MNN_EXPR_SHAPE_EAGER
auto info = x->getInfo();
if (nullptr != info && info->order == NC4HW4) {
return _Convert(x, NCHW);
}
#endif
return x;
}
static VARP _Binary(VARP x, VARP y, BinaryOpOperation operation) {
x = _checkNC4HW4(x);
y = _checkNC4HW4(y);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_BinaryOp;
op->type = OpType_BinaryOp;
@ -49,6 +60,7 @@ static VARP _Unary(VARP x, UnaryOpOperation operation) {
return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
x = _checkNC4HW4(x);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ReductionParam;
op->type = OpType_Reduction;
@ -60,6 +72,7 @@ static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _ReduceMutable(VARP x, VARP dim, ReductionType type, bool keepDim) {
x = _checkNC4HW4(x);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ReductionParam;
op->type = OpType_Reduction;
@ -955,6 +968,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMax(VARP input, int axis) {
input = _checkNC4HW4(input);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ArgMax;
op->type = OpType_ArgMax;
@ -976,6 +990,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMin(VARP input, int axis) {
input = _checkNC4HW4(input);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ArgMax;
op->type = OpType_ArgMin;

View File

@ -5,6 +5,7 @@
// Created by MNN on 2019/08/20.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MergeOptimizer_hpp
#define MergeOptimizer_hpp

View File

@ -54,16 +54,14 @@ VARP _Input(INTS shape, Dimensionformat data_format, halide_type_t dtype) {
info.dim = std::move(shape);
info.order = data_format;
info.type = dtype;
info.ptr = nullptr;
return (Variable::create(Expr::create(std::move(info))));
return (Variable::create(Expr::create(std::move(info), nullptr, VARP::INPUT)));
}
VARP _Scalar(const void* ptr, halide_type_t type) {
Variable::Info info;
info.dim = {};
info.order = NHWC;
info.type = type;
info.ptr = (void*)ptr;
return (Variable::create(Expr::create(std::move(info))));
return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
/*create a constant variable.
Args:
@ -79,8 +77,7 @@ VARP _Const(const void* ptr, INTS shape, Dimensionformat format, halide_type_t t
info.dim = std::move(shape);
info.order = format;
info.type = type;
info.ptr = (void*)ptr;
return (Variable::create(Expr::create(std::move(info))));
return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
VARP _Const(float value, INTS shape, Dimensionformat format) {
@ -93,8 +90,8 @@ VARP _Const(float value, INTS shape, Dimensionformat format) {
for (int i = 0; i < info.size; ++i) {
values[i] = value;
}
info.ptr = (void*)values.data();
return (Variable::create(Expr::create(std::move(info))));
auto ptr = (void*)values.data();
return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format, halide_type_t type) {
@ -107,6 +104,23 @@ VARP _TrainableParam(float value, INTS dims, Dimensionformat format) {
v.fix(VARP::TRAINABLE);
return v;
}
VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape) {
std::unique_ptr<OpT> ipOp(new OpT);
ipOp->type = OpType_InnerProduct;
ipOp->main.type = OpParameter_InnerProduct;
ipOp->main.value = new InnerProductT;
auto ipParam = ipOp->main.AsInnerProduct();
ipParam->outputCount = outputShape[1];
if(!bias.empty()) {
ipParam->biasTerm = 1;
}
ipParam->weightSize = weight.size();
ipParam->weight = std::move(weight);
ipParam->bias = std::move(bias);
return (Variable::create(Expr::create(ipOp.get(), {x})));
}
VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads) {
std::unique_ptr<OpT> convOp(new OpT);
@ -183,7 +197,7 @@ VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS
return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6, int nbits) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_Convolution;
if (channel[0] == channel[1] && channel[0] == group) {
@ -285,6 +299,42 @@ VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS
return (Variable::create(Expr::create(std::move(convOp), {x, weight})));
}
VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_Deconvolution;
if (channel[0] == channel[1] && channel[0] == group) {
convOp->type = OpType_DeconvolutionDepthwise;
}
convOp->main.type = OpParameter_Convolution2D;
convOp->main.value = new Convolution2DT;
auto conv2D = convOp->main.AsConvolution2D();
conv2D->common.reset(new Convolution2DCommonT);
conv2D->common->padMode = _convertPadMode(pad);
if (pads.size() == 2) {
conv2D->common->padX = pads[0];
conv2D->common->padY = pads[1];
} else {
conv2D->common->pads = std::move(pads);
}
conv2D->common->strideX = stride[0];
conv2D->common->strideY = stride[1];
conv2D->common->group = group;
conv2D->common->outputCount = channel[1];
conv2D->common->inputCount = channel[0];
conv2D->common->dilateX = dilate[0];
conv2D->common->dilateY = dilate[1];
conv2D->common->kernelX = kernelSize[0];
conv2D->common->kernelY = kernelSize[1];
conv2D->common->relu6 = relu6;
conv2D->common->relu = relu;
MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]);
conv2D->weight = std::move(weight);
MNN_ASSERT(bias.size() == channel[1]);
conv2D->bias = std::move(bias);
return (Variable::create(Expr::create(convOp.get(), {x})));
}
static VARP _Pool(VARP x, INTS kernel, INTS stride, PoolType type, PaddingMode pad, INTS pads) {
std::unique_ptr<OpT> pool(new OpT);
pool->type = OpType_Pooling;
@ -381,9 +431,13 @@ x: A variable.
Returns:
output: A variable with the same type as `x`.
*/
VARP _Relu6(VARP x) {
VARP _Relu6(VARP x, float minValue, float maxValue) {
std::unique_ptr<OpT> relu(new OpT);
relu->type = OpType_ReLU6;
relu->main.value = new Relu6T;
relu->main.type = OpParameter_Relu6;
relu->main.AsRelu6()->maxValue = maxValue;
relu->main.AsRelu6()->minValue = minValue;
return (Variable::create(Expr::create(relu.get(), {x})));
}
/*Given an input value x, it computes the output as x if x > 0 and slopes * x if x <= 0.
@ -746,9 +800,12 @@ input: A variable.
Returns:
A variable of Halide_Type_Int.
*/
VARP _Shape(VARP input) {
VARP _Shape(VARP input, bool nchw) {
std::unique_ptr<OpT> shape(new OpT);
shape->type = OpType_Shape;
if (nchw) {
shape->defaultDimentionFormat = MNN_DATA_FORMAT_NCHW;
}
return (Variable::create(Expr::create(std::move(shape), {input})));
}
/*Stacks a list of rank-R variables into one rank-(R+1) variable.
@ -906,6 +963,21 @@ VARP _Elu(VARP features, float alpha) {
op->main.value = eluParam;
return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Given an input value x, it computes the output as 1.0 if x > threshold and 0.0 if x <= threshold.
features: A variable of type Halide_Type_Float
threshold: threshold value
Returns:
A variable. Has the same type as features.
*/
VARP _Threshold(VARP features, float threshold) {
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_Threshold;
auto eluParam = new ELUT;
op->main.type = OpParameter_ELU;
eluParam->alpha = threshold;
op->main.value = eluParam;
return (Variable::create(Expr::create(std::move(op), {features})));
}
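/* Usage sketch (illustrative only; the values and the _Const call below are assumptions,
   not taken from the original sources):
   std::vector<float> values = {-1.0f, 0.2f, 0.8f};
   auto x = _Const(values.data(), {3}, NCHW, halide_type_of<float>());
   auto y = _Threshold(x, 0.5f);
   // y->readMap<float>() is expected to give {0.0f, 0.0f, 1.0f}
*/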
/*Computes the size of the variable
Args:
input: A variable of type Halide_Type_Float or Halide_Type_Int
@ -1049,7 +1121,6 @@ std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims) {
op->main.type = OpParameter_MomentsParam;
momentsParam->dim = axis;
momentsParam->keepDims = keepDims;
momentsParam->dType = (MNN::DataType)Utils::convertDataType(x->getInfo()->type);
op->main.value = momentsParam;
EXPRP expr = Expr::create(std::move(op), {x}, 2);
std::vector<VARP> res;
@ -1405,7 +1476,7 @@ VARP _ZeroGrad(VARP x) {
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu) {
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_ConvInt8;
if (channel[0] == channel[1] && channel[0] == group) {
@ -1433,9 +1504,16 @@ VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<fl
conv2D->symmetricQuan->bias = std::move(bias);
conv2D->symmetricQuan->scale = std::move(scale);
conv2D->symmetricQuan->weight = std::move(weight);
conv2D->symmetricQuan->nbits = nbits;
return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim) {
std::unique_ptr<MNN::OpT> cosineSimilarityOp(new MNN::OpT);
cosineSimilarityOp->type = MNN::OpType_CosineSimilarity;
return (Variable::create(Expr::create(std::move(cosineSimilarityOp), {input0, input1, inputDim})));
}
VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue/*For future*/) {
auto xInfo = x->getInfo();
auto scaleInfo = scale->getInfo();

View File

@ -22,28 +22,7 @@ Optimizer::Parameters::~Parameters() {
}
}
std::shared_ptr<Optimizer> Optimizer::create(Config config) {
const int numThread = config.numThread;
auto forwardType = config.forwardType;
if (forwardType != MNN_FORWARD_ALL) {
if (MNNGetExtraBackendCreator(forwardType) == nullptr) {
return nullptr;
}
return std::shared_ptr<Optimizer>(new MergeOptimizer(config.forwardType, numThread, nullptr));
}
auto device = config.device;
if (CPU == device) {
return std::shared_ptr<Optimizer>(new MergeOptimizer(MNN_FORWARD_CPU, numThread, nullptr));
}
if (GPU == device) {
std::vector<MNNForwardType> types {MNN_FORWARD_METAL, MNN_FORWARD_OPENCL, MNN_FORWARD_VULKAN, MNN_FORWARD_OPENGL};
for (auto type : types) {
auto creator = MNNGetExtraBackendCreator(type);
if (nullptr != creator) {
return std::shared_ptr<Optimizer>(new MergeOptimizer(type, numThread, nullptr));
}
}
}
// Do nothing
return nullptr;
}

View File

@ -0,0 +1,45 @@
//
// RandomGenerator.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef RandomGenerator_hpp
#define RandomGenerator_hpp
#include <MNN/MNNDefine.h>
#include <random>
namespace MNN {
namespace Express {
class MNN_PUBLIC RandomGenerator {
private:
RandomGenerator(int seed = std::random_device()()) {
mSeed = seed;
mGenerator.seed(mSeed);
}
~RandomGenerator() = default;
RandomGenerator(RandomGenerator &);
RandomGenerator &operator=(const RandomGenerator &);
private:
int mSeed;
std::mt19937 mGenerator;
public:
static std::mt19937 &generator(int seed = std::random_device()()) {
static RandomGenerator rng(seed);
return rng.mGenerator;
}
};
} // namespace Express
} // namespace MNN
#endif // RandomGenerator_hpp

View File

@ -10,8 +10,24 @@
#include <map>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/MNNMemoryUtils.h"
namespace MNN {
namespace Express {
Expr::Inside::Inside(int outputSize) {
mOutputInfos.resize(outputSize);
mOutputTensors.resize(outputSize);
for (int i=0; i<outputSize; ++i) {
mOutputTensors[i] = new Tensor;
TensorUtils::getDescribe(mOutputTensors[i])->memoryType = Tensor::InsideDescribe::MEMORY_HOST;
}
}
Expr::Inside::~Inside() {
for (auto t : mOutputTensors) {
delete t;
}
}
#define CONVERT(src, dst, f)\
if (f == src) return dst;
@ -61,7 +77,6 @@ void Utils::copyInfoToTensor(Tensor* dest, const Variable::Info* source) {
}
dest->buffer().dimensions = (int)source->dim.size();
dest->buffer().type = source->type;
dest->buffer().host = (uint8_t*)source->ptr;
TensorUtils::getDescribe(dest)->dimensionFormat = (MNN_DATA_FORMAT)Utils::convertFormat(source->order);
TensorUtils::setLinearLayout(dest);
}
@ -70,7 +85,31 @@ void Utils::copyTensorToInfo(Variable::Info* shape, const Tensor* tensor) {
shape->dim = tensor->shape();
shape->size = tensor->elementSize();
shape->order = Utils::revertFormat(TensorUtils::getDescribe(tensor)->dimensionFormat);
shape->ptr = tensor->host<float>();
}
bool Utils::allocMemoryForHostTensor(Tensor* dest) {
if (nullptr != dest->buffer().host) {
return true;
}
if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
return false;
}
auto size = dest->size();
if (0 >= size) {
return false;
}
dest->buffer().host = (uint8_t*)MNNMemoryAllocAlign(size, MNN_MEMORY_ALIGN_DEFAULT);
return dest->buffer().host != nullptr;
}
bool Utils::releaseMemoryForHostTensor(Tensor* dest) {
if (nullptr == dest->buffer().host) {
return true;
}
if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
return false;
}
MNNMemoryFreeAlign(dest->buffer().host);
dest->buffer().host = nullptr;
return true;
}
} // namespace Express

View File

@ -15,15 +15,16 @@
namespace MNN {
namespace Express {
struct Expr::Inside {
std::vector<const Variable::Info*> mInputInfos;
Inside(int outputSize);
~ Inside();
std::vector<Variable::Info> mOutputInfos;
std::vector<Tensor*> mOutputTensors;
Executor::Requirement mReq;
std::shared_ptr<Executor::ComputeCache::Unit> mUnit;
std::shared_ptr<Executor::Unit> mUnit;
std::shared_ptr<Executor::ComputeCache> mCache;
int mCacheOffset = 0;
bool mInfoDirty = true;
bool mContentDirty = true;
bool mLinkCache = false;
};
class Utils {
public:
@ -33,6 +34,8 @@ public:
static int convertFormat(Dimensionformat format);
static Express::Dimensionformat revertFormat(int format);
static halide_type_t revertDataType(DataType dataType);
static bool allocMemoryForHostTensor(Tensor* dest);
static bool releaseMemoryForHostTensor(Tensor* dest);
};
} // namespace Express
} // namespace MNN

View File

@ -10,7 +10,7 @@
#include <MNN/expr/ExprCreator.hpp>
using namespace MNN::Express;
namespace MNN {
namespace Train {
namespace Express {
FixModule::FixModule(std::vector<Express::VARP> output, std::vector<Express::VARP> parameters,
std::vector<std::pair<Express::VARP, Express::Dimensionformat>> inputs) {
for (auto p : parameters) {
@ -34,5 +34,19 @@ std::vector<Express::VARP> FixModule::onForward(const std::vector<Express::VARP>
}
return mOutput;
}
} // namespace Train
Module* FixModule::clone(CloneContext* ctx) const {
FixModule* module(new FixModule);
for (auto& it : mInputs) {
VARP v = ctx->getOrClone(it.first);
module->mInputs.push_back(std::make_pair(v, it.second));
}
for (auto& it : mOutput) {
VARP v = ctx->getOrClone(it);
module->mOutput.push_back(v);
}
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN

View File

@ -8,9 +8,9 @@
#ifndef FixModule_hpp
#define FixModule_hpp
#include "Module.hpp"
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Train {
namespace Express {
class FixModule : public Module {
public:
@ -20,10 +20,14 @@ public:
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
virtual void onClearCache() override;
private:
FixModule() = default;
Module* clone(CloneContext* ctx) const override;
std::vector<std::pair<Express::VARP, Express::Dimensionformat>> mInputs;
std::vector<Express::VARP> mOutput;
};
} // namespace Train
} // namespace Express
} // namespace MNN
#endif

112
express/module/IfModule.cpp Normal file
View File

@ -0,0 +1,112 @@
//
// IfModule.cpp
// MNN
//
// Created by MNN on 2020/09/01.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "IfModule.hpp"
#include "MNN_generated.h"
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
for (int i=0; i<names.size(); ++i) {
if (names[i] == key) {
return i;
}
}
return -1;
}
std::vector<Express::VARP> IfModule::onForward(const std::vector<Express::VARP>& inputs) {
std::vector<Express::VARP> outputs(mOutputFromElse.size());
MNN_ASSERT(mOutputFromThen.size() == mOutputFromElse.size());
if (inputs[0]->readMap<int>()[0] > 0) {
std::vector<Express::VARP> subInputs(mInputForThen.size());
for (auto& p : mInputForThen) {
subInputs[p.first] = inputs[p.second];
}
auto subOutputs = mThen->onForward(subInputs);
for (int i=0; i<mOutputFromThen.size(); ++i) {
outputs[i] = subOutputs[mOutputFromThen[i]];
}
} else {
std::vector<Express::VARP> subInputs(mInputForElse.size());
for (auto& p : mInputForElse) {
subInputs[p.first] = inputs[p.second];
}
auto subOutputs = mElse->onForward(subInputs);
for (int i=0; i<mOutputFromElse.size(); ++i) {
outputs[i] = subOutputs[mOutputFromElse[i]];
}
}
return outputs;
}
IfModule* IfModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
auto module = new IfModule;
auto ifParam = op->main_as_IfParam();
auto& thenG = subGraph.find(ifParam->then_graph()->str())->second;
auto& elseG = subGraph.find(ifParam->else_graph()->str())->second;
module->mElse = elseG.m;
module->mThen = thenG.m;
if (nullptr != op->name()) {
module->setName(op->name()->str());
}
/** Compute map index
std::vector<std::pair<int, int>> mInputForThen;
// First: mElse's index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForElse;
std::vector<int> mOutputFromThen;
std::vector<int> mOutputFromElse;
*/
// Map Inputs
for (int i=0; i<ifParam->aliases_inputs()->size(); ++i) {
auto index = i;
auto data = ifParam->aliases_inputs()->GetAs<StringVec>(i);
if (nullptr == data->data()) {
continue;
}
for (int s=0; s<data->data()->size(); ++s) {
auto name = data->data()->GetAsString(s)->str();
auto thenPos = _findPos(thenG.inputs, name);
if (thenPos >= 0) {
module->mInputForThen.emplace_back(std::make_pair(thenPos, i));
}
auto elsePos = _findPos(elseG.inputs, name);
if (elsePos >= 0) {
module->mInputForElse.emplace_back(std::make_pair(elsePos, i));
}
}
}
// Map outputs
auto output = ifParam->aliases_outputs();
module->mOutputFromThen.resize(output->size());
module->mOutputFromElse.resize(output->size());
for (int i=0; i<output->size(); ++i) {
auto data = output->GetAs<StringVec>(i);
MNN_ASSERT(data->data()->size() == 2);
auto thenPos = _findPos(thenG.outputs, data->data()->GetAsString(0)->str());
MNN_ASSERT(thenPos >= 0);
auto elsePos = _findPos(elseG.outputs, data->data()->GetAsString(1)->str());
module->mOutputFromThen[i] = thenPos;
module->mOutputFromElse[i] = elsePos;
}
return module;
}
Module* IfModule::clone(CloneContext* ctx) const {
IfModule* module(new IfModule);
module->mInputForThen = mInputForThen;
module->mInputForElse = mInputForElse;
module->mOutputFromThen = mOutputFromThen;
module->mOutputFromElse = mOutputFromElse;
module->mThen.reset(mThen->clone(ctx));
module->mElse.reset(mElse->clone(ctx));
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN
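A worked example of the index maps that IfModule::create builds (hypothetical graph, names assumed, not from the commit):
// Suppose the If op's outer inputs are  0: "cond", 1: "x", 2: "y",
// the then-graph's inputs are {"x"} and the else-graph's inputs are {"x", "y"}.
// Scanning aliases_inputs as above yields
//   mInputForThen == {{0, 1}}            // then-input 0 <- outer input 1 ("x")
//   mInputForElse == {{0, 1}, {1, 2}}    // else-inputs 0, 1 <- outer inputs 1, 2
// and onForward picks the branch from inputs[0]->readMap<int>()[0] > 0, then remaps the
// chosen sub-graph's outputs through mOutputFromThen / mOutputFromElse.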

View File

@ -0,0 +1,43 @@
//
// IfModule.hpp
// MNN
//
// Created by MNN on 2020/09/01.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef IfModule_hpp
#define IfModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class IfModule : public Module {
public:
virtual ~ IfModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
private:
IfModule(){}
Module* clone(CloneContext* ctx) const override;
// First: mThen's index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForThen;
// First: mElse's index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForElse;
std::vector<int> mOutputFromThen;
std::vector<int> mOutputFromElse;
std::shared_ptr<Module> mThen;
std::shared_ptr<Module> mElse;
};
}
}
#endif /* IfModule_hpp */

182
express/module/Module.cpp Normal file
View File

@ -0,0 +1,182 @@
//
// Module.cpp
// MNN
//
// Created by MNN on 2019/11/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <MNN/expr/Module.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include "FixModule.hpp"
#include "PipelineModule.hpp"
#include "core/FileLoader.hpp"
namespace MNN {
namespace Express {
class EmptyModule : public Module {
public:
EmptyModule(const std::vector<Express::VARP>& parameters) {
for (auto p : parameters) {
addParameter(p);
}
}
virtual ~EmptyModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
return {};
}
protected:
EmptyModule() = default;
Module* clone(Module::CloneContext* ctx) const override {
EmptyModule* module(new EmptyModule);
return this->cloneBaseTo(ctx, module);
}
};
Module* Module::createEmpty(const std::vector<Express::VARP>& parameters) {
return new EmptyModule(parameters);
}
Express::VARP Module::forward(Express::VARP input) {
return this->onForward({input})[0];
}
std::vector<Express::VARP> Module::parameters() const {
std::vector<Express::VARP> result;
_collectParameters(result);
return result;
}
bool Module::loadParameters(const std::vector<Express::VARP>& parameters) {
std::vector<Express::VARP> result;
_collectParameters(result);
if (parameters.empty() || parameters.size() != result.size()) {
MNN_ERROR("Error parameters, empty or parameter size not match \n");
return false;
}
for (int i=0; i<parameters.size(); ++i) {
if (nullptr != result[i].get()) {
// Check Origin parameter's size
auto dstInfo = result[i]->getInfo();
auto srcInfo = parameters[i]->getInfo();
if (dstInfo->dim.size() != srcInfo->dim.size() || dstInfo->order != srcInfo->order) {
MNN_ERROR("Error parameters %d, dim size or order not match \n", i);
return false;
}
if (dstInfo->size != srcInfo->size || dstInfo->type != srcInfo->type) {
MNN_ERROR("Error parameters %d, size or type not match \n", i);
return false;
}
}
Variable::replace(result[i], parameters[i]);
}
return true;
}
void Module::setIsTraining(const bool isTraining) {
mIsTraining = isTraining;
for (auto c : mChildren) {
c->setIsTraining(isTraining);
}
}
bool Module::getIsTraining() {
return mIsTraining;
}
void Module::registerModel(const std::vector<std::shared_ptr<Module>>& children) {
mChildren.insert(mChildren.begin(), children.begin(), children.end());
}
int Module::addParameter(VARP parameter) {
auto res = mParameters.size();
mParameters.emplace_back(parameter);
return (int)res;
}
void Module::setParameter(Express::VARP parameter, int index) {
if (index < 0 || index >= mParameters.size()) {
MNN_ERROR("Module error: index out of range: %d - %d:\n", index, (int)mParameters.size());
return;
}
mParameters[index] = parameter;
}
void Module::_collectParameters(std::vector<Express::VARP>& result) const {
for (auto p : mParameters) {
result.push_back(p);
}
for (auto c : mChildren) {
c->_collectParameters(result);
}
}
void Module::clearCache() {
for (auto c : mChildren) {
c->clearCache();
}
this->onClearCache();
}
Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic) {
AutoStorage<uint8_t> buffer;
{
FileLoader loader(fileName);
if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
return {};
}
loader.read();
if (!loader.valid()) {
return {};
}
loader.merge(buffer);
if (buffer.get() == nullptr) {
return {};
}
}
return load(inputs, outputs, buffer.get(), buffer.size(), dynamic);
}
Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
return PipelineModule::load(inputs, outputs, buffer, length, dynamic);
}
EXPRP Module::CloneContext::getOrClone(EXPRP expr) {
auto it = mExprMap.find(expr.get());
if (it == mExprMap.end()) {
// EXPRP replica = expr->clone(shareParams);
// TODO(hjchen2): Clone expr.
EXPRP replica = expr;
it = mExprMap.emplace(expr.get(), replica).first;
}
return it->second;
}
VARP Module::CloneContext::getOrClone(VARP var) {
auto it = mVarMap.find(var.get());
if (it == mVarMap.end()) {
// TODO(hjchen2): Clone variable.
VARP replica = var;
it = mVarMap.emplace(var.get(), replica).first;
}
return it->second;
}
Module* Module::clone(const Module* module, const bool shareParams) {
CloneContext context(shareParams);
return module->clone(&context);
}
Module* Module::cloneBaseTo(CloneContext* ctx, Module* module) const {
for (const Express::VARP& var : mParameters) {
module->mParameters.push_back(ctx->getOrClone(var));
}
module->mIsTraining = mIsTraining;
module->mName = mName;
module->mType = mType;
return module;
}
} // namespace Express
} // namespace MNN
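A usage sketch for the public Module API defined above (file name and tensor names are assumptions, not from the commit):
#include <MNN/expr/Module.hpp>
std::vector<MNN::Express::VARP> runOnce(MNN::Express::VARP inputVar) {
    using namespace MNN::Express;
    std::shared_ptr<Module> net(Module::load({"input"}, {"output"}, "model.mnn", false));
    if (net == nullptr) {
        return {};
    }
    // A replica sharing parameter VARPs with the original, see Module::clone above.
    std::shared_ptr<Module> replica(Module::clone(net.get(), true));
    return net->onForward({inputVar});
}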

View File

@ -6,9 +6,11 @@
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "NN.hpp"
#include <MNN/expr/NN.hpp>
#include "Distributions.hpp"
#include "FixModule.hpp"
#include "WhileModule.hpp"
#include "IfModule.hpp"
#include "Initializer.hpp"
#include "MNN_generated.h"
#include "RandomGenerator.hpp"
@ -17,7 +19,7 @@
using namespace MNN::Express;
namespace MNN {
namespace Train {
namespace Express {
static VARP _activate(VARP x, NN::ActivationFunctionType type) {
switch (type) {
case NN::None:
@ -58,6 +60,14 @@ public:
}
private:
DropoutModule() = default;
Module* clone(CloneContext* ctx) const override {
DropoutModule* module(new DropoutModule);
module->mDropRatio = mDropRatio;
return this->cloneBaseTo(ctx, module);
}
float mDropRatio;
};
@ -80,8 +90,8 @@ public:
mRunningVariance = _Const(bnPa->varData()->data(), {1, mChannels, 1, 1}, NCHW);
addParameter(mScale);
addParameter(mBias);
addParameter(mRunningVariance);
addParameter(mRunningMean);
mRunningVariancePos = addParameter(mRunningVariance);
mRunningMeanPos = addParameter(mRunningMean);
mReductionDims = {0, 2, 3};
setType("BatchNorm");
}
@ -110,8 +120,8 @@ public:
addParameter(mScale);
addParameter(mBias);
addParameter(mRunningVariance);
addParameter(mRunningMean);
mRunningVariancePos = addParameter(mRunningVariance);
mRunningMeanPos = addParameter(mRunningMean);
setType("BatchNorm");
}
@ -156,9 +166,8 @@ public:
mRunningVariance = _Const(mMomentum) * mRunningVariance + _Const(1 - mMomentum) * sampleVar;
outputData->setName(name());
outputData = _Convert(outputData, dimFormat);
Variable::prepareCompute({inputs[0], outputData, mRunningMean, mRunningVariance});
mRunningMean.fix(Express::VARP::CONSTANT);
mRunningVariance.fix(Express::VARP::CONSTANT);
setParameter(mRunningMean, mRunningMeanPos);
setParameter(mRunningVariance, mRunningVariancePos);
return {outputData};
}
auto rStd = _Const(1.0f) / _Sqrt(mRunningVariance + _Const(mEps));
@ -180,12 +189,31 @@ public:
}
private:
BatchNormModule() = default;
Module* clone(CloneContext* ctx) const override {
BatchNormModule* module(new BatchNormModule);
module->mMomentum = mMomentum;
module->mEps = mEps;
module->mScale = ctx->getOrClone(mScale);
module->mBias = ctx->getOrClone(mBias);
module->mRunningMean = ctx->getOrClone(mRunningMean);
module->mRunningVariance = ctx->getOrClone(mRunningVariance);
module->mRunningMeanPos = mRunningMeanPos;
module->mRunningVariancePos = mRunningVariancePos;
module->mChannels = mChannels;
module->mReductionDims = mReductionDims;
return this->cloneBaseTo(ctx, module);
}
float mMomentum = 0.99;
float mEps = 1e-5;
VARP mScale = nullptr;
VARP mBias = nullptr;
VARP mRunningMean = nullptr;
VARP mRunningVariance = nullptr;
int mRunningMeanPos = -1;
int mRunningVariancePos = -1;
int mChannels;
std::vector<int> mReductionDims;
};
@ -246,7 +274,18 @@ public:
tempOutput->setName(name());
return {tempOutput};
}
private:
ConvModule() = default;
Module* clone(CloneContext* ctx) const override {
ConvModule* module(new ConvModule);
module->mParameter = mParameter;
module->mParameter.weight = ctx->getOrClone(mParameter.weight);
module->mParameter.bias = ctx->getOrClone(mParameter.bias);
return this->cloneBaseTo(ctx, module);
}
NN::ConvParameters mParameter;
};
static std::tuple<VARP, VARP, int> _initParameters(const NN::ConvOption& option, bool hasBias,
@ -533,7 +572,23 @@ public:
}
private:
const NN::ConvOption mOption;
ConvOctaveModule() = default;
Module* clone(CloneContext* ctx) const override {
ConvOctaveModule* module(new ConvOctaveModule);
module->mOption = mOption;
module->mLLW = ctx->getOrClone(mLLW);
module->mLHW = ctx->getOrClone(mLHW);
module->mHLW = ctx->getOrClone(mHLW);
module->mHHW = ctx->getOrClone(mHHW);
module->mLBias = ctx->getOrClone(mLBias);
module->mHBias = ctx->getOrClone(mHBias);
module->mSplitInput = mSplitInput;
module->mGroup = mGroup;
return this->cloneBaseTo(ctx, module);
}
NN::ConvOption mOption;
VARP mLLW;
VARP mLHW;
VARP mHLW;
@ -555,7 +610,7 @@ Module* NN::ConvOctave(const ConvParameters& parameters,
module->setName(parameters.name);
return module;
}
Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs) {
if (nullptr == expr->get()) {
return nullptr;
}
@ -565,6 +620,12 @@ Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
if (expr->get()->type() == OpType_Dropout) {
return new DropoutModule(0.3f);
}
if (expr->get()->type() == OpType_While) {
return WhileModule::create(expr->get(), subgraphs);
}
if (expr->get()->type() == OpType_If) {
return IfModule::create(expr->get(), subgraphs);
}
return nullptr;
}
@ -622,6 +683,9 @@ public:
mLimitScale = _Scalar<float>(1.0f / limit);
mClampValue = _Scalar<float>(limit);
mInputScalePos = addParameter(mInputScale);
mOutputScalePos = addParameter(mOutputScale);
setType("ConvBNReluFused");
}
@ -632,31 +696,16 @@ public:
tempX = _Convert(tempX, NCHW);
}
auto originX = tempX;
VARP scale;
if (mFeatureScaleStatMethod == NN::PerTensor) {
scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
} else {
auto originSize = originX->getInfo()->size;
auto batch = originX->getInfo()->dim[0];
auto channel = originX->getInfo()->dim[1];
if (originSize / batch / channel < 10) {
// Too small data
//MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
std::vector<int> dims = {1, channel, 1, 1};
auto dimVar = _Const(dims.data(), {4}, NCHW, halide_type_of<int32_t>());
auto singleScale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
scale = _Fill(dimVar, singleScale);
} else {
//MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
scale = _Maximum(_ReduceMax(_Abs(tempX), {0, 2, 3}, true), _Scalar<float>(0.0001f)) * mLimitScale;
}
}
scale.fix(VARP::CONSTANT);
VARP scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
if (useScale == nullptr) {
tempX = _Round(tempX * _Reciprocal(scale)) * scale;
} else {
tempX = _Round(tempX * _Reciprocal(useScale)) * useScale;
}
// Break the gradient flow by using a cast
tempX = _Cast<float>(tempX);
// Move grad from tempX to originX
tempX = _Convert(tempX + _ZeroGrad(originX), originFormat);
return std::make_pair(tempX, scale);
}
@ -684,18 +733,16 @@ public:
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
VARP res;
if (getIsTraining()) {
Variable::prepareCompute({inputs[0]});
auto x = _Convert(inputs[0], NCHW);
// simulate weight quant
auto weightScale = _Maximum(_ReduceMax(_Abs(mWeight), {1, 2, 3}, true), _Scalar<float>(1E-6)) * mLimitScale;
weightScale.fix(VARP::CONSTANT);
auto weightTemp = _Round(mWeight * _Reciprocal(weightScale)) * weightScale;
weightTemp = weightTemp + _ZeroGrad(mWeight);
// simulate input quant to get original input scale
auto inputPair = fakeQuantFeature(x);
mInputScale = updateScale(mInputScale, inputPair.second);
mInputScale.fix(VARP::CONSTANT);
setParameter(mInputScale, mInputScalePos);
// simulate output quant to get original output scale
res = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@ -709,10 +756,9 @@ public:
res = _activate(res, mActivation);
Variable::prepareCompute({conv, res});
auto outputPair = fakeQuantFeature(res);
mOutputScale = updateScale(mOutputScale, outputPair.second);
mOutputScale.fix(VARP::CONSTANT);
setParameter(mOutputScale, mOutputScalePos);
res = outputPair.first;
} else {
if (nullptr == mInputScale) {
@ -725,6 +771,7 @@ public:
auto x = _Convert(inputs[0], NCHW);
auto inputPair = fakeQuantFeature(x);
mInputScale = inputPair.second;
setParameter(mInputScale, mInputScalePos);
inputPair.first.fix(VARP::CONSTANT);
auto simuRes = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@ -737,6 +784,7 @@ public:
Variable::prepareCompute({simuRes});
auto outputPair = fakeQuantFeature(simuRes);
mOutputScale = outputPair.second;
setParameter(mOutputScale, mOutputScalePos);
outputPair.first.fix(VARP::CONSTANT);
}
@ -772,12 +820,7 @@ public:
{
std::vector<int> dims = {x->getInfo()->dim[1]};
auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
VARP channelScale;
if (mFeatureScaleStatMethod == NN::PerTensor) {
channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
} else {
channelScale = _Reciprocal(mInputScale);
}
VARP channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
x = _FloatToInt8(x, channelScale, -127, 127);// TODO add clamp
}
@ -824,12 +867,7 @@ public:
{
std::vector<int> dims = {res->getInfo()->dim[1]};
auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
VARP channelScale;
if (mFeatureScaleStatMethod == NN::PerTensor) {
channelScale = _Fill(dimVar, mOutputScale);
} else {
channelScale = mOutputScale;
}
VARP channelScale = _Fill(dimVar, mOutputScale);
res = _Int8ToFloat(res, channelScale);
}
}
@ -838,6 +876,34 @@ public:
}
private:
ConvBNReluFusedModule() = default;
Module* clone(CloneContext* ctx) const override {
ConvBNReluFusedModule* module(new ConvBNReluFusedModule);
module->mConvParameter = mConvParameter;
module->mConvParameter.weight = ctx->getOrClone(mConvParameter.weight);
module->mConvParameter.bias = ctx->getOrClone(mConvParameter.bias);
module->mOption = mOption;
module->mGroup = mGroup;
module->mWeight = ctx->getOrClone(mWeight);
module->mBias = ctx->getOrClone(mBias);
module->mActivation = mActivation;
module->mLimitScale = ctx->getOrClone(mLimitScale);
module->mInputScalePos = mInputScalePos;
module->mOutputScalePos = mOutputScalePos;
module->mInputScale = ctx->getOrClone(mInputScale);
module->mOutputScale = ctx->getOrClone(mOutputScale);
module->mClampValue = ctx->getOrClone(mClampValue);
module->mMomentum = mMomentum;
module->mFeatureScaleStatMethod = mFeatureScaleStatMethod;
module->mScaleUpdateMethod = mScaleUpdateMethod;
if (mBatchNorm) {
module->mBatchNorm.reset(mBatchNorm->clone(ctx));
module->registerModel({module->mBatchNorm});
}
return this->cloneBaseTo(ctx, module);
}
NN::ConvParameters mConvParameter;
NN::ConvOption mOption;
int mGroup;
@ -846,6 +912,8 @@ private:
NN::ActivationFunctionType mActivation = NN::ActivationFunctionType::None;
std::shared_ptr<Module> mBatchNorm = nullptr;
VARP mLimitScale;
int mInputScalePos = -1;
int mOutputScalePos = -1;
VARP mInputScale = nullptr;
VARP mOutputScale = nullptr;
VARP mClampValue;
@ -870,5 +938,5 @@ Module* NN::ConvInt8(const ConvParameters& para, int bits, NN::FeatureScaleStatM
return new ConvBNReluFusedModule({conv}, featureMethod, method, bits);
}
} // namespace Train
} // namespace Express
} // namespace MNN
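The fakeQuantFeature hunk above collapses the old per-channel branch into a single per-tensor path. A scalar sketch of that computation (the bit width and helper name are assumptions; the real code operates on whole VARPs):
#include <algorithm>
#include <cmath>
// scale = max(|x| over the tensor, 1e-4) / limit, then x' = round(x / scale) * scale,
// mirroring _Maximum(_ReduceMax(_Abs(x)), 0.0001) * mLimitScale and _Round(x * _Reciprocal(scale)) * scale.
float fakeQuantScalar(float x, float maxAbsOverTensor, float limit /* e.g. 127 for 8 bit */) {
    float scale = std::max(maxAbsOverTensor, 1e-4f) * (1.0f / limit);  // mLimitScale = 1 / limit
    return std::roundf(x / scale) * scale;
}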

View File

@ -0,0 +1,761 @@
//
// PipelineModule.cpp
// MNN
//
// Created by MNN on 2020/01/09.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "PipelineModule.hpp"
#include "MNN_generated.h"
#include <set>
#include <vector>
#include "StaticModule.hpp"
#include "IfModule.hpp"
#include "WhileModule.hpp"
using namespace MNN::Express;
namespace MNN {
namespace Express {
//#define DYNAMIC
#define PIPELINE_MODULE "_pipeline_module__"
class ExprModule : public Module {
public:
ExprModule(EXPRP expr) {
mExpr = expr;
setName(expr->name());
mInputs = expr->inputs();
auto op = mExpr->get();
if (op) {
auto typeName = EnumNameOpType(op->type());
setType(typeName);
}
for (int i = 0; i < mInputs.size(); ++i) {
auto inputExpr = mInputs[i]->expr().first;
if (inputExpr->get() != nullptr) {
mInputs[i] = nullptr;
mInputIndexes.emplace_back(i);
continue;
}
switch (inputExpr->inputType()) {
case VARP::INPUT:
mInputs[i] = nullptr;
mInputIndexes.emplace_back(i);
break;
case VARP::CONSTANT:
break;
case VARP::TRAINABLE:
addParameter(mInputs[i]);
break;
default:
break;
}
}
}
virtual std::vector<VARP> onForward(const std::vector<VARP>& inputs) override {
MNN_ASSERT(mInputIndexes.size() == inputs.size());
if (nullptr == mExpr->get()) {
return {Variable::create(mExpr)};
}
std::vector<VARP> tempInputs = mInputs;
for (int i = 0; i < inputs.size(); ++i) {
tempInputs[mInputIndexes[i]] = inputs[i];
}
std::vector<VARP> outputVars;
auto newExpr = Expr::create(mExpr->extra(), std::move(tempInputs), mExpr->outputSize());
newExpr->setName(mExpr->name());
for (int i = 0; i < mExpr->outputSize(); ++i) {
outputVars.emplace_back(Variable::create(newExpr, i));
}
return outputVars;
}
const std::vector<int>& inputIndexes() const {
return mInputIndexes;
}
private:
Module* clone(CloneContext* ctx) const override {
ExprModule* module(new ExprModule(ctx->getOrClone(mExpr)));
for (const VARP& var : mInputs) {
module->mInputs.push_back(ctx->getOrClone(var));
}
module->mInputIndexes = mInputIndexes;
return this->cloneBaseTo(ctx, module);
}
EXPRP mExpr;
std::vector<VARP> mInputs;
std::vector<int> mInputIndexes;
};
Module* PipelineModule::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(EXPRP)> transformFunction;
if (fortrain) {
transformFunction =
[&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
auto convExtracted = NN::Utils::ExtractConvolution(source);
if (convExtracted.weight == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> module(NN::Conv(convExtracted));
module->setName(source->name());
return std::make_pair(std::vector<int>{0}, module);
};
} else {
transformFunction = [&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
};
}
return new PipelineModule(inputs, outputs, transformFunction);
}
PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outputs, const Transformer& transformFunction) {
setType(PIPELINE_MODULE);
std::vector<EXPRP> executeOrder;
std::set<EXPRP> inputExpr;
for (auto v : inputs) {
inputExpr.insert(v->expr().first);
}
for (auto output : outputs) {
Expr::visit(output->expr().first,
[&executeOrder, &inputExpr](EXPRP expr) {
if (expr->visited()) {
return false;
}
if (inputExpr.find(expr)!= inputExpr.end()) {
expr->setVisited(true);
executeOrder.emplace_back(expr);
return false;
}
return true;
},
[&executeOrder](EXPRP expr) {
//FUNC_PRINT_ALL(var->name().c_str(), s);
if (!expr->visited()) {
executeOrder.emplace_back(expr);
expr->setVisited(true);
}
return true;
});
}
for (auto expr : executeOrder) {
expr->setVisited(false);
}
// Set Indexes
std::map<EXPRP, int> indexes;
int currentIndexes = 0;
for (auto expr : executeOrder) {
indexes[expr] = currentIndexes;
currentIndexes += expr->outputSize();
}
std::set<EXPRP> inputSets;
mInputIndexes.clear();
mStackSize = currentIndexes;
for (auto v : inputs) {
auto inputExpr = v->expr();
mInputIndexes.emplace_back(indexes[inputExpr.first] + inputExpr.second);
inputSets.insert(inputExpr.first);
}
// Create all submodules
for (auto expr : executeOrder) {
if (inputSets.find(expr) != inputSets.end()) {
continue;
}
std::pair<std::vector<int>, std::shared_ptr<Module> > moduleResult;
bool extracted = false;
if (!transformFunction) {
moduleResult = std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
} else {
moduleResult = transformFunction(expr);
}
if (moduleResult.second == nullptr) {
std::shared_ptr<Module> module(new ExprModule(expr));
moduleResult.first = ((ExprModule*)module.get())->inputIndexes();
moduleResult.second = module;
} else {
extracted = true;
}
auto subInputs = expr->inputs();
auto& exprInputIndexes = moduleResult.first;
std::vector<int> inputIndexes;
if (exprInputIndexes.empty() && extracted) {
inputIndexes.resize(subInputs.size());
for (int i = 0; i < inputIndexes.size(); ++i) {
auto inputExpr = subInputs[i]->expr();
inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
}
} else {
inputIndexes.resize(exprInputIndexes.size());
for (int i = 0; i < inputIndexes.size(); ++i) {
auto inputExpr = subInputs[exprInputIndexes[i]]->expr();
inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
}
}
std::vector<int> outputIndexes(expr->outputSize());
for (int i = 0; i < outputIndexes.size(); ++i) {
outputIndexes[i] = indexes[expr] + i;
}
mSubModules.emplace_back(std::make_tuple(moduleResult.second, inputIndexes, outputIndexes));
registerModel({moduleResult.second});
}
mOutputIndexes.clear();
for (auto output : outputs) {
auto outputExpr = output->expr();
mOutputIndexes.emplace_back(indexes[outputExpr.first] + outputExpr.second);
}
}
bool PipelineModule::turnQuantize(Module* module, const int bit, NN::FeatureScaleStatMethod featureScaleStatMethod, NN::ScaleUpdateMethod scaleUpdateMethod) {
if (nullptr == module || module->type() != PIPELINE_MODULE) {
MNN_ERROR("Invalide module for quantized\n");
return false;
}
((PipelineModule*)module)->toTrainQuant(bit, featureScaleStatMethod, scaleUpdateMethod);
return true;
}
std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputIndices) {
MNN_ASSERT(outputIndices.size() > 0);
std::vector<int> countResult(outputIndices.size(), 0);
for (int i = 0; i < mSubModules.size(); i++) {
auto &m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto name = theModule->name();
auto &inputIndices = std::get<1>(m);
for (int j = 0; j < inputIndices.size(); j++) {
int index = inputIndices[j];
for (int k = 0; k < countResult.size(); k++) {
if (index == outputIndices[k]) {
countResult[k]++;
}
}
}
}
return countResult;
}
void PipelineModule::toTrainQuant(const int bits, NN::FeatureScaleStatMethod featureScaleStatMethod,
NN::ScaleUpdateMethod scaleUpdateMethod) {
std::vector<int> needEraseIndices;
for (int i = 0; i < mSubModules.size(); i++) {
auto& m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto moduleType = theModule->type();
//auto& inputIndices = std::get<1>(m);
auto& outputIndices = std::get<2>(m);
if (moduleType == "Conv" && i < mSubModules.size() - 1) {
auto& p1 = mSubModules[i+1];
auto p1Module = std::get<0>(p1);
auto& p1ModuleType = p1Module->type();
auto& p1InputIndices = std::get<1>(p1);
auto& p1OutputIndices = std::get<2>(p1);
auto convOutputCount = countOutputReference(outputIndices);
bool convSingleOutputReference = ((outputIndices.size() == 1) && (convOutputCount[0] == 1));
// only conv
if ((!convSingleOutputReference) || (p1ModuleType == "Conv") ||
(p1ModuleType != "BatchNorm" && p1ModuleType != "ReLU" && p1ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// conv + bn + ?
if (p1ModuleType == "BatchNorm") {
bool convBnConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convBnConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// last conv + bn
if (i == mSubModules.size() - 2) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
// maybe there is a relu or relu6 after conv + bn
auto& p2 = mSubModules[i+2];
auto& p2Module = std::get<0>(p2);
auto p2ModuleType = p2Module->type();
auto& p2InputIndices = std::get<1>(p2);
auto& p2OutputIndices = std::get<2>(p2);
auto bnOutputCount = countOutputReference(p1OutputIndices);
bool bnSingleOutputReference = ((p1OutputIndices.size() == 1) && (bnOutputCount[0] == 1));
// only conv + bn
if ((!bnSingleOutputReference) || (p2ModuleType != "ReLU" && p2ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
} else { // conv + bn + relu or conv + bn + relu6
bool convBnReluConnected = ((bnSingleOutputReference) && (p2InputIndices.size() == 1) && (p2InputIndices[0] == p1OutputIndices[0]));
if (!convBnReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
theModule.reset(NN::ConvBNReluFused({theModule, p1Module, p2Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p2OutputIndices;
needEraseIndices.emplace_back(i + 1);
needEraseIndices.emplace_back(i + 2);
continue;
}
}
// conv + relu or conv + relu6
if (p1ModuleType == "ReLU" || p1ModuleType == "ReLU6") {
bool convReluConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
}
if (i == mSubModules.size() - 1 && moduleType == "Conv") {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
}
}
// erase useless submodules
const int eraseSize = needEraseIndices.size();
int alreadyErasedCount = 0;
for (int i = 0; i < eraseSize; i++) {
auto position = needEraseIndices[i] - alreadyErasedCount;
auto type = std::get<0>(mSubModules[position])->type();
MNN_ASSERT(type == "BatchNorm" || type == "ReLU" || type == "ReLU6");
mSubModules.erase(mSubModules.begin() + position);
alreadyErasedCount++;
}
}
std::vector<VARP> PipelineModule::onForward(const std::vector<VARP>& inputs) {
std::vector<VARP> mStack(mStackSize);
for (int i = 0; i < mInputIndexes.size(); ++i) {
mStack[mInputIndexes[i]] = inputs[i];
}
for (int index = 0; index < mSubModules.size(); ++index) {
auto& m = mSubModules[index];
std::vector<VARP> tempInputs(std::get<1>(m).size());
for (int i = 0; i < tempInputs.size(); ++i) {
tempInputs[i] = mStack[std::get<1>(m)[i]];
MNN_ASSERT(nullptr != tempInputs[i]);
}
std::vector<VARP> tempOutputs = std::get<0>(m)->onForward(tempInputs);
MNN_ASSERT(tempOutputs.size() == std::get<2>(m).size());
for (int i = 0; i < tempOutputs.size(); ++i) {
mStack[std::get<2>(m)[i]] = tempOutputs[i];
MNN_ASSERT(nullptr != tempOutputs[i]);
}
}
std::vector<VARP> outputs(mOutputIndexes.size());
for (int i = 0; i < mOutputIndexes.size(); ++i) {
outputs[i] = mStack[mOutputIndexes[i]];
}
return outputs;
}
void PipelineModule::onClearCache() {
// Do nothing
}
static std::map<std::string, SubGraph> _createSubGraph(const MNN::Net* net, bool dynamic) {
std::map<std::string, SubGraph> subGraphMap;
auto subGraphs = net->subgraphs();
if (nullptr == subGraphs) {
return subGraphMap;
}
for (int i=0; i<subGraphs->size(); ++i) {
auto graph = subGraphs->GetAs<SubGraphProto>(i);
std::vector<std::string> subInputs;
std::vector<std::string> subOutputs;
if (nullptr != graph->inputs()) {
for (int v=0; v<graph->inputs()->size(); ++v) {
auto index = graph->inputs()->data()[v];
subInputs.emplace_back(graph->tensors()->GetAsString(index)->str());
}
}
for (int v=0; v<graph->outputs()->size(); ++v) {
auto index = graph->outputs()->data()[v];
subOutputs.emplace_back(graph->tensors()->GetAsString(index)->str());
}
// Pack to Net for loading
std::shared_ptr<Module> submodule;
{
std::unique_ptr<SubGraphProtoT> _tempInfo(graph->UnPack());
std::unique_ptr<NetT> _tempNet(new NetT);
_tempNet->oplists = std::move(_tempInfo->nodes);
_tempNet->tensorName = std::move(_tempInfo->tensors);
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = Net::Pack(builder, _tempNet.get());
builder.Finish(offset);
if (dynamic) {
submodule.reset(PipelineModule::load(subInputs, subOutputs, (const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), dynamic));
} else {
submodule.reset(new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), subInputs, subOutputs));
}
if (graph->name() != nullptr) {
submodule->setName(graph->name()->str());
}
}
auto key = graph->name()->str();
SubGraph subgraph;
subgraph.inputs = std::move(subInputs);
subgraph.outputs = std::move(subOutputs);
subgraph.m = submodule;
subGraphMap.insert(std::make_pair(key, subgraph));
}
return subGraphMap;
}
struct SubModuleInfo {
std::vector<int> opList;
std::vector<int> inputs;
std::vector<int> outputs;
std::vector<uint8_t> tensorMask;
};
static std::vector<SubModuleInfo> _createSubModuleInfo(const MNN::Net* net, const std::set<int>& inputIndexes, const std::set<int>& outputIndexes) {
std::vector<SubModuleInfo> submodule;
SubModuleInfo current;
std::vector<int> inputOps;
// Separate the graph into several submodules
for (int i=0; i<net->oplists()->size(); ++i) {
auto op = net->oplists()->GetAs<Op>(i);
// Collect Input
if (op->type() == OpType_Input) {
inputOps.emplace_back(i);
continue;
}
if (op->type() == OpType_If || op->type() == OpType_While) {
if (current.opList.size() > 0) {
// Not empty
submodule.emplace_back(std::move(current));
}
SubModuleInfo controlOp;
controlOp.opList = {i};
submodule.emplace_back(std::move(controlOp));
continue;
}
current.opList.emplace_back(i);
}
if (!current.opList.empty()) {
submodule.emplace_back(std::move(current));
}
/** Compute all submodules' inputs and outputs */
// 0: not used, 1: input, 2: output, 3: mid, 4: valid output
for (int moduleIndex=0; moduleIndex < submodule.size(); ++moduleIndex) {
auto& m = submodule[moduleIndex];
if (1 == m.opList.size()) {
// Fast way to determine
auto op = net->oplists()->GetAs<Op>(m.opList[0]);
if (nullptr != op->inputIndexes()) {
m.inputs.resize(op->inputIndexes()->size());
::memcpy(m.inputs.data(), op->inputIndexes()->data(), m.inputs.size() * sizeof(int));
}
if (nullptr != op->outputIndexes()) {
m.outputs.resize(op->outputIndexes()->size());
::memcpy(m.outputs.data(), op->outputIndexes()->data(), m.outputs.size() * sizeof(int));
}
} else {
m.tensorMask = std::vector<uint8_t>(net->tensorName()->size(), 0);
auto& tensorMask = m.tensorMask;
for (auto opIndex : m.opList) {
auto op = net->oplists()->GetAs<Op>(opIndex);
if (nullptr != op->inputIndexes()) {
for (int v=0; v<op->inputIndexes()->size(); ++v) {
auto index = op->inputIndexes()->data()[v];
tensorMask[index] = tensorMask[index] | 1;
}
}
if (nullptr != op->outputIndexes()) {
for (int v=0; v<op->outputIndexes()->size(); ++v) {
auto index = op->outputIndexes()->data()[v];
tensorMask[index] = tensorMask[index] | 2;
}
}
}
for (int i=0; i<tensorMask.size(); ++i) {
if (0 == tensorMask[i]) {
continue;
}
if (1 == tensorMask[i]) {
m.inputs.emplace_back(i);
continue;
}
if (2 == tensorMask[i]) {
m.outputs.emplace_back(i);
continue;
}
if (3 == tensorMask[i]) {
if (outputIndexes.find(i) != outputIndexes.end()) {
m.outputs.emplace_back(i);
}
}
}
}
// Check if the module's input is valid
for (int i=0; i<m.inputs.size(); ++i) {
auto index = m.inputs[i];
if (inputIndexes.find(index) != inputIndexes.end()) {
continue;
}
bool find = false;
for (int sub=0; sub < moduleIndex; ++sub) {
for (auto out : submodule[sub].outputs) {
if (out == index) {
find = true;
break;
}
}
if (find) {
break;
}
}
if (find) {
continue;
}
// Find from module
for (int sub=0; sub < moduleIndex; ++sub) {
if (submodule[sub].tensorMask.empty()) {
continue;
}
if (submodule[sub].tensorMask[index] == 2) {
find = true;
break;
}
if (submodule[sub].tensorMask[index] == 3) {
submodule[sub].outputs.emplace_back(index);
submodule[sub].tensorMask[index] = 2;
find = true;
break;
}
}
MNN_ASSERT(find);
}
}
for (auto& m : submodule) {
m.tensorMask.clear();
}
return submodule;
}
static Module* _createSubModule(const MNN::Net* net, const SubModuleInfo& info, const std::map<std::string, SubGraph>& subs) {
if (1 == info.opList.size()) {
auto op = net->oplists()->GetAs<Op>(info.opList[0]);
if (OpType_If == op->type()) {
return IfModule::create(op, subs);
}
if (OpType_While == op->type()) {
return WhileModule::create(op, subs);
}
MNN_ASSERT(false);
}
std::unique_ptr<NetT> _tempNet(new NetT);
// Copy Tensor Name
_tempNet->tensorName.resize(net->tensorName()->size());
for (int i=0; i<net->tensorName()->size(); ++i) {
_tempNet->tensorName[i] = net->tensorName()->GetAsString(i)->str();
}
// Create Input node
std::vector<std::string> inputNames;
for (auto index : info.inputs) {
std::unique_ptr<OpT> inputOp(new OpT);
inputOp->outputIndexes = {index};
inputOp->type = OpType_Input;
inputOp->main.type = OpParameter_Input;
inputOp->main.value = new InputT;
inputOp->main.AsInput()->dims = {0, 0, -1, -1};
_tempNet->oplists.emplace_back(std::move(inputOp));
inputNames.emplace_back(_tempNet->tensorName[index]);
}
// Create compute node
for (auto opIndex : info.opList) {
std::unique_ptr<OpT> op(net->oplists()->GetAs<Op>(opIndex)->UnPack());
_tempNet->oplists.emplace_back(std::move(op));
}
// Get output names
std::vector<std::string> outputNames;
for (auto index : info.outputs) {
outputNames.emplace_back(_tempNet->tensorName[index]);
}
// Create Net Buffer
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = Net::Pack(builder, _tempNet.get());
builder.Finish(offset);
_tempNet.reset();
return new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), inputNames, outputNames);
}
Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
// Create Subgraph
auto net = GetNet(buffer);
auto subGraphs = net->subgraphs();
if (nullptr == net->oplists() || nullptr == net->tensorName()) {
MNN_ERROR("Invalid net, for null oplist or tensorName\n");
return nullptr;
}
if (!dynamic) {
if (nullptr == subGraphs) {
// Has no control flow, can just use static module
return new StaticModule(buffer, length, inputs, outputs);
}
}
auto subGraphMap = _createSubGraph(net, dynamic);
if (dynamic) {
// For dynamic mode
auto varMaps = Variable::loadMap(buffer, length);
std::vector<VARP> inputVars(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputVars[i] = varMaps[inputs[i]];
}
std::vector<VARP> outputVars(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputVars[i] = varMaps[outputs[i]];
}
return extract(inputVars, outputVars, false, subGraphMap);
}
std::set<int> inputIndexes;
std::set<int> outputIndexes;
std::map<std::string, int> inputsMap;
std::map<std::string, int> outputsMap;
for (int i=0; i<net->tensorName()->size(); ++i) {
auto tname = net->tensorName()->GetAsString(i)->str();
for (auto& s : inputs) {
if (tname == s) {
inputIndexes.emplace(i);
inputsMap.insert(std::make_pair(s, i));
break;
}
}
for (auto& s : outputs) {
if (tname == s) {
outputIndexes.emplace(i);
outputsMap.insert(std::make_pair(s, i));
break;
}
}
}
std::vector<int> inputIndexesVec(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputIndexesVec[i] = inputsMap[inputs[i]];
}
std::vector<int> outputIndexesVec(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputIndexesVec[i] = outputsMap[outputs[i]];
}
auto subModulesInfo = _createSubModuleInfo(net, inputIndexes, outputIndexes);
std::vector<std::shared_ptr<Module>> subModules(subModulesInfo.size());
for (int i=0; i<subModulesInfo.size(); ++i) {
subModules[i].reset(_createSubModule(net, subModulesInfo[i], subGraphMap));
}
auto result = new PipelineModule;
/**
Compute:
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
*/
// Build the stack index map, first: original tensor index, second: new stack index
std::map<int, int> stackMap;
int stackIndex = 0;
for (auto& m : subModulesInfo) {
for (auto index : m.inputs) {
if (stackMap.find(index) == stackMap.end()) {
stackMap.insert(std::make_pair(index, stackIndex));
stackIndex++;
}
}
for (auto index : m.outputs) {
if (stackMap.find(index) == stackMap.end()) {
stackMap.insert(std::make_pair(index, stackIndex));
stackIndex++;
}
}
}
result->mStackSize = stackMap.size();
for (int i=0; i<subModulesInfo.size(); ++i) {
auto& info = subModulesInfo[i];
// Reindex stack index
std::vector<int> subInputs(info.inputs.size());
for (int i=0; i<info.inputs.size(); ++i) {
subInputs[i] = stackMap[info.inputs[i]];
}
std::vector<int> subOutputs(info.outputs.size());
for (int i=0; i<info.outputs.size(); ++i) {
subOutputs[i] = stackMap[info.outputs[i]];
}
result->mSubModules.emplace_back(std::make_tuple(subModules[i], subInputs, subOutputs));
}
for (int i=0; i<inputIndexesVec.size(); ++i) {
inputIndexesVec[i] = stackMap[inputIndexesVec[i]];
}
for (int i=0; i<outputIndexesVec.size(); ++i) {
outputIndexesVec[i] = stackMap[outputIndexesVec[i]];
}
result->mInputIndexes = std::move(inputIndexesVec);
result->mOutputIndexes = std::move(outputIndexesVec);
return result;
}
Module* PipelineModule::clone(CloneContext* ctx) const {
PipelineModule* module(new PipelineModule);
for (const auto& it : mSubModules) {
const std::shared_ptr<Module>& submodule = std::get<0>(it);
const std::vector<int>& input_indices = std::get<1>(it);
const std::vector<int>& output_indices = std::get<2>(it);
std::shared_ptr<Module> replica_submodule(submodule->clone(ctx));
module->mSubModules.push_back(
std::make_tuple(replica_submodule, input_indices, output_indices));
module->registerModel({replica_submodule});
}
module->mInputIndexes = mInputIndexes;
module->mOutputIndexes = mOutputIndexes;
module->mStackSize = mStackSize;
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN
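A sketch of the intended entry points (the input/output VARPs are assumed to come from the caller; not part of the commit): build a trainable pipeline from expression endpoints, then switch it to quantization-aware training with the defaults declared in PipelineModule.hpp below.
std::shared_ptr<MNN::Express::Module> makeQATPipeline(MNN::Express::VARP in, MNN::Express::VARP out) {
    using namespace MNN::Express;
    std::shared_ptr<Module> pipeline(PipelineModule::extract({in}, {out}, /*fortrain*/ true));
    // 8 bit, per-tensor feature scales, moving-average scale updates (see turnQuantize above).
    PipelineModule::turnQuantize(pipeline.get(), 8, NN::PerTensor, NN::MovingAverage);
    return pipeline;
}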

View File

@ -8,16 +8,20 @@
#ifndef PipelineModule_hpp
#define PipelineModule_hpp
#include "Module.hpp"
#include "NN.hpp"
#include <MNN/expr/Module.hpp>
#include <MNN/expr/NN.hpp>
#include <MNN/expr/ExprCreator.hpp>
namespace MNN {
namespace Train {
namespace Express {
class MNN_PUBLIC PipelineModule : public Module {
public:
typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph = {});
static Module* extractOrigin(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain) {
return extract(inputs, outputs, fortrain);
}
static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
@ -26,14 +30,18 @@ public:
std::vector<int> countOutputReference(std::vector<int> outputIndices);
private:
PipelineModule(){}
PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
const Transformer& transformFunction = {});
Module* clone(CloneContext* ctx) const override;
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
std::vector<Express::VARP> mStack;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
};
} // namespace Train
} // namespace Express
} // namespace MNN
#endif

View File

@ -0,0 +1,186 @@
//
// StaticModule.cpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "StaticModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/AutoTime.hpp>
#include "core/TensorUtils.hpp"
#include "core/Session.hpp"
#include <MNN/expr/Executor.hpp>
#include <MNN/AutoTime.hpp>
#include <MNN/expr/ExecutorScope.hpp>
namespace MNN {
namespace Express {
StaticModule::StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix) : mInputs(inputs), mOutputs(outputs) {
mShapeFix = shapeFix;
mOutputNumbers = (int)outputs.size();
/** Compute:
std::vector<int> mOutputFromTensor;
std::vector<std::pair<int, int>> mOutputFromInput;
*/
for (int i=0; i<outputs.size(); ++i) {
auto& t = outputs[i];
bool fromInput = false;
for (int j=0; j<inputs.size(); ++j) {
if (inputs[j] == t) {
fromInput = true;
mOutputFromInput.emplace_back(std::make_pair(i, j));
break;
}
}
if (fromInput) {
continue;
}
mOutputFromTensor.emplace_back(i);
}
if (mOutputFromTensor.empty()) {
return;
}
mNet.reset(Interpreter::createFromBuffer(buffer, length));
#ifdef MNN_EXPR_ENABLE_PROFILER
mNet->setSessionMode(Interpreter::Session_Debug);
#else
mNet->setSessionMode(Interpreter::Session_Release);
#endif
if (mShapeFix) {
mNet->setSessionMode(Interpreter::Session_Input_Inside);
} else {
mNet->setSessionMode(Interpreter::Session_Input_User);
}
auto rt = Express::ExecutorScope::Current()->getRuntime();
// TODO: Add Config
ScheduleConfig config;
config.numThread = 1;
config.type = rt.first.begin()->first;
config.saveTensors = outputs;
mSession = mNet->createSession(config, rt);
mInputTensors.resize(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
mInputTensors[i] = mNet->getSessionInput(mSession, inputs[i].c_str());
}
mOutputTensors.resize(mOutputFromTensor.size());
for (int i=0; i<mOutputFromTensor.size(); ++i) {
mOutputTensors[i] = mNet->getSessionOutput(mSession, outputs[mOutputFromTensor[i]].c_str());
}
}
StaticModule:: ~ StaticModule() {
// Do nothing
}
std::vector<Express::VARP> StaticModule::onForward(const std::vector<Express::VARP>& inputs) {
AUTOTIME;
std::vector<Express::VARP> outputs(mOutputNumbers);
for (auto& iter : mOutputFromInput) {
outputs[iter.first] = inputs[iter.second];
}
if (mOutputFromTensor.empty()) {
return outputs;
}
MNN_ASSERT(inputs.size() == mInputTensors.size());
for (int i=0; i<inputs.size(); ++i) {
auto info = inputs[i]->getInfo();
mInputTensors[i]->buffer().type = info->type;
auto des = TensorUtils::getDescribe(mInputTensors[i]);
if (info->order == Express::NCHW) {
des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
}
if (info->order == Express::NHWC) {
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
}
if (info->order == Express::NC4HW4) {
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
}
mNet->resizeTensor(mInputTensors[i], info->dim);
}
if (!mShapeFix) {
for (int i=0; i<inputs.size(); ++i) {
mInputTensors[i]->buffer().host = (uint8_t*)inputs[i]->readMap<void>();
}
// FIXME: Use Interpreter's API
mSession->setNeedResize();
}
mNet->resizeSession(mSession);
if (mShapeFix) {
for (int i=0; i<inputs.size(); ++i) {
// For shape-only usage inputs, don't allocate memory
if (nullptr != mInputTensors[i]->host<void>()) {
::memcpy(mInputTensors[i]->host<void>(), inputs[i]->readMap<void>(), mInputTensors[i]->size());
}
}
}
#ifdef MNN_EXPR_ENABLE_PROFILER
auto globalExecutor = ExecutorScope::Current();
Timer cost;
TensorCallBackWithInfo beforeCallBack = [&cost] (const std::vector<Tensor*>&, const OperatorInfo* info) {
cost.reset();
return true;
};
TensorCallBackWithInfo afterCallBack = [&cost, globalExecutor] (const std::vector<Tensor*>&, const OperatorInfo* info) {
auto costTimes = (float)cost.durationInUs() / 1000.0f;
globalExecutor->addOpCostTime(info->type(), costTimes);
globalExecutor->addOpFlops(info->type(), info->flops());
return true;
};
mNet->runSessionWithCallBackInfo(mSession, beforeCallBack, afterCallBack);
#else
mNet->runSession(mSession);
#endif
for (int i=0; i<mOutputTensors.size(); ++i) {
Express::Variable::Info info;
info.dim = mOutputTensors[i]->shape();
info.type = mOutputTensors[i]->getType();
auto format = TensorUtils::getDescribe(mOutputTensors[i])->dimensionFormat;
info.order = Express::NHWC;
if (format == MNN_DATA_FORMAT_NCHW) {
info.order = Express::NCHW;
} else if (format == MNN_DATA_FORMAT_NC4HW4) {
info.order = Express::NC4HW4;
}
outputs[mOutputFromTensor[i]] = Express::Variable::create(Express::Expr::create(std::move(info), mOutputTensors[i]->host<void>(), Express::VARP::CONSTANT, true), 0);
//::memcpy(outputs[i]->writeMap<void>(), mOutputTensors[i]->host<void>(), mOutputTensors[i]->size());
}
return outputs;
}
Module* StaticModule::clone(CloneContext* ctx) const {
StaticModule* module(new StaticModule);
module->mInputs = mInputs;
module->mOutputs = mOutputs;
module->mShapeFix = mShapeFix;
module->mOutputNumbers = mOutputNumbers;
module->mOutputFromInput = mOutputFromInput;
module->mOutputFromTensor = mOutputFromTensor;
if (mOutputFromTensor.empty()) {
return this->cloneBaseTo(ctx, module);
}
module->mNet = mNet;
auto rt = Express::ExecutorScope::Current()->getRuntime();
ScheduleConfig config;
config.numThread = 1;
config.type = rt.first.begin()->first;
config.saveTensors = mOutputs;
module->mSession = module->mNet->createSession(config, rt);
module->mInputTensors.resize(mInputs.size());
module->mOutputTensors.resize(mOutputFromTensor.size());
for (int i=0; i<mInputs.size(); ++i) {
module->mInputTensors[i] =
module->mNet->getSessionInput(module->mSession, mInputs[i].c_str());
}
for (int i=0; i<mOutputFromTensor.size(); ++i) {
module->mOutputTensors[i] = module->mNet->getSessionOutput(
module->mSession, mOutputs[mOutputFromTensor[i]].c_str());
}
return this->cloneBaseTo(ctx, module);
}
}
}
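A sketch of how the loader uses the class above for a control-flow-free region (the buffer is assumed to hold a serialized MNN net whose tensor names include "x" and "y"; not part of the commit):
std::vector<MNN::Express::VARP> runStaticPart(const void* buffer, size_t length,
                                              MNN::Express::VARP xVar) {
    using namespace MNN::Express;
    std::shared_ptr<Module> part(new StaticModule(buffer, length, {"x"}, {"y"}, /*shapeFix*/ false));
    return part->onForward({xVar});
}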

View File

@ -0,0 +1,44 @@
//
// StaticModule.hpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef StaticModule_hpp
#define StaticModule_hpp
#include <MNN/expr/Module.hpp>
#include <MNN/Interpreter.hpp>
namespace MNN {
namespace Express {
class StaticModule : public Module {
public:
StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix = false);
virtual ~ StaticModule();
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
private:
StaticModule() = default;
Module* clone(CloneContext* ctx) const override;
std::vector<std::string> mInputs;
std::vector<std::string> mOutputs;
std::shared_ptr<Interpreter> mNet;
Session* mSession;
std::vector<Tensor*> mInputTensors;
std::vector<Tensor*> mOutputTensors;
bool mShapeFix;
int mOutputNumbers;
// Output indexes served by session output tensors: outputs[mOutputFromTensor[i]] comes from mOutputTensors[i]
std::vector<int> mOutputFromTensor;
// First: outputIndex, Second: input var index
std::vector<std::pair<int, int>> mOutputFromInput;
};
}
}
#endif

View File

@ -0,0 +1,186 @@
//
// WhileModule.cpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "WhileModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
for (int i=0; i<names.size(); ++i) {
if (names[i] == key) {
return i;
}
}
return -1;
}
WhileModule* WhileModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
auto module = new WhileModule;
auto whileParam = op->main_as_WhileParam();
auto& body = subGraph.find(whileParam->body_graph()->str())->second;
auto& cond = subGraph.find(whileParam->cond_graph()->str())->second;
module->mBody = body.m;
module->mCond = cond.m;
/** Compute map index
int mCondInputNumber;
int mBodyInputNumber;
// First: mCondInputs' index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForCond;
// First: mBodyInputs' index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForBody;
std::vector<int> mOutputFromBody;
std::vector<std::pair<int, int>> mUpdateForCond;
std::vector<std::pair<int, int>> mUpdateForBody;
std::vector<std::pair<int, int>> mCondUpdateForCond;
std::vector<std::pair<int, int>> mCondUpdateForBody;
*/
// Map Inputs
module->mBodyInputNumber = body.inputs.size();
module->mCondInputNumber = cond.inputs.size();
for (int i=0; i<whileParam->aliases_inputs()->size(); ++i) {
auto index = i;
auto data = whileParam->aliases_inputs()->GetAs<StringVec>(i);
for (int s=0; s<data->data()->size(); ++s) {
auto name = data->data()->GetAsString(s)->str();
auto bodyInputPos = _findPos(body.inputs, name);
if (bodyInputPos >= 0) {
module->mInputForBody.emplace_back(std::make_pair(bodyInputPos, i));
}
auto condInputPos = _findPos(cond.inputs, name);
if (condInputPos >= 0) {
module->mInputForCond.emplace_back(std::make_pair(condInputPos, i));
}
}
}
// Map update
auto update = whileParam->aliases_updates();
std::map<int, int> replaceOutputs;
for (int i=0; i<update->size(); ++i) {
auto data = update->GetAs<StringVec>(i);
int bodyInputPos = -1;
int condInputPos = -1;
int bodyOutputPos = -1;
int condOutputPos = -1;
MNN_ASSERT(2 == data->data()->size());
auto outputName = data->data()->GetAsString(0)->str();
auto inputName = data->data()->GetAsString(1)->str();
bodyInputPos = _findPos(body.inputs, inputName);
condInputPos = _findPos(cond.inputs, inputName);
bodyOutputPos = _findPos(body.outputs, outputName);
condOutputPos = _findPos(cond.outputs, outputName);
auto updateBodyOutputPos = _findPos(body.outputs, inputName);
MNN_ASSERT(bodyOutputPos == -1 || condOutputPos == -1);
if (condOutputPos >= 0) {
if (bodyInputPos >= 0) {
module->mCondUpdateForBody.emplace_back(std::make_pair(bodyInputPos, condOutputPos));
}
if (condInputPos >= 0) {
module->mCondUpdateForCond.emplace_back(std::make_pair(condInputPos, condOutputPos));
}
}
if (bodyOutputPos >= 0) {
if (bodyInputPos >= 0) {
module->mUpdateForBody.emplace_back(std::make_pair(bodyInputPos, bodyOutputPos));
}
if (condInputPos >= 0) {
module->mUpdateForCond.emplace_back(std::make_pair(condInputPos, bodyOutputPos));
}
if (updateBodyOutputPos >= 0) {
replaceOutputs.insert(std::make_pair(updateBodyOutputPos, bodyOutputPos));
}
}
}
// Map outputs
auto output = whileParam->aliases_outputs();
for (int i=0; i<output->size(); ++i) {
auto data = output->GetAsString(i);
auto pos = _findPos(body.outputs, data->str());
MNN_ASSERT(pos >= 0);
if (replaceOutputs.find(pos) != replaceOutputs.end()) {
pos = replaceOutputs[pos];
}
module->mOutputFromBody.emplace_back(pos);
}
return module;
}
std::vector<Express::VARP> WhileModule::onForward(const std::vector<Express::VARP>& inputsI) {
std::vector<Express::VARP> condInputs(mCondInputNumber);
std::vector<Express::VARP> bodyInputs(mBodyInputNumber);
auto& inputs = inputsI;
for (auto& p : mInputForCond) {
condInputs[p.first] = inputs[p.second];
}
for (auto& p : mInputForBody) {
bodyInputs[p.first] = inputs[p.second];
}
std::vector<Express::VARP> outputs(mOutputFromBody.size());
while (true) {
auto res = mCond->onForward(condInputs)[0];
auto resPtr = res->readMap<int>();
if (resPtr[0] <= 0) {
break;
}
auto bodyOutputs = mBody->onForward(bodyInputs);
Express::Variable::prepareCompute(bodyOutputs);
for (int i=0; i<bodyOutputs.size(); ++i) {
auto p = bodyOutputs[i];
if (p->expr().first->get() != nullptr) {
auto ptr = p->readMap<void>();
auto info = p->getInfo();
auto newV = Express::_Input(info->dim, info->order, info->type);
if (nullptr != ptr) {
::memcpy(newV->writeMap<void>(), ptr, info->type.bytes() * info->size);
}
bodyOutputs[i] = newV;
}
}
for (int i=0; i<mOutputFromBody.size(); ++i) {
outputs[i] = bodyOutputs[mOutputFromBody[i]];
}
for (auto& p : mUpdateForCond) {
condInputs[p.first] = bodyOutputs[p.second];
}
for (auto& p : mUpdateForBody) {
bodyInputs[p.first] = bodyOutputs[p.second];
}
for (auto& p : mCondUpdateForCond) {
condInputs[p.first] = res;
}
for (auto& p : mCondUpdateForBody) {
bodyInputs[p.first] = res;
}
}
return outputs;
}
Module* WhileModule::clone(CloneContext* ctx) const {
WhileModule* module(new WhileModule);
module->mCondInputNumber = mCondInputNumber;
module->mBodyInputNumber = mBodyInputNumber;
module->mInputForCond = mInputForCond;
module->mInputForBody = mInputForBody;
module->mOutputFromBody = mOutputFromBody;
module->mUpdateForCond = mUpdateForCond;
module->mUpdateForBody = mUpdateForBody;
module->mCondUpdateForCond = mCondUpdateForCond;
module->mCondUpdateForBody = mCondUpdateForBody;
module->mCond.reset(mCond->clone(ctx));
module->mBody.reset(mBody->clone(ctx));
return this->cloneBaseTo(ctx, module);
}
};
};
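
For orientation, here is a hedged sketch of how this module might be obtained and driven; the Op pointer and subgraph map normally come from the model loader, so the helper below and its argument names are placeholders, not part of this change.

// Sketch only, assuming it sits next to WhileModule.cpp so the headers resolve.
#include "WhileModule.hpp"
#include "MNN_generated.h"
#include <map>
#include <memory>
#include <string>
#include <vector>

// The caller supplies the While op and the name -> SubGraph map from the loaded model.
std::vector<MNN::Express::VARP> runWhile(const MNN::Op* whileOp,
                                         const std::map<std::string, MNN::Express::SubGraph>& subgraphs,
                                         const std::vector<MNN::Express::VARP>& loopInputs) {
    std::unique_ptr<MNN::Express::WhileModule> loop(
        MNN::Express::WhileModule::create(whileOp, subgraphs));
    // onForward keeps evaluating cond/body until cond yields a value <= 0,
    // then returns the body outputs selected by aliases_outputs.
    return loop->onForward(loopInputs);
}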

View File

@ -0,0 +1,46 @@
//
// WhileModule.hpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef WhileModule_hpp
#define WhileModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class WhileModule : public Module {
public:
virtual ~ WhileModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
private:
WhileModule(){}
Module* clone(CloneContext* ctx) const override;
int mCondInputNumber;
int mBodyInputNumber;
// First mCondInputs' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForCond;
// First mBodyInputs' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForBody;
std::vector<int> mOutputFromBody;
std::vector<std::pair<int, int>> mUpdateForCond;
std::vector<std::pair<int, int>> mUpdateForBody;
std::vector<std::pair<int, int>> mCondUpdateForCond;
std::vector<std::pair<int, int>> mCondUpdateForBody;
std::shared_ptr<Module> mCond;
std::shared_ptr<Module> mBody;
};
}
}
#endif

View File

@ -11,6 +11,7 @@
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <MNN/ErrorCode.hpp>
#include <MNN/MNNForwardType.h>
@ -67,6 +68,7 @@ class Session;
struct Content;
class Tensor;
class Backend;
class Runtime;
class MNN_PUBLIC OperatorInfo {
struct Info;
@ -89,6 +91,7 @@ protected:
typedef std::function<bool(const std::vector<Tensor*>&, const std::string& /*opName*/)> TensorCallBack;
typedef std::function<bool(const std::vector<Tensor*>&, const OperatorInfo*)> TensorCallBackWithInfo;
typedef std::pair<std::map<MNNForwardType, std::shared_ptr<Runtime>>, std::shared_ptr<Runtime>> RuntimeInfo;
/** net data holder. multiple sessions could share same net. */
class MNN_PUBLIC Interpreter {
@ -108,7 +111,43 @@ public:
static Interpreter* createFromBuffer(const void* buffer, size_t size);
~Interpreter();
enum SessionMode {
/** About CallBack, Default Session_Debug*/
/** runSessionWithCallBack is allowed and can get internal op info*/
Session_Debug = 0,
/** runSessionWithCallBack is not valid and can't get any info of op in session*/
Session_Release = 1,
/** About input tensor, Default Session_Input_Inside*/
/** The input tensor is allocated by the session; set input data after the session is resized*/
Session_Input_Inside = 2,
/** The input tensor is allocated by the user; set input data before the session is resized*/
Session_Input_User = 3,
};
/**
* @brief This API should be called before creating a session.
* @param mode session mode
* @return void
*/
void setSessionMode(SessionMode mode);
/**
* @brief This API should be called before creating a session.
* If the cache file exists, try to load the cache from it.
* After createSession, try to save the cache to the file.
* @param cacheFile cache file name
* @param keySize the first `keySize` bytes used as the key to check if the `cacheFile` exists.
* @return void
*/
void setCacheFile(const char* cacheFile, size_t keySize = 128);
public:
/**
* @brief create runtime info separately with schedule configs.
* @param configs session schedule configs.
*/
static RuntimeInfo createRuntime(const std::vector<ScheduleConfig>& configs);
/**
* @brief create session with schedule config. created session will be managed in net.
* @param config session schedule config.
@ -116,6 +155,13 @@ public:
*/
Session* createSession(const ScheduleConfig& config);
/**
* @brief create session with schedule config and user-specified runtime.
* @param config session schedule config.
* @param runtime runtime info used by the created session.
* @return created session if success, NULL otherwise.
*/
Session* createSession(const ScheduleConfig& config, const RuntimeInfo& runtime);
/**
* @brief create multi-path session with schedule configs. created session will be managed in net.
* @param configs session schedule configs.
@ -123,6 +169,14 @@ public:
*/
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs);
/**
* @brief create multi-path session with schedule configs and user-specified runtime.
created session will be managed in net.
* @param configs session schedule configs.
* @return created session if success, NULL otherwise.
*/
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime);
/**
* @brief release session.
* @param session given session.
@ -204,17 +258,39 @@ public:
*/
Tensor* getSessionOutput(const Session* session, const char* name);
enum SessionInfoCode {
/** memory used by the session, in MB (float*) */
MEMORY = 0,
/** floating-point operations needed by the session, in M FLOPs (float*) */
FLOPS = 1,
/** backends used in the session (int*, length >= number of configs used when creating the session) */
BACKENDS = 2,
ALL
};
/**
* @brief get all input tensors.
* @brief get session info
* @param session given session.
* @return all input tensors mapped with name.
* @param code given info code.
* @param ptr given info ptr, see SessionInfoCode for detail
* @return true if support the code, false otherwise.
*/
const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
bool getSessionInfo(const Session* session, SessionInfoCode code, void* ptr);
/**
* @brief get all output tensors.
* @param session given session.
* @return all output tensors mapped with name.
*/
const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
/**
* @brief get all input tensors.
* @param session given session.
* @return all input tensors mapped with name.
*/
const std::map<std::string, Tensor*>& getSessionInputAll(const Session* session) const;
public:
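
Taken together, the additions above suggest a usage flow like the sketch below; the model path, cache path, and config values are assumptions for illustration, not values from this diff.

// Sketch only: session mode, cache file, explicit runtime, and session info query.
#include <MNN/Interpreter.hpp>
#include <memory>
#include <vector>

int main() {
    std::shared_ptr<MNN::Interpreter> net(
        MNN::Interpreter::createFromFile("model.mnn"));       // placeholder model path
    net->setSessionMode(MNN::Interpreter::Session_Release);    // must precede createSession
    net->setCacheFile("model.cache");                           // placeholder cache path
    MNN::ScheduleConfig config;                                 // default: CPU
    auto runtime = MNN::Interpreter::createRuntime({config});
    MNN::Session* session = net->createSession(config, runtime);
    float memoryMB = 0.0f;
    net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryMB);
    net->runSession(session);
    return 0;
}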

View File

@ -38,13 +38,7 @@
} \
}
#else
#define MNN_ASSERT(x) \
{ \
int res = (x); \
if (!res) { \
MNN_ERROR("Error for %d\n", __LINE__); \
} \
}
#define MNN_ASSERT(x)
#endif
#define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__);

View File

@ -23,8 +23,8 @@ typedef enum {
/*Hand-written Metal*/
MNN_FORWARD_METAL = 1,
/*Use iOS's MPS instead of hand-written Metal, not supported yet*/
MNN_FORWARD_MPS = 2,
/*NVIDIA GPU API*/
MNN_FORWARD_CUDA = 2,
/*Android / Common Device GPU API*/
MNN_FORWARD_OPENCL = 3,

View File

@ -12,6 +12,7 @@
#include <vector>
#include <MNN/HalideRuntime.h>
#include <MNN/MNNDefine.h>
#define MNN_MAX_TENSOR_DIM 6
namespace MNN {

View File

@ -10,6 +10,7 @@
#include <MNN/ErrorCode.hpp>
#include <MNN/expr/Expr.hpp>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include <vector>
#include <mutex>
#include <set>
@ -17,41 +18,19 @@
namespace MNN {
class Backend;
class Execution;
class Runtime;
struct Op;
namespace Express {
class MNN_PUBLIC Executor {
public:
class ComputeCache {
public:
void setShapeDirty(int offset, Variable::Info* info);
void setContentDirty();
void setContentReady();
void syncInput(int offset, const Variable::Info* info);
void syncOutput(int offset, Variable::Info* info);
struct TensorContent {
std::shared_ptr<Tensor> tensor;
int refCount = 0;
void reset();
bool aliveOutside = false;
};
class ComputeCache;
struct Unit;
virtual ~ ComputeCache() {}
ComputeCache() {}
virtual ErrorCode compute() = 0;
virtual ErrorCode resize() = 0;
protected:
// Get the index tensor with the need of needBackend
// If the Tensor don't belong to the backend, need use needBackend to alloc it and return
virtual Tensor* getTensor(int index, bool host) = 0;
void _setShapeDirty();
friend class Executor;
bool mContentDirty = true;
bool mShapeDirty = true;
};
static void setShapeDirty(ComputeCache* cache);
static void setContentDirty(ComputeCache* cache);
static void* mapOutput(ComputeCache* cache, int offset, Tensor* dest);
struct Requirement {
std::vector<bool> contentNeedContent;
std::vector<bool> shapeNeedContent;
std::vector<bool> supportError;
};
~Executor();
Requirement getRequirement(Expr* expr) const;
@ -65,25 +44,27 @@ public:
};
void gc(GCFlag flag = FULL);
static std::shared_ptr<Executor> getGlobalExecutor();
static std::shared_ptr<Executor> newExecutor(MNNForwardType type,
const BackendConfig& config,
int numberThread);
void resetProfile();
void dumpProfile();
void addOpCostTime(int op, float costTime);
void addOpCostTime(const std::string& type, float costTime);
void addOpFlops(const std::string& type, float flops);
class Profiler;
static RuntimeInfo getRuntime();
private:
void _createSingle(EXPRP expr);
void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::vector<ComputeCache::TensorContent>&& tensors, bool forceCPU);
void _makeCache(const std::vector<EXPRP>& outputs, bool forceCPU);
void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::set<std::shared_ptr<Expr::Inside>>&& inputNode, bool forceCPU);
void _addToCache(const std::vector<std::shared_ptr<ComputeCache>>& caches);
void _resetCache();
void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::vector<ComputeCache::TensorContent>& tensors);
void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::set<std::shared_ptr<Expr::Inside>>& inputNode);
Executor(std::shared_ptr<Backend> backend);
std::shared_ptr<Backend> mBackend;
std::shared_ptr<Backend> mBackupBackend;
Executor(std::shared_ptr<Runtime> backend, MNNForwardType type);
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mRuntime;
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mBackupRuntime;
std::mutex mMutex;
std::vector<std::shared_ptr<Tensor>> mStack;
std::vector<Tensor*> mStackInputs;
std::vector<Tensor*> mStackOutputs;
std::shared_ptr<Profiler> mProfiler;
};
} // namespace Express

View File

@ -0,0 +1,33 @@
//
// ExecutorScope.hpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MNN_EXPR_EXECUTOR_SCOPE_HPP_
#define MNN_EXPR_EXECUTOR_SCOPE_HPP_
#include <MNN/expr/Executor.hpp>
namespace MNN {
namespace Express {
struct ExecutorScope final {
public:
ExecutorScope() = delete;
explicit ExecutorScope(const ExecutorScope&) = delete;
explicit ExecutorScope(const std::shared_ptr<Executor>& current);
explicit ExecutorScope(const std::string& scope_name,
const std::shared_ptr<Executor>& current);
virtual ~ExecutorScope();
static const std::shared_ptr<Executor> Current();
};
} // namespace Express
} // namespace MNN
#endif // MNN_EXPR_EXECUTOR_SCOPE_HPP_
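
A minimal sketch of how this scope guard combines with Executor::newExecutor from the header earlier in this diff; the thread count and backend config are assumptions.

// Sketch only: evaluate work under a dedicated executor instead of the global one.
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExecutorScope.hpp>

void runScoped() {
    MNN::BackendConfig config;                                  // default precision/power/memory
    std::shared_ptr<MNN::Express::Executor> executor =
        MNN::Express::Executor::newExecutor(MNN_FORWARD_CPU, config, 4);
    MNN::Express::ExecutorScope scope("worker", executor);     // pushed for the current scope
    // Expressions evaluated here use `executor`; it is popped when `scope` is destroyed.
}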

View File

@ -87,6 +87,7 @@ public:
};
bool fix(InputType type) const;
private:
friend class Variable;
std::shared_ptr<Variable> mContent;
};
inline bool operator==(Variable* src, VARP dst) {
@ -107,7 +108,6 @@ public:
INTS dim;
halide_type_t type;
int size;
void* ptr = nullptr;
void syncSize();
};
const std::string& name() const;
@ -173,7 +173,7 @@ private:
class MNN_PUBLIC Expr {
public:
struct Inside;
static EXPRP create(Variable::Info&& info);
static EXPRP create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy = true);
static EXPRP create(const OpT* op, std::vector<VARP> inputs, int outputSize = 1);
static EXPRP create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize = 1);
static EXPRP create(std::unique_ptr<OpT>&& op, std::vector<VARP> inputs, int outputSize = 1) {
@ -188,7 +188,7 @@ public:
return mInputs;
}
int outputSize() const {
return mOutputNames.size();
return (int)mOutputNames.size();
}
static void replace(EXPRP oldExpr, EXPRP newExpr);
bool requireInfo();

View File

@ -8,9 +8,14 @@
#ifndef MNN_Train_Module_hpp
#define MNN_Train_Module_hpp
#include <vector>
#include <unordered_map>
#include <MNN/expr/Expr.hpp>
namespace MNN {
namespace Train {
namespace Express {
class MNN_PUBLIC Module {
public:
Module() = default;
@ -21,9 +26,6 @@ public:
bool loadParameters(const std::vector<Express::VARP>& parameters);
void setIsTraining(const bool isTraining);
bool getIsTraining();
static std::shared_ptr<Module> transform(const std::vector<Express::VARP>& inputs,
const std::vector<Express::VARP>& outputs);
void clearCache();
const std::string& name() const {
@ -38,12 +40,45 @@ public:
void setType(std::string type) {
mType = std::move(type);
}
// Return the parameter index
int addParameter(Express::VARP parameter);
void setParameter(Express::VARP parameter, int index);
static Module* createEmpty(const std::vector<Express::VARP>& parameters);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic = false);
static Module* clone(const Module* module, const bool shareParams = false);
class CloneContext {
public:
CloneContext() = default;
explicit CloneContext(const bool shareParams)
: mShareParams(shareParams) {}
virtual ~CloneContext() = default;
const bool shareParams() const { return mShareParams; }
EXPRP getOrClone(const EXPRP expr);
VARP getOrClone(const VARP var);
private:
bool mShareParams = false;
std::unordered_map<const Expr*, EXPRP> mExprMap;
std::unordered_map<const Variable*, VARP> mVarMap;
};
virtual Module* clone(CloneContext* ctx) const {
return nullptr;
}
protected:
void registerModel(const std::vector<std::shared_ptr<Module>>& children);
void addParameter(Express::VARP parameter);
virtual void onClearCache() {
}
Module* cloneBaseTo(CloneContext* ctx, Module* module) const;
private:
void _collectParameters(std::vector<Express::VARP>& result) const;
std::vector<std::shared_ptr<Module>> mChildren;
@ -52,6 +87,13 @@ private:
std::string mName;
std::string mType;
};
struct SubGraph {
std::vector<std::string> inputs;
std::vector<std::string> outputs;
std::shared_ptr<Module> m;
};
} // namespace Express
} // namespace MNN
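
As a hedged sketch of the loading and cloning entry points added above; the tensor names and file path are placeholders.

// Sketch only: load a model file as a Module, then make a parameter-sharing clone.
#include <MNN/expr/Module.hpp>
#include <memory>

void loadAndClone() {
    std::shared_ptr<MNN::Express::Module> net(
        MNN::Express::Module::load({"input"}, {"prob"}, "model.mnn"));   // placeholder names/path
    std::shared_ptr<MNN::Express::Module> copy(
        MNN::Express::Module::clone(net.get(), /*shareParams=*/true));
    // copy->onForward(inputs) runs inference on VARPs built by the caller.
}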

View File

@ -9,11 +9,10 @@
#ifndef MNN_Train_NN_hpp
#define MNN_Train_NN_hpp
#include <MNN/expr/ExprCreator.hpp>
#include "Distributions.hpp"
#include "Module.hpp"
#include <MNN/expr/Module.hpp>
#include <vector>
namespace MNN {
namespace Train {
namespace Express {
class Initializer;
class MNN_PUBLIC NN {
@ -29,7 +28,7 @@ public:
};
enum FeatureScaleStatMethod {
PerTensor = 0,
PerChannel = 1
PerChannel = 1 // Deprecated
};
/* Unlike an enum inside a class, a class inside a class needs to be dllimport or dllexport explicitly.
Compilation on other systems is not affected.
@ -86,7 +85,7 @@ public:
static ConvParameters ExtractConvolution(Express::EXPRP expr);
// Extract BatchNormal and Dropout
static Module* ExtractNotRunableOp(Express::EXPRP expr);
static Module* ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs);
};
};

View File

@ -31,25 +31,30 @@ MNN_PUBLIC VARP _Const(const void* ptr, INTS shape = {}, Dimensionformat format
MNN_PUBLIC VARP _TrainableParam(float value, INTS dims, Dimensionformat format);
MNN_PUBLIC VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format,
halide_type_t type = halide_type_of<float>());
MNN_PUBLIC VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape);
MNN_PUBLIC VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
MNN_PUBLIC VARP _Conv(float weight, float bias, VARP x, INTS channel, INTS kernelSize, PaddingMode pad = VALID,
INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1);
MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false, int nbits = 8);
MNN_PUBLIC VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
MNN_PUBLIC VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
MNN_PUBLIC VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
MNN_PUBLIC VARP _MaxPool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
MNN_PUBLIC VARP _AvePool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NHWC);
MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NCHW);
MNN_PUBLIC VARP _Reshape(VARP x, VARP shape);
MNN_PUBLIC VARP _Scale(VARP x, int channels, std::vector<float>&& scales, std::vector<float>&& bias);
MNN_PUBLIC VARP _Relu(VARP x, float slope = 0.0f);
MNN_PUBLIC VARP _Relu6(VARP x);
MNN_PUBLIC VARP _Relu6(VARP x, float minValue = 0.0f, float maxValue = 6.0f);
MNN_PUBLIC VARP _PRelu(VARP x, std::vector<float> &&slopes);
MNN_PUBLIC VARP _Softmax(VARP logits, int axis = -1);
MNN_PUBLIC VARP _Softplus(VARP features);
@ -76,7 +81,7 @@ MNN_PUBLIC VARP _Pad(VARP x, VARP paddings, PadValueMode mode = CONSTANT);
MNN_PUBLIC VARP _ExpandDims(VARP input, int axis);
MNN_PUBLIC VARP _ExpandDims(VARP input, VARP axis);
MNN_PUBLIC VARP _Shape(VARP input);
MNN_PUBLIC VARP _Shape(VARP input, bool nchw = false);
MNN_PUBLIC VARP _Stack(VARPS values, int axis=0);
enum InterpolationMethod {BILINEAR, NEAREST};
MNN_PUBLIC VARP _CropAndResize(VARP image, VARP boxes, VARP box_ind, VARP crop_size,
@ -92,6 +97,7 @@ MNN_PUBLIC VARP _GatherND(VARP params, VARP indices);
MNN_PUBLIC VARP _Selu(VARP features, float scale, float alpha);
MNN_PUBLIC VARP _Size(VARP input);
MNN_PUBLIC VARP _Elu(VARP features, float alpha=1.0);
MNN_PUBLIC VARP _Threshold(VARP features, float alpha=1.0);
MNN_PUBLIC VARP _MatrixBandPart(VARP input, VARP num_lower, VARP num_upper);
MNN_PUBLIC std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims);
MNN_PUBLIC VARP _SetDiff1D(VARP x, VARP y);
@ -123,7 +129,8 @@ MNN_PUBLIC VARP _ZeroGrad(VARP x);
// Int8 Inference
MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu);
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits = 8);
MNN_PUBLIC VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim);
MNN_PUBLIC VARP _FloatToInt8(VARP x, VARP scale, char minValue, char maxValue);
MNN_PUBLIC VARP _Int8ToFloat(VARP x, VARP scale);
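
Two of the signature changes above in a short sketch: _Reshape's original_format default moves from NHWC to NCHW, and _Shape gains an nchw flag; the input shape below is a placeholder.

// Sketch only: exercises the changed _Reshape default and the new _Shape(x, nchw) overload.
#include <MNN/expr/ExprCreator.hpp>
using namespace MNN::Express;

void reshapeDemo() {
    VARP x     = _Input({1, 3, 4, 4}, NCHW);    // placeholder input
    VARP flat  = _Reshape(x, {1, 48});          // original_format now defaults to NCHW
    VARP shape = _Shape(flat, /*nchw=*/true);   // new flag added in this release
}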

102
include/MNN/expr/Scope.hpp Normal file
View File

@ -0,0 +1,102 @@
//
// Scope.hpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MNN_EXPR_SCOPE_HPP_
#define MNN_EXPR_SCOPE_HPP_
#include <cstdio>
#include <vector>
#include <string>
#include <mutex>
#include <MNN/Interpreter.hpp>
namespace MNN {
namespace Express {
template <typename T>
class Scope {
public:
Scope();
virtual ~Scope() = default;
struct ScopedContent {
std::string scope_name;
T content;
};
void EnterScope(const ScopedContent& current);
void EnterScope(const T& current);
void EnterScope(const std::string& scope_name, const T& current);
void ExitScope();
const ScopedContent& Current() const;
int ScopedLevel() const { return scoped_level_; }
private:
std::string MakeScopeName(const std::string& prefix, int level) const;
mutable std::mutex mutex_;
int scoped_level_ = 0;
std::vector<ScopedContent> scoped_contents_;
};
template <typename T>
Scope<T>::Scope() : scoped_level_(0) {
}
template <typename T>
void Scope<T>::EnterScope(const ScopedContent& current) {
std::lock_guard<std::mutex> lock(mutex_);
++scoped_level_;
scoped_contents_.push_back(current);
}
template <typename T>
void Scope<T>::EnterScope(const T& current) {
EnterScope("scope", current);
}
template <typename T>
void Scope<T>::EnterScope(const std::string& scope_name,
const T& current) {
std::lock_guard<std::mutex> lock(mutex_);
int scoped_level = ScopedLevel();
std::string name = MakeScopeName(scope_name, scoped_level++);
ScopedContent content{name, current};
++scoped_level_;
scoped_contents_.push_back(content);
}
template <typename T>
void Scope<T>::ExitScope() {
std::lock_guard<std::mutex> lock(mutex_);
--scoped_level_;
scoped_contents_.resize(scoped_level_);
}
template <typename T>
const typename Scope<T>::ScopedContent& Scope<T>::Current() const {
std::lock_guard<std::mutex> lock(mutex_);
MNN_CHECK(scoped_contents_.size() > 0, "Scope level should not be 0.");
return scoped_contents_.back();
}
template <typename T>
std::string Scope<T>::MakeScopeName(const std::string& prefix,
int level) const {
char s[16];
snprintf(s, 16, "%d", level);
return prefix + "/" + std::string(s);
}
} // namespace Express
} // namespace MNN
#endif // MNN_EXPR_SCOPE_HPP_
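
A small sketch of the Scope<T> helper defined above; the scope name and integer payload are arbitrary.

// Sketch only: push a value, read the current scoped content, then pop it.
#include <MNN/expr/Scope.hpp>
#include <cstdio>

void scopeDemo() {
    MNN::Express::Scope<int> scope;
    scope.EnterScope("outer", 42);              // stored under the generated name "outer/0"
    const auto& current = scope.Current();
    std::printf("%s = %d\n", current.scope_name.c_str(), current.content);
    scope.ExitScope();
}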

View File

@ -1,12 +1,14 @@
# MNN_Windows
# |------- MNN_Windows_lib
# |---------- Dynamic_Library
# |---------- Static_Library
# |------- MNN_Windows_tools
# MNN
# |-- Debug
# | |--- MD
# | |--- MT
# |-- Release
# |--- MD
# |--- MT
$erroractionpreference = "stop"
Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN_Windows"
Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN"
#clear and create package directory
powershell ./schema/generate.ps1
@ -14,32 +16,50 @@ Set-Variable -Name WINDOWS_PACKAGE_PATH -Value "$(pwd)\$WINDOWS_PACKAGE_NAME"
Remove-Item $WINDOWS_PACKAGE_PATH -Recurse -ErrorAction Ignore
mkdir $WINDOWS_PACKAGE_PATH\
cd $WINDOWS_PACKAGE_PATH
mkdir -p MNN_Windows_lib\Dynamic_Library
mkdir -p MNN_Windows_lib\Static_Library
mkdir MNN_Windows_tools
mkdir -p Debug\MD
mkdir -p Debug\MT
mkdir -p Release\MD
mkdir -p Release\MT
cd ..
Remove-Item build -Recurse -ErrorAction Ignore
mkdir build
cd build
pushd build
# tools without dependency, static library without sep_build
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
#cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
#ninja
#pushd $WINDOWS_PACKAGE_PATH
#cp ..\build\*.exe MNN_Windows_tools
#cp ..\build\*.pdb MNN_Windows_tools
#cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
#popd
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
ninja
pushd $WINDOWS_PACKAGE_PATH
cp ..\build\*.exe MNN_Windows_tools
cp ..\build\*.pdb MNN_Windows_tools
cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MT
cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MT
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MT
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MD
cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MD
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MD
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MT
cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MT
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MT
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MD
cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MD
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MD
popd
#dynamic library without sep_build
rm .\CMakeCache.txt
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF ..
ninja
cd $WINDOWS_PACKAGE_PATH
cp ..\build\MNN.lib MNN_Windows_lib\Dynamic_Library
cp ..\build\MNN.dll MNN_Windows_lib\Dynamic_Library
cp ..\build\MNN.pdb MNN_Windows_lib\Dynamic_Library
# Compress MNN_Windows_lib and MNN_Windows_tools
Compress-Archive -Path MNN_Windows_lib -DestinationPath MNN_Windows_lib.zip -Update -CompressionLevel Optimal
Compress-Archive -Path MNN_Windows_tools -DestinationPath MNN_Windows_tools.zip -Update -CompressionLevel Optimal

View File

@ -8,15 +8,14 @@ set_target_properties(
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN.so
)
add_library( MNN_Arm82 SHARED IMPORTED GLOBAL)
set_target_properties(
MNN_Arm82
PROPERTIES IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Arm82.so
)
add_library( MNN_CL SHARED IMPORTED GLOBAL )
set_target_properties( MNN_CL
PROPERTIES IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_CL.so
)
add_library( MNN_Express SHARED IMPORTED GLOBAL )
set_target_properties( MNN_Express
PROPERTIES IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Express.so
)

View File

@ -5,7 +5,6 @@ adb push ./libMNN_CL.so /data/local/tmp/MNN/libMNN_CL.so
adb push ./libMNN_Vulkan.so /data/local/tmp/MNN/libMNN_Vulkan.so
adb push ./libMNN_GL.so /data/local/tmp/MNN/libMNN_GL.so
adb push ./libMNN_Express.so /data/local/tmp/MNN/libMNN_Express.so
adb push ./libMNN_Arm82.so /data/local/tmp/MNN/libMNN_Arm82.so
adb push ./MNNV2Basic.out /data/local/tmp/MNN/MNNV2Basic.out
adb shell "cd /data/local/tmp/MNN && rm -r output"
adb shell "cd /data/local/tmp/MNN && mkdir output"
@ -18,3 +17,4 @@ adb push ./timeProfile.out /data/local/tmp/MNN/timeProfile.out
adb push ./train.out /data/local/tmp/MNN/train.out
adb push ./benchmark.out /data/local/tmp/MNN/benchmark.out
adb push ./benchmarkExprModels.out /data/local/tmp/MNN/benchmarkExprModels.out
adb push ./run_test.out /data/local/tmp/MNN/run_test.out

File diff suppressed because it is too large

View File

@ -4,6 +4,8 @@
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>$(DEVELOPMENT_LANGUAGE)</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>

View File

@ -1,57 +0,0 @@
#!bin/sh
echo "Register Op Begin"
function read_dir(){
str1=`grep -e $2 $1/*.$4|sed s/[[:space:]]//g`
array=(${str1//\;/ })
for var in ${array[@]}; do
`echo $var|awk -F $3 '{
a="___";
b="__();";
c="extern void ";
print(c""a""$3"__"$4""b) >> "extern";
print (a""$3"__"$4""b) >> "call"
}'`
done
}
start=$(date +%s)
SEP='[:(,)]'
FILE_EXTERN_CPP='cpp'
FILE_EXTERN_MM='mm'
SHELL_FOLDER=$(dirname $0)'/../../..'
# handle CPU
CPUFILE=$SHELL_FOLDER/source/backend/cpu/CPUOPRegister.cpp
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $CPUFILE
echo "Start Register CPU"
CPU=$SHELL_FOLDER/source/backend/cpu
CPU_KEY='REGISTER_CPU_OP_CREATOR'
read_dir $CPU $CPU_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $CPUFILE
rm extern
echo '\nvoid registerCPUOps() {' >> $CPUFILE
cat call >> $CPUFILE
echo '}\n#endif\n}' >> $CPUFILE
rm call
# handle Shape
echo "Start Register Shape"
SHAPEFILE=$SHELL_FOLDER/source/shape/ShapeRegister.cpp
SHAPE=$SHELL_FOLDER/source/shape
SHAPE_KEY="REGISTER_SHAPE"
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $SHAPEFILE
read_dir $SHAPE $SHAPE_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $SHAPEFILE
rm extern
echo '\nvoid registerShapeOps() {' >> $SHAPEFILE
cat call >> $SHAPEFILE
echo '}\n#endif\n}' >> $SHAPEFILE
rm call
echo "Register Op End"
dur=$(echo "$(date +%s) - $start" | bc)
printf "Execution time: %.6f seconds" $dur

View File

@ -8,10 +8,14 @@
#import "AppDelegate.h"
#import "MNNTestSuite.h"
#import <MNN/expr/Executor.hpp>
@implementation AppDelegate
- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
MNN::BackendConfig config;
// To test Metal, change MNN_FORWARD_CPU to MNN_FORWARD_METAL
MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1);
MNNTestSuite::runAll();
return YES;
}

View File

@ -8,6 +8,9 @@ import cv2
def inference():
""" inference mobilenet_v1 using a specific picture """
interpreter = MNN.Interpreter("mobilenet_v1.mnn")
interpreter.setCacheFile('.tempcache')
config = {}
config['precision'] = 'low'
session = interpreter.createSession()
input_tensor = interpreter.getSessionInput(session)
image = cv2.imread('ILSVRC2012_val_00049999.JPEG')

View File

@ -96,8 +96,7 @@ def demo():
train_dataloader = MNN.data.DataLoader(train_dataset, batch_size = 64, shuffle = True)
test_dataloader = MNN.data.DataLoader(test_dataset, batch_size = 100, shuffle = False)
opt = MNN.optim.SGD(0.01, 0.9, 0.0005)
opt.append(model.parameters)
opt = MNN.optim.SGD(model, 0.01, 0.9, 0.0005)
F.set_thread_number(4)

View File

@ -125,8 +125,7 @@ def demo():
net = Net(feature_extractor, num_classes)
opt = MNN.optim.SGD(1e-3, 0.9, 0.00004)
opt.append(net.parameters)
opt = MNN.optim.SGD(net, 1e-3, 0.9, 0.00004)
for epoch in range(10):
train_func(net, train_dataloader, opt, num_classes)

View File

@ -0,0 +1,15 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr
v0 = F.const([0.3,0.1, -0.3,0.4], [4])
v2 = F.const([0.3,0.1, -0.3,0.4], [4])
v1 = v0 * v0
outputDiff = F.const([0.05, 0.03, 0.02, 0.01], [4])
v0Grad = nn.grad(v1, [v0, v2], [outputDiff], "")
print(v0Grad)
print(v0Grad[0].read())
F.save(v0Grad, "temp.grad")

View File

@ -0,0 +1,36 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr
class Net(nn.Module):
"""construct a lenet 5 model"""
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.conv(1, 20, [5, 5])
self.conv2 = nn.conv(20, 50, [5, 5])
self.fc1 = nn.linear(800, 500)
self.fc2 = nn.linear(500, 10)
self.step = F.const([10], [], F.NCHW, F.int)
self.lr = F.const([0.0004],[], F.NCHW, F.float)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool(x, [2, 2], [2, 2])
x = F.relu(self.conv2(x))
x = F.max_pool(x, [2, 2], [2, 2])
x = F.reshape(x, [0, -1])
x = F.relu(self.fc1(x))
x = self.fc2(x)
x = F.softmax(x, 1)
return x
model = Net()
F.save(model.parameters, 'mnist.snapshot')
model2 = Net()
model2.load_parameters(F.load_as_list('mnist.snapshot'))
print(model2.lr.read())
print(model2.step.read())

Some files were not shown because too many files have changed in this diff