GitHub release 1.1.0

Hui Shu 2020-11-05 16:41:56 +08:00
parent 939a80dba8
commit d6795ad031
1296 changed files with 98954 additions and 55065 deletions

.gitignore vendored
View File

@ -330,7 +330,6 @@ project/android/.idea/caches/build_file_checksums.ser
# FIXME(haijing): Xcode pre-build stage breaks compilation of flatbuffers by setting envs that do cmake cross-compilation for iOS
# schema/current
schema/private
schema/current
tools/converter/source/IR
benchmark/benchmark.txt
@ -345,18 +344,13 @@ pymnn/android/.idea/modules.xml
pymnn/android/.idea/runConfigurations.xml
pymnn/android/.idea/vcs.xml
pymnn/android/.idea/caches/build_file_checksums.ser
pymnn/src/pybind_private/
buildios
build*/
include/MNN/VCS.h
source/backend/opencl/execution/cl/codegen/opencl_program.cc
source/backend/opencl/execution/cl/opencl_program.cc
# FIXME(haijing): MTL issues.....
# source/backend/metal/MetalOPRegister.mm
source/backend/opengl/AllShader.cpp
include/MNN/backend/opengl/shaders/AllShader.h
source/backend/vulkan/compiler/AllShader.cpp
include/MNN/backend/vulkan/shaders/AllShader.h
.idea
project/ios/ios_64
project/ios/ios_32

View File

@ -49,6 +49,7 @@ include(FindPythonInterp REQUIRED)
option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen" OFF)
option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF)
option(MNN_BUILD_SHARED_LIBS "MNN build shared or static lib" ON)
option(MNN_WIN_RUNTIME_MT "MNN use /MT on Windows dll" OFF)
option(MNN_FORBID_MULTI_THREAD "Disable Multi Thread" OFF)
option(MNN_OPENMP "Use OpenMP's thread pool implementation. Does not work on iOS or Mac OS" OFF)
option(MNN_USE_THREAD_POOL "Use MNN's own thread pool implementation" ON)
@ -62,14 +63,14 @@ option(MNN_SUPPORT_TFLITE_QUAN "Enable MNN's tflite quantized op" ON)
option(MNN_DEBUG_MEMORY "MNN Debug Memory Access" OFF)
option(MNN_DEBUG_TENSOR_SIZE "Enable Tensor Size" OFF)
option(MNN_GPU_TRACE "Enable MNN Gpu Debug" OFF)
option(MNN_OPENCL_LWS_TUNE "Enable MNN OpenCL Lws Tuning" ON)
option(MNN_PORTABLE_BUILD "Link the static version of third party libraries where possible to improve the portability of built executables" OFF)
option(MNN_SEP_BUILD "Build MNN Backends and expression separately. Only works with MNN_BUILD_SHARED_LIBS=ON" ON)
option(NATIVE_LIBRARY_OUTPUT "Native Library Path" OFF)
option(NATIVE_INCLUDE_OUTPUT "Native Include Path" OFF)
option(MNN_AAPL_FMWK "Build MNN.framework instead of traditional .a/.dylib" OFF)
option(MNN_FMA_ENABLE "x86 routine use fma extension" OFF)
option(MNN_WITH_PLUGIN "Build with plugin op support." OFF)
option(MNN_BUILD_MINI "Build MNN-MINI that just supports fixed shape models." OFF)
option(MNN_USE_SSE "Use SSE optimization for x86 if possible" ON)
IF(NOT MNN_BUILD_SHARED_LIBS)
message(WARNING "Close MNN_SEP_BUILD for static library")
@ -79,27 +80,29 @@ IF(APPLE AND MNN_AAPL_FMWK AND MNN_SEP_BUILD)
message(WARNING "MNN_SEP_BUILD AND MNN_AAPL_FMWK can't coexist. Turning off MNN_SEP_BUILD")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF()
IF(MSVC OR WIN32)
IF(WIN32)
IF(MNN_SEP_BUILD)
message(WARNING "MNN_SEP_BUILD IS TROUBLESOME ON Windows. Forcing OFF...")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF()
SET(MNN_USE_SYSTEM_LIB ON CACHE BOOL "<docstring>" FORCE)
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
# generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946
SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
IF(MSVC)
# generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946
SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
ENDIF()
ENDIF()
include(${CMAKE_CURRENT_LIST_DIR}/cmake/macros.cmake)
IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT MNN_BUILD_SHARED_LIBS AND NOT (MSVC OR WIN32))
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
IF(MNN_BUILD_CONVERTER)
SET(MNN_PORTABLE_BUILD ON CACHE BOOL "<docstring>" FORCE)
@ -117,6 +120,9 @@ endif()
if(MNN_SUPPORT_TFLITE_QUAN)
add_definitions(-DMNN_SUPPORT_TFLITE_QUAN)
endif()
if(MNN_BUILD_MINI)
add_definitions(-DMNN_BUILD_MINI)
endif()
# debug options
if(MNN_DEBUG_MEMORY)
@ -128,9 +134,6 @@ endif()
if(MNN_GPU_TRACE)
add_definitions(-DMNN_GPU_FORCE_FINISH)
endif()
if(MNN_OPENCL_LWS_TUNE)
add_definitions(-DMNN_OPENCL_LWS_TUNE)
endif()
# backend options
option(MNN_METAL "Enable Metal" OFF)
@ -138,11 +141,8 @@ option(MNN_OPENCL "Enable OpenCL" OFF)
option(MNN_OPENGL "Enable OpenGL" OFF)
option(MNN_VULKAN "Enable Vulkan" OFF)
option(MNN_ARM82 "Enable ARM82" OFF)
# codegen register ops
if (MNN_METAL)
add_definitions(-DMNN_CODEGEN_REGISTER)
endif()
option(MNN_CUDA "Enable CUDA" OFF)
option(MNN_TENSORRT "Enable TensorRT" OFF)
# target options
option(MNN_BUILD_BENCHMARK "Build benchmark or not" OFF)
@ -165,11 +165,13 @@ message(STATUS "\tOpenCL: ${MNN_OPENCL}")
message(STATUS "\tOpenGL: ${MNN_OPENGL}")
message(STATUS "\tVulkan: ${MNN_VULKAN}")
message(STATUS "\tARM82: ${MNN_ARM82}")
message(STATUS "\tTensorRT: ${MNN_TENSORRT}")
message(STATUS "\tCUDA: ${MNN_CUDA}")
message(STATUS "\tOpenMP: ${MNN_OPENMP}")
message(STATUS "\tHidden: ${MNN_HIDDEN}")
message(STATUS "\tBuild Path: ${CMAKE_CURRENT_BINARY_DIR}")
if(WIN32)
if(MSVC)
if(${CMAKE_VERSION} VERSION_LESS "3.14.0")
message(FATAL_ERROR "MNN requires CMake 3.14+ to build on Windows!")
endif()
@ -178,14 +180,14 @@ if(WIN32)
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if (MNN_BUILD_SHARED_LIBS)
if(${flag_var} MATCHES "/MT")
string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
endif()
else ()
if (MNN_WIN_RUNTIME_MT)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif()
else ()
if(${flag_var} MATCHES "/MT")
string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
endif()
endif ()
endforeach()
elseif(CMAKE_SYSTEM_NAME MATCHES "^Android" OR CMAKE_SYSTEM_NAME MATCHES "^Linux")
@ -270,6 +272,8 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "^Linux")
endif()
include_directories(${CMAKE_CURRENT_LIST_DIR}/include/
${CMAKE_CURRENT_LIST_DIR}/source/
${CMAKE_CURRENT_LIST_DIR}/express/
${CMAKE_CURRENT_LIST_DIR}/tools/
${CMAKE_CURRENT_LIST_DIR}/schema/current/
${CMAKE_CURRENT_LIST_DIR}/3rd_party/
${CMAKE_CURRENT_LIST_DIR}/3rd_party/flatbuffers/include
@ -293,12 +297,12 @@ FILE(GLOB MNN_CV_SRC ${CMAKE_CURRENT_LIST_DIR}/source/cv/*)
add_library(MNNCV OBJECT ${MNN_CV_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCV>)
list(APPEND MNN_TARGETS MNNCV)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
if(WIN32 OR MSVC)
target_compile_options(MNNCV PRIVATE /arch:AVX)
else()
target_compile_options(MNNCV PRIVATE -msse3)
target_compile_options(MNNCV PRIVATE -mavx)
if (MNN_USE_SSE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
if (NOT MSVC)
target_compile_options(MNNCV PRIVATE -msse3)
target_compile_options(MNNCV PRIVATE -mavx)
endif()
endif()
endif()
@ -308,11 +312,19 @@ add_library(MNNMath OBJECT ${MNN_Math_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMath>)
list(APPEND MNN_TARGETS MNNMath)
# Shape
FILE(GLOB MNN_Shape_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/*)
add_library(MNNShape OBJECT ${MNN_Shape_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNShape>)
list(APPEND MNN_TARGETS MNNShape)
# Transform
FILE(GLOB MNN_Transform_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/* ${CMAKE_CURRENT_LIST_DIR}/source/geometry/*)
add_library(MNNTransform OBJECT ${MNN_Transform_SRC})
IF (NOT MNN_BUILD_MINI)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTransform>)
ENDIF()
list(APPEND MNN_TARGETS MNNTransform)
# Utils
FILE(GLOB MNN_Utils_SRC ${CMAKE_CURRENT_LIST_DIR}/source/utils/*)
add_library(MNNUtils OBJECT ${MNN_Utils_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNUtils>)
list(APPEND MNN_TARGETS MNNUtils)
# Compute
FILE(GLOB MNN_Compute_SRC ${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/compute/*)
@ -327,7 +339,9 @@ list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCPU>)
list(APPEND MNN_TARGETS MNNCPU)
# X86_64 AVX/SSE
if (MNN_USE_SSE)
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/x86_x64/CMakeLists.txt)
endif()
# AArch32/64 Assemblies
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/arm/CMakeLists.txt)
@ -377,7 +391,7 @@ if (NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
if (WIN32)
if (MSVC)
set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}")
set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}")
endif()
@ -387,20 +401,22 @@ endif()
set(CMAKE_CXX_FLAGS_ORIGIN ${CMAKE_CXX_FLAGS})
set(CMAKE_C_FLAGS_ORIGIN ${CMAKE_C_FLAGS})
if ((NOT (MSVC OR WIN32)) AND MNN_HIDDEN)
if ((NOT MSVC) AND MNN_HIDDEN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden -fvisibility=hidden")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
if (NOT APPLE)
# Omitting the frame pointer can make debugging harder
if ((NOT APPLE) AND (NOT WIN32))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer")
endif()
endif()
if (NOT (MSVC OR WIN32))
if (NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions ")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif()
# Metal
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
set(MNN_DEPS "")
set(MNN_EXTRA_DEPENDS "")
list(APPEND MNN_DEPS MNN)
# Plugin
@ -409,6 +425,14 @@ if(MNN_WITH_PLUGIN)
include(${CMAKE_CURRENT_LIST_DIR}/source/plugin/CMakeLists.txt)
endif()
# Metal
if(MNN_METAL AND APPLE)
add_definitions(-DMNN_METAL_ENABLED=1)
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
list(APPEND MNN_TARGETS MNNMetal)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMetal>)
endif()
# Vulkan
IF(MNN_VULKAN)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/vulkan/)
@ -446,22 +470,34 @@ IF(MNN_OPENGL)
ENDIF()
ENDIF()
# CUDA
IF(MNN_CUDA)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/cuda/)
list(APPEND MNN_TARGETS MNN_CUDA)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_CUDA>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_CUDA_LIBS})
ENDIF()
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR IOS_ARCH STREQUAL "arm64")
# ARM82 Assemblies
IF(MNN_ARM82)
add_definitions(-DENABLE_ARMV82)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/arm82/)
IF(MNN_SEP_BUILD)
list(APPEND MNN_DEPS MNN_Arm82)
ELSE()
list(APPEND MNN_TARGETS MNN_Arm82)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_Arm82>)
ENDIF()
list(APPEND MNN_TARGETS MNN_Arm82)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_Arm82>)
ENDIF()
ENDIF()
# Express
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/express/)
# TensorRT
IF(MNN_TENSORRT)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/tensorrt/)
list(APPEND MNN_TARGETS MNN_TRT)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_TRT>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_TRT_LIBS})
ENDIF()
IF(MNN_SEP_BUILD)
add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
@ -471,7 +507,7 @@ ELSE()
list(APPEND MNN_TARGETS MNNExpress)
IF(MNN_BUILD_SHARED_LIBS)
add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
if (MSVC OR WIN32)
if (WIN32)
foreach(TARGET ${MNN_TARGETS})
target_compile_definitions(${TARGET} PRIVATE "-DBUILDING_MNN_DLL")
target_compile_definitions(${TARGET} INTERFACE "-DUSING_MNN_DLL")
@ -484,7 +520,7 @@ ELSE()
ENDIF()
target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
ENDIF()
if (MSVC OR WIN32)
if (MSVC)
target_link_options(MNN PRIVATE "/IGNORE:4049,4217")
endif()
@ -504,9 +540,11 @@ if(APPLE)
target_link_libraries(MNN PUBLIC ${FOUNDATION})
find_library(METAL Metal REQUIRED)
target_link_libraries(MNN PUBLIC ${METAL})
find_library(GRAPHIC CoreGraphics)
target_link_libraries(MNN PUBLIC ${GRAPHIC})
ENDIF()
endif()
add_dependencies(MNN MNNCore MNNCV MNNShape MNNMath MNNCompute MNNCPU GenVCSHDR)
add_dependencies(MNN MNNCore MNNCV MNNTransform MNNMath MNNCompute MNNCPU GenVCSHDR)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/converter)
if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
@ -532,12 +570,6 @@ if (NOT MNN_BUILD_SHARED_LIBS)
endif()
endif()
list(APPEND MNN_TARGETS MNN)
FOREACH(TARGET ${MNN_TARGETS})
IF((NOT MSVC) AND (NOT WIN32))
else()
target_compile_definitions(${TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS)
endif()
ENDFOREACH()
list(REMOVE_ITEM MNN_TARGETS MNN)
IF(MNN_BUILD_DEMO)
include(${CMAKE_CURRENT_LIST_DIR}/demo/exec/CMakeLists.txt)

View File

@ -46,6 +46,7 @@ Pod::Spec.new do |s|
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/geometry/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
@ -58,4 +59,4 @@ Pod::Spec.new do |s|
s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1'}
s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' }
end
end

View File

@ -66,7 +66,7 @@ The Interpreter is composed of the Engine and Backends; the former is responsible for model loading and the computation graph
Group 3:
<img src="doc/DingTalkQR3.png" height="256"/>
<img src="doc/DingTalkQR23.png" height="256"/>
## License
Apache 2.0

View File

@ -0,0 +1,89 @@
//
// CPUBatchMatMul.cpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUBatchMatMul.hpp"
#include "backend/cpu/CPUBackend.hpp"
#include "math/Matrix.hpp"
namespace MNN {
CPUBatchMatMul::CPUBatchMatMul(Backend* backend, bool adjX, bool adjY) : Execution(backend) {
mMatMul.reset(new CPUMatMul(backend, adjX, adjY, true));
}
ErrorCode CPUBatchMatMul::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
// Fill output by zero if one of inputs is empty.
if (input0->elementSize() == 0 || input1->elementSize() == 0) {
return NO_ERROR;
}
auto dimensions = input0->dimensions();
mMatrixA.reset(Tensor::createDevice<float>({input0->length(input0->dimensions()-2), input0->length(input0->dimensions()-1)}));
mMatrixB.reset(Tensor::createDevice<float>({input1->length(input1->dimensions()-2), input1->length(input0->dimensions()-1)}));
mMatrixC.reset(Tensor::createDevice<float>({output->length(output->dimensions()-2), output->length(output->dimensions()-1)}));
mTempInputs = {mMatrixA.get(), mMatrixB.get()};
mTempOutputs = {mMatrixC.get()};
auto res = backend()->onAcquireBuffer(mMatrixA.get(), Backend::DYNAMIC);
res = res && backend()->onAcquireBuffer(mMatrixB.get(), Backend::DYNAMIC);
res = res && backend()->onAcquireBuffer(mMatrixC.get(), Backend::DYNAMIC);
if (!res) {
return OUT_OF_MEMORY;
}
int batch = 1;
for (int i = 0; i < dimensions - 2; ++i) {
batch *= input0->length(i);
}
mBatch = batch;
auto code = mMatMul->onResize(mTempInputs, mTempOutputs);
backend()->onReleaseBuffer(mMatrixA.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mMatrixB.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mMatrixC.get(), Backend::DYNAMIC);
return code;
}
ErrorCode CPUBatchMatMul::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
// Fill output by zero if one of inputs is empty.
if (input0->elementSize() == 0 || input1->elementSize() == 0) {
::memset(output->host<float>(), 0, output->size());
return NO_ERROR;
}
const int dimensions = input0->dimensions();
MNN_ASSERT(dimensions >= 3);
const int input0Stride = input0->length(dimensions - 1) * input0->length(dimensions - 2);
const int input1Stride = input1->length(dimensions - 1) * input1->length(dimensions - 2);
const int outputStride = output->length(dimensions - 1) * output->length(dimensions - 2);
const auto input0Ptr = input0->host<float>();
const auto input1Ptr = input1->host<float>();
float* const outputPtr = output->host<float>();
for (int i = 0; i < mBatch; ++i) {
::memcpy(mMatrixA->host<float>(), input0Ptr + i * input0Stride, input0Stride * sizeof(float));
::memcpy(mMatrixB->host<float>(), input1Ptr + i * input1Stride, input1Stride * sizeof(float));
mMatMul->onExecute(mTempInputs, mTempOutputs);
::memcpy(outputPtr + i * outputStride, mMatrixC->host<float>(), outputStride * sizeof(float));
}
return NO_ERROR;
}
class CPUBatchMatMulCreator : public CPUBackend::Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const override {
return new CPUBatchMatMul(backend, op->main_as_BatchMatMulParam()->adjX(), op->main_as_BatchMatMulParam()->adjY());
}
};
REGISTER_CPU_OP_CREATOR(CPUBatchMatMulCreator, OpType_BatchMatMul);
} // namespace MNN

View File

@ -0,0 +1,35 @@
//
// CPUBatchMatMul.hpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUBatchMatMul_hpp
#define CPUBatchMatMul_hpp
#include "backend/cpu/CPUMatMul.hpp"
namespace MNN {
class CPUBatchMatMul : public Execution {
public:
CPUBatchMatMul(Backend *backend, bool adjX, bool adjY);
virtual ~CPUBatchMatMul() = default;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
int mBatch;
std::shared_ptr<Execution> mMatMul;
std::vector<Tensor*> mTempInputs;
std::vector<Tensor*> mTempOutputs;
std::shared_ptr<Tensor> mMatrixA;
std::shared_ptr<Tensor> mMatrixB;
std::shared_ptr<Tensor> mMatrixC;
};
} // namespace MNN
#endif /* CPUBatchMatMul_hpp */

View File

@ -18,7 +18,6 @@
#include "backend/cpu/compute/ConvOpt.h"
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/ConvolutionFloatFactory.h"
#include "math/Vec4.hpp"
#define MIN_CON_PLANESIZE 256

View File

@ -10,7 +10,9 @@
#include <math.h>
#include "backend/cpu/CPUBackend.hpp"
#include "core/Macro.h"
#include "math/Vec4.hpp"
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
namespace MNN {
@ -39,12 +41,12 @@ ErrorCode CPUCosineSimilarity::onExecute(const std::vector<Tensor*>& inputs, con
const auto x1ChannelPtr = x1DataBatchPtr + j;
const auto x2ChannelPtr = x2DataBatchPtr + j;
Math::Vec4 innerProduct(.0f);
Math::Vec4 x1Square(.0f);
Math::Vec4 x2Square(.0f);
Vec4 innerProduct(.0f);
Vec4 x1Square(.0f);
Vec4 x2Square(.0f);
for (int c = 0; c < channel; ++c) {
Math::Vec4 x1Data = Math::Vec4::load(x1ChannelPtr + c * channleStride);
Math::Vec4 x2Data = Math::Vec4::load(x2ChannelPtr + c * channleStride);
Vec4 x1Data = Vec4::load(x1ChannelPtr + c * channleStride);
Vec4 x2Data = Vec4::load(x2ChannelPtr + c * channleStride);
auto x1Xx2 = x1Data * x2Data;
innerProduct = innerProduct + x1Xx2;
x1Square = x1Square + x1Data * x1Data;

View File

@ -12,8 +12,8 @@
#include "core/Concurrency.h"
#include "core/Macro.h"
#include "math/Vec4.hpp"
using MNN::Math::Vec4;
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
namespace MNN {

View File

@ -21,7 +21,7 @@ public:
auto parameter = op->main_as_InnerProduct();
int outputCount = parameter->outputCount();
int srcCount = parameter->weight()->size() / outputCount;
mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4));
mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4, 4));
if (mWeight.get() == nullptr) {
mValid = false;
return;

View File

@ -180,6 +180,14 @@ ErrorCode CPULSTM::onResize(const std::vector<Tensor *> &inputs, const std::vect
::memcpy(mBiasC->host<float>(), mLSTM->bias()->float32s()->data(), mBiasC->size());
::memcpy(mWeightH->host<float>(), mLSTM->weightH()->float32s()->data(), mWeightH->size());
}
if (mGateHaveBias) {
// Merge bias
auto biasPtr = mBiasC->host<float>();
auto biasPtr2 = biasPtr + 4 * numUnits;
for (int i=0; i<4*numUnits; ++i) {
biasPtr[i] = biasPtr[i] + biasPtr2[i];
}
}
}
if (inputs.size() > 1) {
@ -260,16 +268,8 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
MNN_CONCURRENCY_END();
float* biasStartPtr = mBiasC->host<float>();
if(!mGateHaveBias){
biasStartPtr = nullptr;
}
mRetriveOutputFunction(mGates.host<float>(), biasStartPtr);
float* recurrenceBiasStartPtr = mBiasC->host<float>();
if(mGateHaveBias){
recurrenceBiasStartPtr += 4 * numUnits;
}
// transform
const float *contData = nullptr;
if (inputs.size() > 1) {
@ -330,14 +330,11 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
}
// add bias
auto biasPtr = recurrenceBiasStartPtr + oc;
I = sigmoid(*biasPtr + I);
biasPtr = biasPtr + numUnits;
F = sigmoid(*biasPtr + F);
biasPtr = biasPtr + numUnits;
O = sigmoid(*biasPtr + O);
biasPtr = biasPtr + numUnits;
G = tanhf(*biasPtr + G);
//MNN_PRINT("%f, %f, %f, %f\n", I, O, F, G);
I = sigmoid(I);
F = sigmoid(F);
O = sigmoid(O);
G = tanhf(G);
auto newCell = F * cellData[oc] + I * G;
cellData[oc] = newCell;

View File

@ -0,0 +1,311 @@
//
// CPUSoftmax.cpp
// MNN
//
// Created by MNN on 2018/07/16.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUSoftmax.hpp"
#include <math.h>
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/Concurrency.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#ifdef MNN_USE_NEON
#include <arm_neon.h>
#endif
namespace MNN {
int CPUSoftmax::_softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum) {
// Max and sub
MNN_CONCURRENCY_BEGIN(tId, threadNum)
{
const float *srcY = srcData + tId * channel;
float *dstY = dstData + tId * channel;
for (int y = (int)tId; y < outside; y += threadNum, srcY += channel * threadNum, dstY += channel * threadNum) {
float maxValue = srcY[0];
{
int c = 1;
#ifdef MNN_USE_NEON
#if !(defined(__ARM_FEATURE_FMA) && defined(__aarch64__))
#define vmaxvq_f32(v) \
({ \
float __m = v[0]; \
for (int i = 1; i < 4; i++) { \
if (v[i] > __m) \
__m = v[i]; \
} \
__m; \
})
#endif
if (c + 3 < channel) {
float32x4_t maxx4 = vld1q_f32(srcY + c);
c += 4;
for (; c + 3 < channel; c += 4) {
maxx4 = vmaxq_f32(maxx4, vld1q_f32(srcY + c));
}
float value = vmaxvq_f32(maxx4);
if (value > maxValue)
maxValue = value;
}
#endif
for (; c < channel; ++c) {
float value = srcY[c];
if (value > maxValue)
maxValue = value;
}
}
for (int c = 0; c < channel; ++c) {
dstY[c] = -srcY[c] + maxValue;
}
}
}
MNN_CONCURRENCY_END();
//Exp
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(channel * outside);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber -1 ) {
realSize = channel * outside - start;
}
if (realSize > 0) {
MNNExp(dstData + start, dstData + start, realSize);
}
}
MNN_CONCURRENCY_END();
// Sum and div
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
float *dstY = dstData + tId * channel;
for (int y = (int)tId; y < outside; y += threadNum, dstY += channel * threadNum) {
// sum
float sumValue = 0;
for (int c = 0; c < channel; ++c) {
sumValue += dstY[c];
}
// div
{
int c = 0;
#ifdef MNN_USE_NEON
float div = 1.f / sumValue;
for (; c + 3 < channel; c += 4) {
vst1q_f32(dstY + c, vmulq_n_f32(vld1q_f32(dstY + c), div));
}
#endif
for (; c < channel; ++c) {
dstY[c] /= sumValue;
}
}
}
}
MNN_CONCURRENCY_END();
return 0;
}
int CPUSoftmax::_softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel,
float *maxValue, float *sumValue, int threadNum) {
if (inside == 1)
return _softmax1(srcData, dstData, outside, channel, threadNum);
const int stepY = inside * channel;
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
const float *srcY = srcData + tId * stepY;
float *dstY = dstData + tId * stepY;
float *maxValueSub = maxValue + tId * inside;
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
memcpy(maxValueSub, srcY, sizeof(float) * inside);
const float *src = srcY + inside;
for (int c = 1; c < channel; ++c, src += inside) {
for (int x = 0; x < inside; ++x) {
if (src[x] > maxValueSub[x])
maxValueSub[x] = src[x];
}
}
src = srcY;
float *dst = dstY;
for (int c = 0; c < channel; ++c, src += inside, dst += inside) {
for (int x = 0; x < inside; ++x) {
dst[x] = -src[x] + maxValueSub[x];
}
}
}
}
MNN_CONCURRENCY_END();
auto totalSize = channel * inside * outside;
//Exp
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(totalSize);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber -1 ) {
realSize = totalSize - start;
}
if (realSize > 0) {
MNNExp(dstData + start, dstData + start, realSize);
}
}
MNN_CONCURRENCY_END();
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
const float *srcY = srcData + tId * stepY;
float *dstY = dstData + tId * stepY;
float *sumValueSub = sumValue + tId * inside;
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
memset(sumValueSub, 0, sizeof(float) * inside);
float *dst = dstY;
for (int c = 0; c < channel; ++c, dst += inside) {
for (int x = 0; x < inside; ++x) {
sumValueSub[x] += dst[x];
}
}
dst = dstY;
for (int c = 0; c < channel; ++c, dst += inside) {
for (int x = 0; x < inside; ++x) {
dst[x] /= sumValueSub[x];
}
}
}
}
MNN_CONCURRENCY_END();
return 0;
}
ErrorCode CPUSoftmax::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
auto input = inputs[0];
const int dimensions = input->buffer().dimensions;
const auto layout = TensorUtils::getDescribe(input)->dimensionFormat;
mNeedUnpackC4 = layout == MNN_DATA_FORMAT_NC4HW4;
if (mNeedUnpackC4) {
int totalSize = 1;
for (int i = 1; i < dimensions; ++i) {
totalSize *= input->length(i);
}
mStorage.buffer().dim[0].extent = input->length(0);
mStorage.buffer().dim[1].extent = totalSize;
TensorUtils::getDescribe(&mStorage)->dimensionFormat = MNN_DATA_FORMAT_NHWC;
mStorage.buffer().dimensions = 2;
mStorage.buffer().type = input->getType();
backend()->onAcquireBuffer(&mStorage, Backend::DYNAMIC);
}
int inside = 1;
int dims = input->buffer().dimensions;
for (int i = mAxis + 1; i < dims; ++i) {
inside *= input->length(i);
}
if (inside != 1) { // _softmax1 is not used here, so the maxValue and sumValue tensors are needed.
int threadNum = ((CPUBackend *)backend())->threadNumber();
mMaxValue.buffer().dim[0].extent = inside * threadNum;
mMaxValue.buffer().dimensions = 1;
mMaxValue.setType(DataType_DT_FLOAT);
backend()->onAcquireBuffer(&mMaxValue, Backend::DYNAMIC);
mSumValue.buffer().dim[0].extent = inside * threadNum;
mSumValue.buffer().dimensions = 1;
mSumValue.setType(DataType_DT_FLOAT);
backend()->onAcquireBuffer(&mSumValue, Backend::DYNAMIC);
backend()->onReleaseBuffer(&mMaxValue, Backend::DYNAMIC);
backend()->onReleaseBuffer(&mSumValue, Backend::DYNAMIC);
}
if (mNeedUnpackC4) {
backend()->onReleaseBuffer(&mStorage, Backend::DYNAMIC);
}
return NO_ERROR;
}
ErrorCode CPUSoftmax::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
MNN_ASSERT(1 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto inputTensor = inputs[0];
auto outputTensor = outputs[0];
const auto inputDataPtr = inputTensor->host<float>();
auto outputDataPtr = outputTensor->host<float>();
const int batch = inputTensor->batch();
const auto dims = inputTensor->buffer().dimensions;
float *tempData = nullptr;
if (mNeedUnpackC4) {
tempData = mStorage.host<float>();
}
int areaInput = 1;
for (int i = 2; i < dims; ++i) {
areaInput *= inputTensor->length(i);
}
int inside = 1;
int outside = 1;
int channel = 1;
for (int i = 0; i < mAxis; ++i) {
outside *= inputTensor->length(i);
}
channel = inputTensor->length(mAxis);
for (int i = mAxis + 1; i < dims; ++i) {
inside *= inputTensor->length(i);
}
int threadNum = ((CPUBackend *)backend())->threadNumber();
if (!mNeedUnpackC4) {
_softmaxCommon(inputDataPtr, outputDataPtr, inside, outside, channel, mMaxValue.host<float>(),
mSumValue.host<float>(), threadNum);
return NO_ERROR;
}
auto outputSize = outputTensor->elementSize();
int batchSize = outputSize / batch;
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
auto inputData = inputDataPtr + batchIndex * batchSize;
MNNUnpackC4(outputDataPtr + batchIndex * mStorage.length(1), inputData, areaInput, inputTensor->channel());
}
_softmaxCommon(outputDataPtr, tempData, inside, outside, channel, mMaxValue.host<float>(), mSumValue.host<float>(), threadNum);
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
auto outputData = outputDataPtr + batchIndex * batchSize;
auto tempPtr = tempData + batchIndex * mStorage.length(1);
MNNPackC4(outputData, tempPtr, areaInput, outputTensor->channel());
}
return NO_ERROR;
}
CPUSoftmax::CPUSoftmax(Backend *b, int axis) : MNN::Execution(b), mAxis(axis), mStorage(2), mNeedUnpackC4(false) {
// nothing to do
}
class CPUSoftmaxCreator : public CPUBackend::Creator {
public:
virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
const MNN::Op *op, Backend *backend) const override {
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
return new CPUSoftmax(backend, axis);
}
};
REGISTER_CPU_OP_CREATOR(CPUSoftmaxCreator, OpType_Softmax);
} // namespace MNN

View File

@ -0,0 +1,35 @@
//
// CPUSoftmax.hpp
// MNN
//
// Created by MNN on 2018/07/16.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUSoftmax_hpp
#define CPUSoftmax_hpp
#include "core/Execution.hpp"
namespace MNN {
class CPUSoftmax : public Execution {
public:
CPUSoftmax(Backend *b, int axis);
virtual ~CPUSoftmax() = default;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
int _softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel, float *maxValue,
float *sumValue, int threadNum);
int _softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum);
int mAxis;
Tensor mStorage;
Tensor mMaxValue;
Tensor mSumValue;
bool mNeedUnpackC4;
};
} // namespace MNN
#endif /* CPUSoftmax_hpp */

View File

@ -13,10 +13,8 @@
#include "backend/cpu/compute/ConvOpt.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "math/Vec4.hpp"
using namespace MNN::Math;
typedef Vec4 float4;
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
#define SOURCE_BLOCK 64
#define WEIGHT_BLOCK 256

View File

@ -0,0 +1,128 @@
//
// GeometryCropAndResize.cpp
// MNN
//
// Created by MNN on 2020/08/5.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "ConvertUtils.hpp"
namespace MNN {
class GeometryCropAndResize : public GeometryComputer {
public:
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
MNN_ASSERT(4 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto img = inputs[0];
auto boxes = inputs[1];
auto box_ind = inputs[2];
auto crop_size = inputs[3];
auto output = outputs[0];
auto extrapolation = op->main_as_CropAndResize()->extrapolationValue();
auto method = op->main_as_CropAndResize()->method();
// resizeType of Interp : 1-NEAREST, 2-BILINEAR
const int resizeType = method == CropAndResizeMethod_BILINEAR ? 2 : 1;
int batch = img->length(0), ih = img->length(1), iw = img->length(2),
depth = img->length(3), boxNum = boxes->length(0);
const int cropHeight = crop_size->host<uint32_t>()[0],
cropWidth = crop_size->host<uint32_t>()[1];
auto des = TensorUtils::getDescribe(output);
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
des->regions.clear();
des->regions.reserve(boxNum);
for (int i = 0; i < boxNum; i++) {
const float y1 = boxes->host<float>()[i*4];
const float x1 = boxes->host<float>()[i*4+1];
const float y2 = boxes->host<float>()[i*4+2];
const float x2 = boxes->host<float>()[i*4+3];
const int ind = box_ind->host<uint32_t>()[i];
const float ch = (y2 - y1) * (ih - 1), cw = (x2 - x1) * (iw - 1);
const float yScale = ch / static_cast<float>(cropHeight - 1);
const float xScale = cw / static_cast<float>(cropWidth - 1);
const float yOffset = y1 * (ih - 1), xOffset = x1 * (iw - 1);
// select the cropped image from the input batch and convert its format from NHWC to NC4HW4
std::shared_ptr<Tensor> cropValue(new Tensor);
{
cropValue->buffer().type = halide_type_of<float>();
cropValue->buffer().dimensions = 4;
cropValue->setLength(0, 1);
cropValue->setLength(1, depth);
cropValue->setLength(2, ih);
cropValue->setLength(3, iw);
auto des = TensorUtils::getDescribe(cropValue.get());
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
des->regions.clear();
Tensor::InsideDescribe::Region region;
region.origin = img;
region.size[1] = depth;
region.size[2] = ih * iw;
region.src.offset = ind * ih * iw * depth;
region.dst.offset = 0;
region.src.stride[1] = 1;
region.src.stride[2] = depth;
region.dst.stride[1] = ih * iw;
region.dst.stride[2] = 1;
des->regions.emplace_back(std::move(region));
res.extras.emplace_back(cropValue);
}
// use the Interp op to crop and resize the selected image
std::shared_ptr<Tensor> resizeValue;
{
resizeValue.reset(Tensor::createDevice<float>({1, depth, cropHeight, cropWidth}));
auto des = TensorUtils::getDescribe(resizeValue.get());
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
std::unique_ptr<OpT> interp(new OpT);
interp->type = OpType_Interp;
interp->main.type = OpParameter_Interp;
interp->main.value = new InterpT;
interp->main.AsInterp()->widthScale = xScale;
interp->main.AsInterp()->heightScale = yScale;
interp->main.AsInterp()->widthOffset = xOffset;
interp->main.AsInterp()->heightOffset = yOffset;
interp->main.AsInterp()->alignCorners = false;
interp->main.AsInterp()->resizeType = resizeType;
auto cmd = GeometryComputerUtils::makeCommand(interp.get(), {cropValue.get()}, {resizeValue.get()});
res.extras.emplace_back(resizeValue);
res.command.emplace_back(cmd);
}
// convert the resized image's format from NC4HW4 to NHWC and add it to the output batch
{
Tensor::InsideDescribe::Region region;
region.origin = resizeValue.get();
region.size[1] = cropHeight * cropWidth;
region.size[2] = depth;
region.src.offset = 0;
region.dst.offset = i * cropHeight * cropWidth * depth;
region.src.stride[1] = 1;
region.src.stride[2] = cropHeight * cropWidth;
region.dst.stride[1] = depth;
region.dst.stride[2] = 1;
des->regions.emplace_back(std::move(region));
}
}
return true;
}
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
//return {false};
return {true};
}
};
static void _create() {
std::shared_ptr<GeometryComputer> comp(new GeometryCropAndResize);
// GeometryComputer::registerGeometryComputer(comp, {OpType_CropAndResize});
}
REGISTER_GEOMETRY(GeometryCropAndResize, _create);
} // namespace MNN

View File

@ -0,0 +1,304 @@
//
// GeometryGather.cpp
// MNN
//
// Created by MNN on 2020/06/09.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
namespace MNN {
class GeometryGather : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() == 2);
MNN_ASSERT(1 == outputs.size());
auto embedding = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
const int firstDimStride = embedding->buffer().dim[0].stride;
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && firstDimStride != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
}
return std::vector<bool>(outputs.size(), false);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(2 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto embedding = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
const int firstDimStride = embedding->buffer().dim[0].stride;
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || firstDimStride == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
auto bytes = embedding->buffer().type.bytes();
const size_t indicesCount = indices->elementSize();
const auto limit = embedding->length(0);
const int* indicesData = indices->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int i = 0; i < indicesCount; i++) {
if (indicesData[i] < 0 || indicesData[i] > limit) {
MNN_PRINT("Gather indice error\n");
return false;
}
Tensor::InsideDescribe::Region slice;
slice.origin = embedding;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = firstDimStride;
slice.src.offset = firstDimStride * indicesData[i];
slice.dst.offset = i * firstDimStride;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
return true;
}
};
class GeometryGatherND : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() == 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int mSliceN = 1;
int mSliceSize = 1;
for (int i = 0; i < indices->dimensions() - 1; ++i) {
mSliceN *= indices->length(i);
}
auto indiceNd = indices->length(indices->dimensions() - 1);
std::vector<int> mDimsToCount;
mDimsToCount.resize(indiceNd);
for (int i = indiceNd; i < params->dimensions(); ++i) {
mSliceSize *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && mSliceSize != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
} else {
std::vector<bool> res(outputs.size(), false);
return res;
}
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(2 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indice = inputs[1];
auto output = outputs[0];
int mSliceN = 1;
int mSliceSize = 1;
for (int i = 0; i < indice->dimensions() - 1; ++i) {
mSliceN *= indice->length(i);
}
auto indiceNd = indice->length(indice->dimensions() - 1);
std::vector<int> mDimsToCount;
mDimsToCount.resize(indiceNd);
for (int i = indiceNd; i < params->dimensions(); ++i) {
mSliceSize *= params->length(i);
}
if (TensorUtils::getDescribe(indice)->usage != MNN::Tensor::InsideDescribe::CONSTANT || mSliceSize == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
auto paramSize = params->elementSize();
for (int i = 0; i < indiceNd; ++i) {
mDimsToCount[i] = paramSize / params->length(i);
paramSize = mDimsToCount[i];
}
mDimsToCount.resize(indiceNd);
auto indiceData = indice->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int i = 0; i < mSliceN; i++) {
int fromPos = 0;
for (int j = 0; j < indiceNd; ++j) {
fromPos += mDimsToCount[j] * indiceData[i * indiceNd + j];
}
Tensor::InsideDescribe::Region slice;
slice.origin = params;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = mSliceSize;
slice.src.offset = fromPos;
slice.dst.offset = i * mSliceSize;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
return true;
}
};
class GeometryGatherV2 : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() >= 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int axis = 0;
if (inputs.size() == 3) {
const Tensor* axisTensor = inputs[2];
axis = axisTensor->host<int32_t>()[0];
}
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
if (axis < 0) {
axis = params->buffer().dimensions + axis;
}
const int gatherDimSize = params->buffer().dim[axis].extent;
const int N = indices->elementSize();
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
int inside = 1;
for (int i = axis + 1; i < params->dimensions(); ++i) {
inside *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && inside != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
}
return std::vector<bool>(outputs.size(), false);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(inputs.size() >= 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int axis = 0;
if (inputs.size() == 3) {
const Tensor* axisTensor = inputs[2];
axis = axisTensor->host<int32_t>()[0];
}
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
if (axis < 0) {
axis = params->buffer().dimensions + axis;
}
const int gatherDimSize = params->buffer().dim[axis].extent;
const int N = indices->elementSize();
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
int inside = 1;
int outside = 1;
for (int i = 0; i < axis; ++i) {
outside *= params->length(i);
}
for (int i = axis + 1; i < params->dimensions(); ++i) {
inside *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || inside == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
const int limit = params->length(axis);
auto bytes = output->buffer().type.bytes();
const int insideStride = inside;
const int outputOutsideStride = inside * N;
const int inputOutsideStride = inside * inputs[0]->length(axis);
const int* indicesPtr = indices->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int o = 0; o < outside; ++o) {
for (int i = 0; i < N; i++) {
if (indicesPtr[i] < 0 || indicesPtr[i] > limit) {
continue;
}
Tensor::InsideDescribe::Region slice;
slice.origin = params;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = insideStride;
slice.src.offset = inputOutsideStride * o + insideStride * indicesPtr[i];
slice.dst.offset = outputOutsideStride * o + i * insideStride;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
}
return true;
}
};
static void _create() {
// std::shared_ptr<GeometryComputer> comp(new GeometryGather);
// GeometryComputer::registerGeometryComputer(comp, {OpType_Gather});
//
// std::shared_ptr<GeometryComputer> comp2(new GeometryGatherND);
// GeometryComputer::registerGeometryComputer(comp2, {OpType_GatherND});
//
// std::shared_ptr<GeometryComputer> comp3(new GeometryGatherV2);
// GeometryComputer::registerGeometryComputer(comp3, {OpType_GatherV2});
}
REGISTER_GEOMETRY(GeometryGather, _create);
} // namespace MNN

View File

@ -0,0 +1,214 @@
//
// GeometrySoftmax.cpp
// MNN
//
// Created by MNN on 2020/06/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"
namespace MNN {
class GeometrySoftmax : public GeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
if (axis == 1) {
return std::vector<bool>(outputs.size(), false);
}
return std::vector<bool>(outputs.size(), true);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
MNN_ASSERT(1 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto input = inputs[0];
auto output = outputs[0];
auto dims = input->buffer().dimensions;
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
if (axis == 1) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
int inside = 1;
int outside = 1;
int channel = 1;
for (int i = 0; i < axis; ++i) {
outside *= input->length(i);
}
channel = input->length(axis);
for (int i = axis + 1; i < dims; ++i) {
inside *= input->length(i);
}
//input transform to NCHW format
std::shared_ptr<Tensor> tmpInput;
{
tmpInput.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(tmpInput.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = channel*inside;
desReg.src.stride[1] = inside;
desReg.src.stride[2] = 1;
desReg.origin = input;
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(tmpInput);
}
//reduction max, axis=1
std::shared_ptr<Tensor> maxValue;
{
maxValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
res.extras.emplace_back(maxValue);
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_MAXIMUM, tmpInput.get(), maxValue.get()));
}
//broadcast reduction axis dim
std::shared_ptr<Tensor> maxBroadValue;
{
maxBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(maxBroadValue.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = inside;
desReg.src.stride[1] = 0;
desReg.src.stride[2] = 1;
desReg.origin = maxValue.get();
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(maxBroadValue);
}
//sub
std::shared_ptr<Tensor> subMaxValue;
{
subMaxValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_SUB, tmpInput.get(), maxBroadValue.get(), subMaxValue.get());
res.extras.emplace_back(subMaxValue);
res.command.emplace_back(std::move(cmd));
}
//exp
std::shared_ptr<Tensor> expValue;
{
expValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeUnary(UnaryOpOperation_EXP, subMaxValue.get(), expValue.get());
res.extras.emplace_back(expValue);
res.command.emplace_back(std::move(cmd));
}
//reduction sum, axis=2, only support NCHW
std::shared_ptr<Tensor> sumValue;
{
sumValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
res.extras.emplace_back(sumValue);
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_SUM, expValue.get(), sumValue.get()));
}
//broadcast reduction axis dim
std::shared_ptr<Tensor> sumBroadValue;
{
sumBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(sumBroadValue.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = inside;
desReg.src.stride[1] = 0;
desReg.src.stride[2] = 1;
desReg.origin = sumValue.get();
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(sumBroadValue);
}
//div
std::shared_ptr<Tensor> tmpOutput;
{
tmpOutput.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_REALDIV, expValue.get(), sumBroadValue.get(), tmpOutput.get());
res.extras.emplace_back(tmpOutput);
res.command.emplace_back(std::move(cmd));
}
//transform to output
{
auto outputDes = TensorUtils::getDescribe(output);
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = channel*inside;
desReg.src.stride[1] = inside;
desReg.src.stride[2] = 1;
desReg.origin = tmpOutput.get();
outputDes->regions.emplace_back(std::move(desReg));
}
return true;
}
};
static void _create() {
// std::shared_ptr<GeometryComputer> comp(new GeometrySoftmax);
// GeometryComputer::registerGeometryComputer(comp, {OpType_Softmax});
}
REGISTER_GEOMETRY(GeometrySoftmax, _create);
} // namespace MNN
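For reference, the decomposition above (reduce-max, subtract, exp, reduce-sum, divide) and the CPUSoftmax kernel earlier in this commit both compute the numerically stable form of softmax: softmax(x)_i = exp(x_i - max_j x_j) / Σ_k exp(x_k - max_j x_j). This equals the plain definition but keeps every exponent argument non-positive, which avoids overflow in exp.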

View File

@ -7,7 +7,7 @@ add_executable(benchmarkExprModels.out ${CMAKE_CURRENT_LIST_DIR}/benchmarkExprMo
target_include_directories(benchmarkExprModels.out PRIVATE "${CMAKE_CURRENT_LIST_DIR}/exprModels" ${CMAKE_CURRENT_SOURCE_DIR}/)
target_link_libraries(benchmarkExprModels.out ${MNN_DEPS})
if ((MSVC OR WIN32) AND NOT MNN_BUILD_SHARED_LIBS)
if (MSVC AND NOT MNN_BUILD_SHARED_LIBS)
foreach (DEPEND ${MNN_DEPS})
target_link_options(benchmark.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)
target_link_options(benchmarkExprModels.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)

View File

@ -124,6 +124,7 @@ std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward
const auto bufferSize = revertor->getBufferSize();
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
revertor.reset();
net->setSessionMode(MNN::Interpreter::Session_Release);
MNN::ScheduleConfig config;
config.numThread = numberThread;
config.type = static_cast<MNNForwardType>(forward);

View File

@ -90,6 +90,7 @@ static std::vector<float> runNet(VARP netOutput, const ScheduleConfig& config, i
const void* buf = builder.GetBufferPointer();
size_t size = builder.GetSize();
std::unique_ptr<Interpreter> net(Interpreter::createFromBuffer(buf, size));
net->setSessionMode(MNN::Interpreter::Session_Release);
auto session = net->createSession(config);
net->releaseModel();
auto inputTensor = net->getSessionInput(session, NULL);
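Both benchmark harnesses above now call setSessionMode before creating a session. A minimal sketch of that flow, assuming only the public MNN Interpreter API already used in these diffs (the helper name runOnce and the thread count are illustrative, not part of the commit):

#include <MNN/Interpreter.hpp>
#include <memory>

// Sketch only: 'buffer' and 'size' are assumed to hold a serialized .mnn model in memory.
void runOnce(const void* buffer, size_t size) {
    auto net = std::shared_ptr<MNN::Interpreter>(
        MNN::Interpreter::createFromBuffer(buffer, size));
    // Select the session mode before createSession, as the updated benchmarks do.
    net->setSessionMode(MNN::Interpreter::Session_Release);
    MNN::ScheduleConfig config;
    config.numThread = 4;      // arbitrary choice for the sketch
    auto session = net->createSession(config);
    net->releaseModel();       // mirrors the benchmark flow once the session is built
    net->runSession(session);
}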

View File

@ -1,84 +0,0 @@
import os
import sys
major_py_ver = sys.version_info.major
def convert_string_to_hex_list(code_str):
hex_list = []
for i in range(len(code_str)):
hex_ = hex(ord(code_str[i]))
hex_list.append(hex_)
return hex_list
def opencl_codegen():
cl_kernel_dir = sys.argv[1]
output_path = sys.argv[2]
print("Generating OpenCL Kernels in "+cl_kernel_dir+" to "+output_path)
if not os.path.exists(cl_kernel_dir):
print(cl_kernel_dir + " doesn't exist!")
#common.h
common_header_code = ""
#quantized_common.h
quantized_common_header_code = ""
#activation_common.h
activation_common_header_code = ""
for file_name in os.listdir(cl_kernel_dir):
file_path = os.path.join(cl_kernel_dir, file_name)
if file_path[-2:] == ".h" and file_name[:-2] == "quantized_common":
with open(file_path, "r") as f:
quantized_common_header_code += f.read()
elif file_path[-2:] == ".h" and file_name[:-2] == "activation_common":
with open(file_path, "r") as f:
activation_common_header_code += f.read()
opencl_code_maps = {}
for file_name in os.listdir(cl_kernel_dir):
file_path = os.path.join(cl_kernel_dir, file_name)
if file_path[-3:] == ".cl":
with open(file_path, "r") as f:
code_str = ""
for line in f.readlines():
if "#include <activation_common.h>" in line:
code_str += common_header_code
code_str += activation_common_header_code
elif "#include <quantized_common.h>" in line:
code_str += common_header_code
code_str += quantized_common_header_code
elif "#include <common.h>" in line:
code_str += common_header_code
else:
code_str += line
opencl_code_maps[file_name[:-3]] = convert_string_to_hex_list(code_str)
#source model
opencl_source_map = "#include <map> \n"
opencl_source_map += "#include <string> \n"
opencl_source_map += "#include <vector> \n"
opencl_source_map += "namespace MNN { \n"
opencl_source_map += "extern const std::map<std::string, std::vector<unsigned char>> OpenCLProgramMap = \n { \n"
if major_py_ver == 2:
items = opencl_code_maps.iteritems()
else:
items = opencl_code_maps.items()
for file_name, file_source in items:
opencl_source_map += "{\n \""
opencl_source_map += file_name
opencl_source_map += "\", \n"
opencl_source_map += " { "
for source_hex in file_source:
opencl_source_map += source_hex
opencl_source_map += ","
opencl_source_map += " } "
opencl_source_map += "\n }, \n"
opencl_source_map += " }; \n"
opencl_source_map += "} \n"
with open(output_path, "w") as w_file:
w_file.write(opencl_source_map)
print("Generate OpenCL Source done !!! \n")
if __name__ == '__main__':
opencl_codegen()

ciscripts/build.sh Normal file
View File

@ -0,0 +1,140 @@
#!/usr/bin/env bash
. ./parse_options.sh || exit 1;
CMAKE=cmake
MAKE=make
ANDROID_NDK=/home/android-ndk-r18b
BUILD_ROOT=`pwd`
# Clean the existing directory rather than removing it, to avoid the
# "Current working directory cannot be established" problem.
function make_or_clean_dir {
if [ -d $1 ]; then
rm -rf $1/*
else
mkdir $1
fi
}
function build_arm_android_32 {
make_or_clean_dir build_arm_android_32 && cd build_arm_android_32
$CMAKE ../.. \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="armeabi-v7a" \
-DANDROID_STL=c++_static \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_NATIVE_API_LEVEL=android-21 \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=true \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. \
-DNATIVE_INCLUDE_OUTPUT=. \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_OPENGL=$USE_OPENGL \
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_arm_android_64 {
make_or_clean_dir build_arm_android_64 && cd build_arm_android_64
$CMAKE ../.. \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_STL=c++_static \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_NATIVE_API_LEVEL=android-21 \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=true \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. \
-DNATIVE_INCLUDE_OUTPUT=. \
-DMNN_ARM82=ON \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_OPENGL=$USE_OPENGL \
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_arm_linux_32 {
cd $BUILD_ROOT; true;
}
function build_arm_linux_64 {
cd $BUILD_ROOT; true;
}
function build_x86_linux {
make_or_clean_dir build_x86_linux && cd build_x86_linux
$CMAKE ../.. \
-DCMAKE_BUILD_TYPE=Release \
-DMNN_BUILD_TRAIN=ON \
-DMNN_SEP_BUILD=OFF \
-DMNN_BUILD_DEMO=ON \
-DMNN_BUILD_QUANTOOLS=ON \
-DMNN_EVALUATION=ON \
-DMNN_BUILD_CONVERTER=ON \
-DMNN_SUPPORT_TFLITE_QUAN=ON \
-DMNN_BUILD_TEST=ON \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENMP=$USE_OPENMP \
-DMNN_USE_THREAD_POOL=OFF \
-DMNN_BUILD_BENCHMARK=ON || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_all {
build_arm_android_32 || exit 1;
build_arm_android_64 || exit 1;
build_arm_linux_32 || exit 1;
build_arm_linux_64 || exit 1;
build_x86_linux || exit 1;
true;
}
function clean {
rm -rf build_arm_android_32
rm -rf build_arm_android_64
rm -rf build_arm_linux_32
rm -rf build_arm_linux_64
rm -rf build_x86_linux
}
function build {
case $platform in
"arm_linux_32")
build_arm_linux_32 || exit 1;
;;
"arm_linux_64")
build_arm_linux_64 || exit 1;
;;
"x86_linux")
build_x86_linux || exit 1;
;;
"arm_android_32")
build_arm_android_32 || exit 1;
;;
"arm_android_64")
build_arm_android_64 || exit 1;
;;
"all")
build_all || exit 1;
;;
*) echo "Invalid platform: $platform" && exit 1;
esac
}
if [ $clean == 1 ]; then
clean
else
build $@
fi
true;

113
ciscripts/parse_options.sh Normal file
View File

@ -0,0 +1,113 @@
#!/usr/bin/env bash
# Valid platforms:
# - arm_android_32
# - arm_android_64
# - arm_linux_32
# - arm_linux_64
# - x86_linux
platform="all"
# Option to build with opencl.
use_opencl=0
# Option to build with opengl.
use_opengl=0
# Option to build with vulkan.
use_vulkan=0
# Option to build with openmp multithreads library.
use_openmp=0
build_threads=1
# Option to clear the build history.
clean=0
USE_OPENCL=OFF
USE_VULKAN=OFF
USE_OPENGL=OFF
USE_OPENMP=OFF
USE_THREAD_POOL=ON
function print_usage {
echo -e "Usgae: ./build.sh"
echo -e " --platform=x: Specify build platform x. "
echo -e " All valid platforms are \"arm_android_32\", \"arm_android_64\",
\"arm_linux_32\", \"arm_linux_64\", \"x86_linux\", \"all\"."
echo -e " The default is \"all\"."
echo -e " --use_openmp=true|false: Build with openmp or not."
echo -e " The default is false."
echo -e " --use_opencl=true|false: Build with opencl or not."
echo -e " The default is false."
echo -e " --use_opengl=true|false: Build with opengl or not."
echo -e " The default is false."
echo -e " --use_vulkan=true|false: Build with vulkan or not."
echo -e " The default is false."
echo -e " --job=n: Build with n threads. Default is 1."
}
function parse_platform {
platform=`echo "$1" | awk -F '=' '{print $2}'`
}
function parse_nthreads {
build_threads=`echo "$1" | awk -F '=' '{print $2}'`
}
function parse_bool {
val=`echo "$1" | awk -F '=' '{print $2}'`
if [ $val == "true" ] || [ $val == "1" ]; then
return 1;
else
return 0;
fi
}
[ -z "${1:-}" ] && print_usage && exit 1;
while true; do
[ -z "${1:-}" ] && break;
case "$1" in
--platform=*) parse_platform "$1"; shift 1;
;;
--use_openmp=*) parse_bool "$1"; use_openmp=$?; shift 1;
;;
--use_openmp) use_openmp=1; shift 1;
;;
--use_opencl=*) parse_bool "$1"; use_opencl=$?; shift 1;
;;
--use_opencl) use_opencl=1; shift 1;
;;
--use_opengl=*) parse_bool "$1"; use_opengl=$?; shift 1;
;;
--use_opengl) use_opengl=1; shift 1;
;;
--use_vulkan=*) parse_bool "$1"; use_vulkan=$?; shift 1;
;;
--use_vulkan) use_vulkan=1; shift 1;
;;
--job=*) parse_nthreads "$1"; shift 1;
;;
clean) clean=1; shift 1;
;;
*) break;
esac
done
if [ $use_opencl == 1 ]; then
USE_OPENCL=ON
fi
if [ $use_opengl == 1 ]; then
USE_OPENGL=ON
fi
if [ $use_vulkan == 1 ]; then
USE_VULKAN=ON
fi
if [ $use_openmp == 1 ]; then
USE_OPENMP=ON
USE_THREAD_POOL=OFF
fi
true;

View File

@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars64.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja

View File

@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars32.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja

View File

@ -12,3 +12,9 @@ target_link_libraries(segment.out ${MNN_DEPS})
add_executable(expressDemo.out ${CMAKE_CURRENT_LIST_DIR}/expressDemo.cpp)
target_link_libraries(expressDemo.out ${MNN_DEPS})
add_executable(transformerDemo.out ${CMAKE_CURRENT_LIST_DIR}/transformerDemo.cpp)
target_link_libraries(transformerDemo.out ${MNN_DEPS})
add_executable(rasterDemo.out ${CMAKE_CURRENT_LIST_DIR}/rasterDemo.cpp)
target_link_libraries(rasterDemo.out ${MNN_DEPS})

View File

@ -53,7 +53,6 @@ int main(int argc, const char* argv[]) {
MNN_ERROR("Output Not valid\n");
return 0;
}
auto size = outputInfo->size;
//Test Speed
if (testTime > 0){
//Let the frequency up
@ -82,6 +81,7 @@ int main(int argc, const char* argv[]) {
}
{
auto size = outputInfo->size;
auto outputPtr = output->readMap<float>();
if (nullptr == outputPtr) {
MNN_ERROR("Output Not valid read error\n");

251
demo/exec/rasterDemo.cpp Normal file
View File

@ -0,0 +1,251 @@
//
// rasterDemo.cpp
// MNN
//
// Created by MNN on 2020/10/14.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <fstream>
#include <sstream>
#include <iostream>
#include <chrono>
#include <MNN/MNNDefine.h>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/Execution.hpp"
#include "core/Backend.hpp"
#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
using namespace MNN;
/*
1. Raster does the index mapping below:
for (region : regions)
src = region.src, dst = region.dst;
for (i = 0 -> size[0])
for (j = 0 -> size[1])
for (k = 0 -> size[2])
output[dst.offset + i * dst.stride[0] + j * dst.stride[1] + k * dst.stride[2]] =
region.origin[src.offset + i * src.stride[0] + j * src.stride[1] + k * src.stride[2]];
2. The Raster op has one input and one output, but the input is not a real input tensor: it is a
middle tensor with VIRTUAL memory type whose regions point to the real input tensors, like below.
input_0 --> region_0 --\
                        \
input_1 --> region_1 ---- middle ----> output
                        /
input_2 --> region_2 --/
3. This example reads a json file, constructs the Rasters from it and runs them.
The input json file format is as below:
{
"inputs" : [
{
"id" : int,
"type" : "type_name", // float or int
"dims" : [int],
"data" : [int/float] // if null, fill with random number
}
],
"outputs" : [
// same as inputs
],
"regions" : [
{
"id" : int, // points to outputs
"size" : [int],
"src" : {
"offset" : int,
"stride" : [int]
},
"dst" : { // same with src },
"origin" : int // point to inputs
}
]
}
*/
static std::string runRaster(std::string jsonString, int runNum) {
srand(0);
rapidjson::Document document;
document.Parse(jsonString.c_str());
if (document.HasParseError()) {
MNN_ERROR("Invalid Json Format!\n");
return "";
}
// prepare CPU backend
ScheduleConfig config;
config.type = MNN_FORWARD_CPU;
BackendConfig backendConfig;
backendConfig.precision = BackendConfig::Precision_High;
config.backendConfig = &backendConfig;
Backend::Info compute;
compute.type = config.type;
compute.numThread = config.numThread;
compute.user = config.backendConfig;
const RuntimeCreator* runtimeCreator(MNNGetExtraRuntimeCreator(compute.type));
std::unique_ptr<Runtime> runtime(runtimeCreator->onCreate(compute));
std::unique_ptr<Backend> backend(runtime->onCreate());
// build Op
std::unique_ptr<OpT> opt(new OpT);
opt->type = OpType_Raster;
flatbuffers::FlatBufferBuilder builder(1024);
builder.ForceDefaults(true);
auto len = Op::Pack(builder, opt.get());
builder.Finish(len);
auto buffer = builder.GetBufferPointer();
const Op* op = flatbuffers::GetMutableRoot<Op>(buffer);
// build tensors (NCHW) from json
std::vector<std::unique_ptr<Tensor>> inputs;
std::vector<std::unique_ptr<Tensor>> outputs;
auto readTensors = [&document, &backend](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
if (document.HasMember(type)) {
auto info = document[type].GetArray();
tensors.resize(info.Size());
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
tensors[id].reset(new Tensor(4));
auto tensor = tensors[id].get();
auto dataType = obj["type"].GetString();
bool isFloat = !strcmp(dataType, "float");
tensor->setType(isFloat ? DataType_DT_FLOAT : DataType_DT_INT32);
auto dims = obj["dims"].GetArray();
for (auto d = dims.begin(); d != dims.end(); d++) {
tensor->setLength(d - dims.begin(), d->GetInt());
}
TensorUtils::setLinearLayout(tensor);
backend->onAcquireBuffer(tensor, Backend::STATIC);
TensorUtils::getDescribe(tensor)->backend = backend.get();
auto data = obj["data"].GetArray();
if (!strcmp(type, "inputs")) {
bool hasData = data.Size() == tensor->elementSize();
auto dataIter = data.begin();
for (int i = 0; i < tensor->elementSize(); i++, dataIter++) {
if (isFloat) {
tensor->host<float>()[i] = hasData ? dataIter->GetFloat() : rand() % 10 / 10.0;
} else {
tensor->host<int>()[i] = hasData ? dataIter->GetInt() : rand() % 10;
}
}
}
}
}
};
readTensors(inputs, "inputs");
readTensors(outputs, "outputs");
// build middle tensors' region info from json
std::vector<std::unique_ptr<Tensor>> middles;
middles.resize(outputs.size());
if (document.HasMember("regions")) {
auto info = document["regions"].GetArray();
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
if (middles[id] == nullptr) {
middles[id].reset(new Tensor(4));
}
auto des = TensorUtils::getDescribe(middles[id].get());
des->memoryType = MNN::Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region region;
int origin = obj["origin"].GetInt();
region.origin = inputs[origin].get();
auto size = obj["size"].GetArray();
auto src = obj["src"].GetObject();
auto dst = obj["dst"].GetObject();
auto srcStride = src["stride"].GetArray();
auto dstStride = dst["stride"].GetArray();
for (int i = 0; i < 3; i++) {
region.size[i] = size[i].GetInt();
region.src.stride[i] = srcStride[i].GetInt();
region.dst.stride[i] = dstStride[i].GetInt();
}
region.src.offset = src["offset"].GetInt();
region.dst.offset = dst["offset"].GetInt();
des->regions.push_back(region);
}
}
// build execution of Raster and run them
for (int i = 0; i < outputs.size(); i++) {
std::vector<Tensor*> ins = {middles[i].get()}, outs = {outputs[i].get()};
std::unique_ptr<Execution> exe(backend->onCreate(ins, outs, op));
exe->onResize(ins, outs);
auto t1 = std::chrono::high_resolution_clock::now();
for (int j = 0; j < runNum; j++) {
exe->onExecute(ins, outs);
}
auto t2 = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
double time = time_span.count() * 1000.0 / runNum;
printf("For output_id = %d, run %d times, the average time is %f ms.\n", i, runNum, time);
}
auto writeTensors = [&document](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
auto info = document[type].GetArray();
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
auto data = obj["data"].GetArray();
if (data.Size() == tensors[id]->elementSize()) {
// already has data; skip this tensor instead of overwriting it
continue;
}
bool isFloat = !strcmp(obj["type"].GetString(), "float");
data.Reserve(tensors[id]->elementSize(), document.GetAllocator());
for (int i = 0; i < tensors[id]->elementSize(); i++) {
if (isFloat) {
data.PushBack(tensors[id]->host<float>()[i], document.GetAllocator());
} else {
data.PushBack(tensors[id]->host<int>()[i], document.GetAllocator());
}
}
}
};
writeTensors(inputs, "inputs");
writeTensors(outputs, "outputs");
rapidjson::StringBuffer stringBuffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(stringBuffer);
document.Accept(writer);
return stringBuffer.GetString();
}
int main(int argc, const char* argv[]) {
if (argc < 2) {
printf("Usage: ./rasterDemo.out input.json [output.json] [runNum]\ndefault output is input, and default runNum is 100.\n");
return 0;
}
const char* inputFile = argv[1];
const char* outputFile = argv[1];
int runNum = 100;
if (argc >= 3) {
outputFile = argv[2];
}
if (argc >= 4) {
runNum = ::atoi(argv[3]);
}
std::ifstream in(inputFile);
if (in.fail()) {
printf("Invalid input Json File!\n");
return 0;
}
std::ofstream out(outputFile);
if (out.fail()) {
printf("Invalid output Json File!\n");
return 0;
}
std::stringstream ss;
ss << in.rdbuf();
out << runRaster(ss.str(), runNum);
out.close();
printf("Run Raster Done!\n");
return 0;
}
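To make the JSON format documented above concrete, here is a hedged sketch (not part of this commit) that writes a minimal input file for the demo: a single region transposing a 2x3 float tensor into 3x2. The ids, dims and strides are illustrative assumptions.
// write_raster_input.cpp -- hypothetical helper that emits a sample input for ./rasterDemo.out
#include <fstream>
int main() {
    // One region implementing output[i * 1 + j * 2] = input[i * 3 + j] for i in [0,2), j in [0,3)
    const char* json = R"({
"inputs"  : [ { "id" : 0, "type" : "float", "dims" : [1, 1, 2, 3],
                "data" : [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] } ],
"outputs" : [ { "id" : 0, "type" : "float", "dims" : [1, 1, 3, 2], "data" : [] } ],
"regions" : [ { "id" : 0, "origin" : 0, "size" : [2, 3, 1],
                "src" : { "offset" : 0, "stride" : [3, 1, 1] },
                "dst" : { "offset" : 0, "stride" : [1, 2, 1] } } ]
})";
    std::ofstream out("input.json");
    out << json;   // then run: ./rasterDemo.out input.json output.json
    return 0;      // expected output "data" after the run: [1, 4, 2, 5, 3, 6]
}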

View File

@ -0,0 +1,60 @@
#include <MNN/expr/Module.hpp>
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Executor.hpp>
#include <fstream>
#include <sstream>
#include <stdio.h>
#include<string.h>
using namespace MNN::Express;
using namespace MNN;
using namespace std;
int main(int argc, const char* argv[]) {
if (argc < 2) {
MNN_ERROR("Don't has model name\n");
return 0;
}
BackendConfig config;
//Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 4);
auto modelName = argv[1];
std::shared_ptr<Module> model;
model.reset(Module::load({"NmtModel/Placeholder", "NmtModel/Placeholder_1"}, {"NmtModel/transpose_2"}, modelName));
std::vector<int> input0 = {32,16,234,3215,61,135,29,10,24317,4661,4,0};
std::vector<int> input1 = {1,1,1,1,1,1,1,1,1,1,1,1};
auto first = _Input({1, (int)input0.size()}, NHWC, halide_type_of<int>());
::memcpy(first->writeMap<int>(), input0.data(), input0.size() * sizeof(int));
auto second = _Input({1, (int)input1.size()}, NHWC, halide_type_of<int>());
::memcpy(second->writeMap<int>(), input1.data(), input1.size() * sizeof(int));
std::vector<VARP> outputs;
for (int i = 0; i < 2; ++i) {
{
AUTOTIME;
Executor::getGlobalExecutor()->resetProfile();
outputs = model->onForward({first, second});
Executor::getGlobalExecutor()->dumpProfile();
}
std::ostringstream fileNameOs;
std::ostringstream dimInfo;
fileNameOs << i << "_output.txt";
auto info = outputs[0]->getInfo();
for (int d=0; d<info->dim.size(); ++d) {
dimInfo << info->dim[d] << "_";
}
auto fileName = fileNameOs.str();
MNN_PRINT("Output Name: %s, Dim: %s\n", fileName.c_str(), dimInfo.str().c_str());
auto ptr = outputs[0]->readMap<int>();
std::ofstream outputOs(fileName.c_str());
for (int i=0; i<info->size; ++i) {
outputOs << ptr[i] << "\n";
}
}
for (int i = 0; i < 10; ++i) {
AUTOTIME;
outputs = model->onForward({first, second});
}
return 0;
}

View File

@ -53,27 +53,23 @@ static int CompareElements(const LabeledElement *a, const LabeledElement *b) {
if (!_net || !_session) {
return nil;
}
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
MNN::Tensor copy(output);
auto input = _net->getSessionInput(_session, nullptr);
MNN::Tensor tensorCache(input);
input->copyToHostTensor(&tensorCache);
// run
NSTimeInterval begin = NSDate.timeIntervalSinceReferenceDate;
// you should set input data for each inference
if (cycles == 1) {
for (int i = 0; i < cycles; i++) {
input->copyFromHostTensor(&tensorCache);
_net->runSession(_session);
} else {
auto input = _net->getSessionInput(_session, nullptr);
MNN::Tensor tensorCache(input);
input->copyToHostTensor(&tensorCache);
for (int i = 0; i < cycles; i++) {
input->copyFromHostTensor(&tensorCache);
_net->runSession(_session);
}
output->copyToHostTensor(&copy);
}
NSTimeInterval cost = NSDate.timeIntervalSinceReferenceDate - begin;
// result
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
MNN::Tensor copy(output);
output->copyToHostTensor(&copy);
float *data = copy.host<float>();
LabeledElement objects[1000];
for (int i = 0; i < 1000; i++) {

View File

@ -1,14 +1,21 @@
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.*")
option(MNN_EXPR_ENABLE_PROFILER "Support profiling Expr's op cost" OFF)
option(MNN_EXPR_SHAPE_EAGER "Force computing Expr's shape eagerly at creation time" OFF)
IF (MNN_EXPR_ENABLE_PROFILER)
add_definitions(-DMNN_EXPR_ENABLE_PROFILER)
ENDIF()
IF (MNN_EXPR_SHAPE_EAGER)
add_definitions(-DMNN_EXPR_SHAPE_EAGER)
ENDIF()
IF(MNN_SEP_BUILD)
if (MNN_BUILD_FOR_ANDROID_COMMAND)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../")
endif()
add_library(MNN_Express SHARED ${MNN_EXPR_SRCS})
target_link_libraries(MNN_Express MNN)
if (MNN_BUILD_MINI)
target_link_libraries(MNN_Express $<TARGET_OBJECTS:MNNTransform>)
endif()
ELSE()
add_library(MNNExpress OBJECT ${MNN_EXPR_SRCS})
ENDIF()

30
express/Distributions.cpp Normal file
View File

@ -0,0 +1,30 @@
//
// Distributions.cpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "Distributions.hpp"
#include <cmath>
namespace MNN {
namespace Express {
void Distributions::uniform(const int count, const float min, const float max, float *r, std::mt19937 gen) {
std::uniform_real_distribution<float> dis(min, std::nextafter(max, std::numeric_limits<float>::max()));
for (int i = 0; i < count; i++) {
r[i] = dis(gen);
}
}
void Distributions::gaussian(const int count, const float mu, const float sigma, float *r, std::mt19937 gen) {
std::normal_distribution<float> dis(mu, sigma);
for (int i = 0; i < count; i++) {
r[i] = dis(gen);
}
}
} // namespace Express
} // namespace MNN
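A hedged usage sketch for the helpers above (not part of this commit). Note that the std::mt19937 parameter is taken by value, so the caller's generator state is not advanced by these calls.
#include <random>
#include <vector>
#include "Distributions.hpp"   // project-internal header under express/
int main() {
    std::mt19937 gen(42);
    std::vector<float> buf(8);
    // Fill buf with values drawn uniformly from [0, 1]; gen is copied in,
    // so a second call with the same gen sees the identical generator state.
    MNN::Express::Distributions::uniform((int)buf.size(), 0.0f, 1.0f, buf.data(), gen);
    // Overwrite buf with N(0, 1) samples.
    MNN::Express::Distributions::gaussian((int)buf.size(), 0.0f, 1.0f, buf.data(), gen);
    return 0;
}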

27
express/Distributions.hpp Normal file
View File

@ -0,0 +1,27 @@
//
// Distributions.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef Distributions_hpp
#define Distributions_hpp
#include <MNN/MNNDefine.h>
#include <random>
namespace MNN {
namespace Express {
class Distributions {
public:
static void uniform(const int count, const float min, const float max, float* r, std::mt19937 gen);
static void gaussian(const int count, const float mu, const float sigma, float* r, std::mt19937 gen);
};
} // namespace Express
} // namespace MNN
#endif // Distributions_hpp

File diff suppressed because it is too large

45
express/ExecutorScope.cpp Normal file
View File

@ -0,0 +1,45 @@
//
// ExecutorScope.cpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <thread>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/Scope.hpp>
#include <MNN/expr/ExecutorScope.hpp>
namespace MNN {
namespace Express {
typedef std::shared_ptr<Express::Executor> ExecutorRef;
#if !defined(__APPLE__)
thread_local static Scope<ExecutorRef> g_executor_scope;
#else
static Scope<ExecutorRef> g_executor_scope;
#endif
ExecutorScope::ExecutorScope(const std::shared_ptr<Executor>& current) {
g_executor_scope.EnterScope(current);
}
ExecutorScope::ExecutorScope(const std::string& scope_name,
const std::shared_ptr<Executor>& current) {
g_executor_scope.EnterScope(scope_name, current);
}
ExecutorScope::~ExecutorScope() {
g_executor_scope.ExitScope();
}
const std::shared_ptr<Executor> ExecutorScope::Current() {
if (g_executor_scope.ScopedLevel() > 0) {
return g_executor_scope.Current().content;
}
return Executor::getGlobalExecutor();
}
} // namespace Express
} // namespace MNN
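A minimal sketch of how the scope above can be used (an assumed usage pattern, not code from this commit): whatever executor is installed on a thread is returned by ExecutorScope::Current() until the scope object is destroyed, after which Current() falls back to the global executor.
#include <memory>
#include <thread>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExecutorScope.hpp>
using namespace MNN::Express;
static void worker(std::shared_ptr<Executor> exec) {
    ExecutorScope scope(exec);                // enter the scope (per-thread on non-Apple builds)
    auto current = ExecutorScope::Current();  // == exec while `scope` is alive
    // ... build and evaluate expressions here; they will use `current` ...
}                                             // ~ExecutorScope() exits the scope
int main() {
    // Placeholder: reuse the global executor; a dedicated Executor could be installed instead.
    std::thread t(worker, Executor::getGlobalExecutor());
    t.join();
    return 0;
}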

View File

@ -8,23 +8,33 @@
#define FLATBUFFERS_PREFER_PRINTF
#include <MNN/expr/Expr.hpp>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <map>
#include "core/MNNMemoryUtils.h"
#include "Utils.hpp"
#include <map>
#include "core/FileLoader.hpp"
#include <MNN/expr/Executor.hpp>
#include "core/TensorUtils.hpp"
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include "MNN/AutoTime.hpp"
#include "MNN/expr/ExecutorScope.hpp"
//#define MNN_EXPRESS_ERROR_REPORT
static inline std::string numberToString(int index) {
char s[10];
snprintf(s, 10, "%d", index);
return std::string(s);
}
static bool HasUnknownDim(const std::vector<int>& dims) {
for (const int& dim : dims) {
if (dim < 0) {
return true;
}
}
return false;
}
namespace MNN {
namespace Express {
void Variable::Info::syncSize() {
@ -87,8 +97,7 @@ bool VARP::fix(VARP::InputType type) const {
}
Expr::Expr(int outputSize) {
mInside.reset(new Inside);
mInside->mOutputInfos.resize(outputSize);
mInside.reset(new Inside(outputSize));
mOutputNames.resize(outputSize);
}
@ -117,27 +126,46 @@ void Expr::_addLinkForInputs(EXPRP expr) {
}
}
}
EXPRP Expr::create(Variable::Info&& info) {
EXPRP Expr::create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy) {
EXPRP expr(new Expr(1));
expr->mOp = nullptr;
auto originPtr = info.ptr;
auto originPtr = ptr;
expr->mInside->mOutputInfos[0] = std::move(info);
auto& dstInfo = expr->mInside->mOutputInfos[0];
expr->mInside->mInfoDirty = false;
dstInfo.syncSize();
if (dstInfo.size > 0) {
expr->mExtraBuffer.reset(new char[dstInfo.size * dstInfo.type.bytes()], std::default_delete<char[]>());
expr->mInside->mOutputInfos[0].ptr = expr->mExtraBuffer.get();
expr->mInside->mInfoDirty = false;
Utils::copyInfoToTensor(expr->mInside->mOutputTensors[0], expr->mInside->mOutputInfos.data());
expr->mType = type;
if (type == VARP::CONSTANT) {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::CONSTANT;
} else if (type == VARP::INPUT) {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::INPUT;
} else {
expr->mInside->mOutputInfos[0].ptr = nullptr;
expr->mInside->mInfoDirty = true;
// VARP::TRAINABLE
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::TRAINABLE;
}
if (dstInfo.size > 0 && copy) {
auto res = Utils::allocMemoryForHostTensor(expr->mInside->mOutputTensors[0]);
if (!res) {
MNN_ASSERT(false);
return nullptr;
}
} else {
expr->mInside->mOutputTensors[0]->buffer().host = nullptr;
}
if (nullptr == originPtr) {
expr->mType = VARP::INPUT;
if (type == VARP::INPUT && dstInfo.size > 0) {
expr->mInside->mContentDirty = true;
}
return expr;
}
expr->mType = VARP::CONSTANT;
::memcpy(expr->mInside->mOutputInfos[0].ptr, originPtr, dstInfo.size * dstInfo.type.bytes());
expr->mInside->mContentDirty = false;
if (copy) {
::memcpy(expr->mInside->mOutputTensors[0]->buffer().host, originPtr, dstInfo.size * dstInfo.type.bytes());
} else {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->memoryType = Tensor::InsideDescribe::MEMORY_OUTSIDE;
expr->mInside->mOutputTensors[0]->buffer().host = (uint8_t*)originPtr;
}
return expr;
}
EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize) {
@ -147,8 +175,7 @@ EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP
expr->mOp = flatbuffers::GetMutableRoot<Op>(extra.first.get());
expr->mOpBufferSize = extra.second;
expr->mInputs = std::move(inputs);
expr->mInside->mInputInfos.resize(expr->mInputs.size());
expr->mInside->mReq = Executor::getGlobalExecutor()->getRequirement(expr.get());
expr->mInside->mReq = ExecutorScope::Current()->getRequirement(expr.get());
_addLinkForInputs(expr);
return expr;
}
@ -161,34 +188,34 @@ EXPRP Expr::create(const OpT* op, std::vector<VARP> inputs, int outputSize) {
info.dim[0] = 1;
}
info.order = Utils::revertFormat(op->main.AsInput()->dformat);
info.ptr = nullptr;
info.type = Utils::revertDataType(op->main.AsInput()->dtype);
return create(std::move(info));
return create(std::move(info), nullptr, VARP::INPUT);
}
if (OpType_Const == op->type || OpType_TrainableParam == op->type) {
Variable::Info info;
info.dim = op->main.AsBlob()->dims;
info.order = Utils::revertFormat(op->main.AsBlob()->dataFormat);
info.ptr = nullptr;
void* ptr = nullptr;
info.type = Utils::revertDataType(op->main.AsBlob()->dataType);
switch (op->main.AsBlob()->dataType) {
case DataType_DT_INT8:
info.ptr = (void*)op->main.AsBlob()->int8s.data();
ptr = (void*)op->main.AsBlob()->int8s.data();
break;
case DataType_DT_INT32:
info.ptr = (void*)op->main.AsBlob()->int32s.data();
ptr = (void*)op->main.AsBlob()->int32s.data();
break;
case DataType_DT_UINT8:
info.ptr = (void*)op->main.AsBlob()->uint8s.data();
ptr = (void*)op->main.AsBlob()->uint8s.data();
break;
case DataType_DT_FLOAT:
info.ptr = (void*)op->main.AsBlob()->float32s.data();
ptr = (void*)op->main.AsBlob()->float32s.data();
break;
default:
break;
}
auto expr = create(std::move(info));
if (OpType_TrainableParam == op->type) {
//MNN_ASSERT(nullptr != ptr);
auto expr = create(std::move(info), ptr, VARP::CONSTANT);
if (OpType_TrainableParam == op->type && nullptr != ptr) {
expr->mType = VARP::TRAINABLE;
}
return expr;
@ -213,7 +240,7 @@ bool Expr::requireInfo() {
return false;
}
if (nullptr == mOp) {
return mInside->mOutputInfos[0].size > 0;
return !HasUnknownDim(mInside->mOutputInfos[0].dim);
}
bool ready = true;
for (int i = 0; i < mInputs.size(); ++i) {
@ -221,8 +248,8 @@ bool Expr::requireInfo() {
// The Variable is set nullptr by api
return false;
}
mInside->mInputInfos[i] = mInputs[i]->getInfo();
if (nullptr == mInside->mInputInfos[i] && (!mInside->mReq.supportError[i])) {
auto inputInfo = mInputs[i]->getInfo();
if (nullptr == inputInfo) {
#ifdef MNN_EXPRESS_ERROR_REPORT
MNN_ERROR("%s, %d input not ready\n", mName.c_str(), i);
#endif
@ -233,15 +260,19 @@ bool Expr::requireInfo() {
for (int i = 0; i < mInputs.size(); ++i) {
auto& v = mInputs[i];
if (mInside->mReq.shapeNeedContent[i]) {
// `readInternal` maybe return nullptr if element count is 0.
v->readInternal(true);
// When the shape computation needs the content, it must not be nullptr
auto ptr = v->readInternal(true);
if (nullptr == ptr) {
ready = false;
break;
}
}
}
if (!ready) {
return false;
}
//MNN_PRINT("Info %s, %p Start\n", mName.c_str(), this);
auto res = Executor::getGlobalExecutor()->computeInfo(this);
auto res = ExecutorScope::Current()->computeInfo(this);
//MNN_PRINT("Info Compute %s\n", mName.c_str());
if (NO_ERROR == res) {
@ -261,6 +292,14 @@ const std::vector<WeakEXPRP>& Variable::toExprs() const {
VARP Variable::create(EXPRP expr, int index) {
VARP res(new Variable(expr, index));
#ifdef MNN_EXPR_SHAPE_EAGER
auto info = expr->requireInfo();
if (!info) {
#ifdef MNN_EXPRESS_ERROR_REPORT
MNN_ERROR("Can't compute shape\n");
#endif
}
#endif
return res;
}
void Expr::replace(EXPRP old, EXPRP from) {
@ -307,16 +346,22 @@ void Expr::replace(EXPRP old, EXPRP from) {
old->mValid = from->mValid;
old->mInside = from->mInside;
old->mInputs = from->mInputs;
std::vector<Expr*> visited;
old->visitOutputs([&](EXPRP expr, int index) {
if (expr->mInside->mInfoDirty && expr->mValid && !expr->mInside->mLinkCache) {
if (expr->visited()) {
return false;
}
visited.emplace_back(expr.get());
expr->setVisited(true);
expr->mInside->mCache.reset();
expr->mInside->mCacheOffset = 0;
expr->mValid = true;
expr->mInside->mInfoDirty = true;
return true;
});
for (auto e : visited) {
e->setVisited(false);
}
}
void Variable::setName(const std::string& name) {
@ -351,7 +396,7 @@ bool Variable::input(VARP src) {
info = tempInfo.get();
}
auto dstInfo = getInfo();
bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size();
bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size() || info->type != dstInfo->type;
if (!needChange) {
for (int i=0; i<info->dim.size(); ++i) {
if (dstInfo->dim[i] != info->dim[i]) {
@ -362,22 +407,19 @@ bool Variable::input(VARP src) {
}
if (!mFrom->mInside->mCache) {
Executor::getGlobalExecutor()->makeCache({mFrom}, false);
ExecutorScope::Current()->makeCache({mFrom}, false);
}
if (needChange) {
bool needAlloc = info->size * info->type.bytes() > mFrom->mInside->mOutputInfos[0].size * mFrom->mInside->mOutputInfos[0].type.bytes();
mFrom->mInside->mOutputInfos[0] = *info;
if (needAlloc) {
mFrom->mExtraBuffer.reset(new char[info->size * info->type.bytes()], std::default_delete<char[]>());
}
mFrom->mInside->mOutputInfos[0].ptr = mFrom->mExtraBuffer.get();
mFrom->mInside->mCache->setShapeDirty(0, mFrom->outputInfo(0));
Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
}
if (info->size) {
auto dstPtr = writeInternal(false);
auto srcPtr = src->readMap<void>();
if (nullptr == dstPtr || nullptr == srcPtr) {
MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
//MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
return false;
}
::memcpy(dstPtr, srcPtr, info->size * info->type.bytes());
@ -387,7 +429,7 @@ bool Variable::input(VARP src) {
} else {
informDirty();
}
mFrom->mInside->mCache->setContentReady();
mFrom->mInside->mContentDirty = false;
return true;
}
@ -396,23 +438,44 @@ void Variable::replace(VARP dst, VARP src) {
dst->setExpr(nullptr, 0);
return;
}
if (nullptr == dst) {
dst.mContent = src.mContent;
return;
}
if (src->mFrom.get() == dst->mFrom.get()) {
dst->mFromIndex = src->mFromIndex;
return;
}
if (src->mFrom->outputSize() != dst->mFrom->outputSize()) {
// Can't replace the Expr; just replace the VARP
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
src->mFrom->mTo.emplace_back(expr);
return false;
});
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
std::vector<Expr*> visited;
dst->mFrom->visitOutputs([src, dst, &visited](EXPRP expr, int index) {
if (expr->visited()) {
return false;
}
expr->setVisited(true);
visited.emplace_back(expr.get());
expr->mInside->mCache.reset();
expr->mInside->mCacheOffset = 0;
expr->mValid = true;
expr->mInside->mInfoDirty = true;
expr->mInside->mContentDirty = true;
return true;
});
for (auto v : visited) {
v->setVisited(false);
}
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
for (int i =0; i< expr->inputs().size(); ++i) {
auto input = expr->inputs()[i];
if (input == dst) {
expr->mInputs[i] = src;
}
}
src->mFrom->mTo.emplace_back(expr);
return false;
});
dst->mFrom = src->mFrom;
dst->mFromIndex = src->mFromIndex;
return;
@ -452,15 +515,19 @@ bool Variable::resize(INTS dims) {
}
info.dim = dims;
info.syncSize();
mFrom->mExtraBuffer.reset(new char[info.size * info.type.bytes()], std::default_delete<char[]>());
info.ptr = mFrom->mExtraBuffer.get();
mFrom->mValid = true;
mFrom->mInside->mInputInfos.clear();
auto cache = mFrom->mInside->mCache;
if (nullptr != cache) {
cache->setShapeDirty(0, mFrom->outputInfo(0));
Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
if (0 >= info.size) {
return false;
}
bool res = Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
if (!res) {
return false;
}
mFrom->mValid = true;
mFrom->inside()->mInfoDirty = false;
mFrom->inside()->mContentDirty = true;
mFrom->visitOutputs([](EXPRP expr, int index) { return expr->setInfoDirty(); });
return true;
}
@ -478,11 +545,12 @@ void Expr::visit(EXPRP expr, const std::function<bool(EXPRP)>& before, const std
void* Variable::readInternal(bool forShape) {
if (nullptr == mFrom->get()) {
if (VARP::INPUT == mFrom->mType) {
if (nullptr == mFrom->mInside->mCache) {
if (mFrom->mInside->mContentDirty) {
return nullptr;
}
}
return mFrom->outputInfo(mFromIndex)->ptr;
//MNN_ASSERT(nullptr != mFrom->inside()->mOutputTensors[0]->buffer().host);
return mFrom->inside()->mOutputTensors[0]->buffer().host;
}
auto res = mFrom->requireInfo();
if (false == res) {
@ -490,21 +558,26 @@ void* Variable::readInternal(bool forShape) {
}
auto cache = mFrom->inside()->mCache;
if (nullptr == cache) {
Executor::getGlobalExecutor()->makeCache({mFrom}, forShape);
ExecutorScope::Current()->makeCache({mFrom}, forShape);
cache = mFrom->inside()->mCache;
}
if (nullptr == cache) {
return nullptr;
}
if (NO_ERROR != Executor::getGlobalExecutor()->runCache(cache)) {
if (NO_ERROR != ExecutorScope::Current()->runCache(cache)) {
return nullptr;
}
cache->syncOutput(mFrom->mInside->mCacheOffset + mFromIndex, mFrom->outputInfo(mFromIndex));
return mFrom->outputInfo(mFromIndex)->ptr;
return Executor::mapOutput(cache.get(), mFrom->mInside->mCacheOffset + mFromIndex, mFrom->mInside->mOutputTensors[mFromIndex]);
}
void Variable::informDirty() {
mFrom->visitOutputs([](EXPRP expr, int index) {
std::vector<Expr*> visited;
mFrom->visitOutputs([&visited](EXPRP expr, int index) {
if (expr->visited()) {
return false;
}
visited.emplace_back(expr.get());
expr->setVisited(true);
if (expr->inside()->mReq.shapeNeedContent.empty()) {
// Not init
return false;
@ -514,28 +587,32 @@ void Variable::informDirty() {
expr->visitOutputs([](EXPRP e, int index) { return e->setInfoDirty(); });
return false;
}
if (expr->inside()->mContentDirty) {
return false;
}
expr->inside()->mContentDirty = true;
if (expr->inside()->mReq.contentNeedContent[index]) {
if (expr->inside()->mCache != nullptr) {
expr->inside()->mCache->setContentDirty();
Executor::setContentDirty(expr->inside()->mCache.get());
}
return true;
}
return false;
});
for (auto e : visited) {
e->setVisited(false);
}
}
void Variable::prepareCompute(const std::vector<VARP>& vars, bool forceCpu) {
std::vector<EXPRP> exprs;
for (auto v : vars) {
if (v->expr().first->inside()->mCache == nullptr) {
if (!v->expr().first->visited()) {
v->expr().first->inside()->mCache = nullptr;
v->expr().first->requireInfo();
v->expr().first->setVisited(true);
exprs.emplace_back(v->expr().first);
}
}
Executor::getGlobalExecutor()->makeCache(std::move(exprs), forceCpu);
for (auto v : vars) {
v->expr().first->setVisited(false);
}
ExecutorScope::Current()->makeCache(std::move(exprs), forceCpu);
}
void* Variable::writeInternal(bool inform) {
@ -545,16 +622,8 @@ void* Variable::writeInternal(bool inform) {
if (inform) {
informDirty();
}
auto cache = mFrom->mInside->mCache;
if (nullptr == cache) {
Executor::getGlobalExecutor()->makeCache({mFrom});
cache = mFrom->mInside->mCache;
}
if (nullptr == cache) {
return nullptr;
}
mFrom->mInside->mCache->setContentReady();
return mFrom->mInside->mOutputInfos[0].ptr;
mFrom->mInside->mContentDirty = false;
return mFrom->inside()->mOutputTensors[0]->host<void>();
}
void Variable::unMap() {
@ -591,25 +660,30 @@ bool Expr::setInfoDirty() {
mInside->mContentDirty = true;
mValid = true;
if (mInside->mCache != nullptr) {
mInside->mCache->setShapeDirty(0, nullptr);
Executor::setShapeDirty(mInside->mCache.get());
}
for (auto o : mInside->mOutputTensors) {
Utils::releaseMemoryForHostTensor(o);
}
return true;
}
std::vector<VARP> Variable::load(const char* fileName) {
FileLoader loader(fileName);
if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
return {};
}
loader.read();
if (!loader.valid()) {
return {};
}
AutoStorage<uint8_t> buffer;
loader.merge(buffer);
if (buffer.get() == nullptr) {
return {};
{
FileLoader loader(fileName);
if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
return {};
}
loader.read();
if (!loader.valid()) {
return {};
}
loader.merge(buffer);
if (buffer.get() == nullptr) {
return {};
}
}
return load(buffer.get(), buffer.size());
}
@ -722,6 +796,7 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
} else {
MNN_ASSERT(1 == expr->outputSize());
auto& info = expr->mInside->mOutputInfos[0];
auto ptr = expr->mInside->mOutputTensors[0]->host<void>();
op.reset(new OpT);
if (expr->mType != VARP::INPUT) {
auto blob = new BlobT;
@ -730,16 +805,20 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
if (info.type.code == halide_type_float) {
blob->dataType = DataType_DT_FLOAT;
blob->float32s.resize(info.size);
::memcpy(blob->float32s.data(), info.ptr, info.size * sizeof(float));
} else if (info.type.code == halide_type_int) {
::memcpy(blob->float32s.data(), ptr, info.size * sizeof(float));
} else if (info.type.code == halide_type_int && info.type.bits == 32) {
blob->dataType = DataType_DT_INT32;
blob->int32s.resize(info.size);
::memcpy(blob->int32s.data(), info.ptr, info.size * sizeof(int));
}
else if (info.type.code == halide_type_uint && info.type.bits == 8) {
::memcpy(blob->int32s.data(), ptr, info.size * sizeof(int));
} else if (info.type.code == halide_type_int && info.type.bits == 8) {
blob->dataType = DataType_DT_INT8;
blob->int8s.resize(info.size);
auto pptr = (int8_t *)ptr;
::memcpy(blob->int8s.data(), ptr, info.size * sizeof(int8_t));
} else if (info.type.code == halide_type_uint && info.type.bits == 8) {
blob->dataType = DataType_DT_UINT8;
blob->uint8s.resize(info.size);
::memcpy(blob->uint8s.data(), info.ptr, info.size * sizeof(uint8_t));
::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(uint8_t));
}
op->type = OpType_Const;
if (expr->mType == VARP::TRAINABLE) {
@ -781,12 +860,12 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
auto op = dest->oplists[index].get();
auto tensorIndexOffset = varIndexInfo[expr];
for (int v=0; v<expr->outputSize(); ++v) {
auto const tensorIndex = tensorIndexOffset + v;
if (dest->tensorName[tensorIndex].empty()) {
auto subindex = tensorIndexOffset + v;
if (dest->tensorName[subindex].empty()) {
if (v == 0) {
dest->tensorName[tensorIndex] = op->name;
dest->tensorName[subindex] = op->name;
} else {
dest->tensorName[tensorIndex] = op->name + numberToString(v);
dest->tensorName[subindex] = op->name + numberToString(v);
}
}
}

210
express/Initializer.cpp Normal file
View File

@ -0,0 +1,210 @@
//
// Initializer.cpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "Initializer.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <cmath>
#include <vector>
#include "Distributions.hpp"
#include "RandomGenerator.hpp"
namespace MNN {
namespace Express {
Express::VARP Initializer::createConstVar(Express::INTS dim, Express::Dimensionformat format) {
auto res = Express::_Input(dim, format, halide_type_of<float>());
this->onExecute(res);
res.fix(Express::VARP::CONSTANT);
return res;
}
class ConstantInitializer : public Initializer {
public:
ConstantInitializer(float value) : mConstant(value) {
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
auto ptr = p->writeMap<float>();
for (int i = 0; i < count; i++) {
ptr[i] = mConstant;
}
}
private:
float mConstant;
};
Initializer* Initializer::constValue(float value) {
return new ConstantInitializer(value);
}
class UniformInitializer : public Initializer {
public:
UniformInitializer(float min = 0, float max = 1) {
mMin = min;
mMax = max;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
Distributions::uniform(count, mMin, mMax, p->writeMap<float>(), RandomGenerator::generator());
}
private:
float mMin;
float mMax;
};
Initializer* Initializer::uniform(float minValue, float maxValue) {
return new UniformInitializer(minValue, maxValue);
}
class XavierInitializer : public Initializer {
public:
XavierInitializer(VarianceNorm norm = FANIN) {
mNorm = norm;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int fanIn = count / dims[0];
int fanOut = dims.size() > 1 ? count / dims[1] : count;
float n = fanIn; // default: FANIN
if (mNorm == VarianceNorm::AVERAGE) {
n = (fanIn + fanOut) / 2.0f;
} else if (mNorm == VarianceNorm::FANOUT) {
n = fanOut;
}
float scale = sqrtf(3.0f / n);
Distributions::uniform(count, -scale, scale, p->writeMap<float>(), RandomGenerator::generator());
}
private:
VarianceNorm mNorm;
};
Initializer* Initializer::xavier(VarianceNorm norm) {
return new XavierInitializer(norm);
}
class GaussianInitializer : public Initializer {
public:
GaussianInitializer(float mean = 0, float std = 1) {
mMean = mean;
mStd = std;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
Distributions::gaussian(count, mMean, mStd, p->writeMap<float>(), RandomGenerator::generator());
}
private:
float mMean;
float mStd;
};
Initializer* Initializer::gauss(float mean, float std) {
return new GaussianInitializer(mean, std);
}
class MSRAInitializer : public Initializer {
public:
MSRAInitializer(VarianceNorm norm = FANIN) {
mNorm = norm;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int fanIn = count / dims[0];
int fanOut = dims.size() > 1 ? count / dims[1] : count;
float n = fanIn; // default: FANIN
if (mNorm == VarianceNorm::AVERAGE) {
n = (fanIn + fanOut) / 2.0f;
} else if (mNorm == VarianceNorm::FANOUT) {
n = fanOut;
}
float std = sqrtf(2.0f / n);
Distributions::gaussian(count, 0.0f, std, p->writeMap<float>(), RandomGenerator::generator());
}
private:
VarianceNorm mNorm;
};
Initializer* Initializer::MSRA(VarianceNorm norm) {
return new MSRAInitializer(norm);
}
class BilinearInitializer : public Initializer {
public:
BilinearInitializer() = default;
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
MNN_ASSERT(dims.size() == 4);
MNN_ASSERT(dims[2] == dims[3]); // NCHW, H == W
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int f = ceilf(dims[3] / 2.0f);
float c = (dims[3] - 1) / (2.0f * f);
auto ptr = p->writeMap<float>();
for (int i = 0; i < count; i++) {
float x = i % dims[3];
float y = (i / dims[3]) % dims[2];
ptr[i] = (1 - std::fabs(x / f - c)) * (1 - std::fabs(y / f - c));
}
}
};
Initializer* Initializer::bilinear() {
return new BilinearInitializer();
}
class PositiveUnitball : public Initializer {
public:
PositiveUnitball() = default;
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
auto ptr = p->writeMap<float>();
Distributions::uniform(count, 0, 1, ptr, RandomGenerator::generator());
int dim = count / dims[0];
for (int i = 0; i < dims[0]; i++) {
float sum = 0;
for (int j = 0; j < dim; j++) {
sum += ptr[i * dim + j];
}
for (int j = 0; j < dim; j++) {
ptr[i * dim + j] = ptr[i * dim + j] / sum;
}
}
}
};
Initializer* Initializer::positiveUnitball() {
return new PositiveUnitball();
}
} // namespace Express
} // namespace MNN
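A hedged usage sketch for the fillers above (not part of this commit), using only the factory functions and createConstVar shown in this file; the weight shape is an illustrative assumption.
#include <memory>
#include "Initializer.hpp"   // project-internal header under express/
using namespace MNN::Express;
int main() {
    // Xavier with the default FANIN norm: for a {16, 8, 3, 3} weight,
    // count = 1152, fanIn = count / dims[0] = 72, scale = sqrt(3 / 72) ~= 0.20,
    // so the values are drawn uniformly from roughly [-0.20, 0.20].
    std::unique_ptr<Initializer> init(Initializer::xavier());
    VARP weight = init->createConstVar({16, 8, 3, 3}, NCHW);
    (void)weight;
    return 0;
}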

43
express/Initializer.hpp Normal file
View File

@ -0,0 +1,43 @@
//
// Initializer.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef Initializer_hpp
#define Initializer_hpp
#include <MNN/expr/Expr.hpp>
namespace MNN {
namespace Express {
class RandomGenerator;
class MNN_PUBLIC Initializer {
public:
Initializer() = default;
virtual ~Initializer() = default;
Express::VARP createConstVar(Express::INTS dim, Express::Dimensionformat format = Express::NCHW);
virtual void onExecute(Express::VARP p) = 0;
static Initializer* constValue(float value);
static Initializer* uniform(float minValue = 0.0f, float maxValue = 1.0f);
enum VarianceNorm {
FANIN,
FANOUT,
AVERAGE,
};
static Initializer* xavier(VarianceNorm norm = FANIN);
static Initializer* gauss(float mean = 0.0f, float std = 1.0f);
static Initializer* MSRA(VarianceNorm norm = FANIN);
static Initializer* bilinear();
static Initializer* positiveUnitball();
};
} // namespace Express
} // namespace MNN
#endif // Initializer_hpp

View File

@ -30,7 +30,18 @@ static DataType _convertDataType(halide_type_t type) {
}
return DataType_DT_INVALID;
}
static VARP _checkNC4HW4(VARP x) {
#ifdef MNN_EXPR_SHAPE_EAGER
auto info = x->getInfo();
if (nullptr != info && info->order == NC4HW4) {
return _Convert(x, NCHW);
}
#endif
return x;
}
static VARP _Binary(VARP x, VARP y, BinaryOpOperation operation) {
x = _checkNC4HW4(x);
y = _checkNC4HW4(y);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_BinaryOp;
op->type = OpType_BinaryOp;
@ -49,6 +60,7 @@ static VARP _Unary(VARP x, UnaryOpOperation operation) {
return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
x = _checkNC4HW4(x);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ReductionParam;
op->type = OpType_Reduction;
@ -60,6 +72,7 @@ static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _ReduceMutable(VARP x, VARP dim, ReductionType type, bool keepDim) {
x = _checkNC4HW4(x);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ReductionParam;
op->type = OpType_Reduction;
@ -955,6 +968,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMax(VARP input, int axis) {
input = _checkNC4HW4(input);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ArgMax;
op->type = OpType_ArgMax;
@ -976,6 +990,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMin(VARP input, int axis) {
input = _checkNC4HW4(input);
std::unique_ptr<OpT> op(new OpT);
op->main.type = OpParameter_ArgMax;
op->type = OpType_ArgMin;

View File

@ -5,6 +5,7 @@
// Created by MNN on 2019/08/20.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MergeOptimizer_hpp
#define MergeOptimizer_hpp

View File

@ -54,16 +54,14 @@ VARP _Input(INTS shape, Dimensionformat data_format, halide_type_t dtype) {
info.dim = std::move(shape);
info.order = data_format;
info.type = dtype;
info.ptr = nullptr;
return (Variable::create(Expr::create(std::move(info))));
return (Variable::create(Expr::create(std::move(info), nullptr, VARP::INPUT)));
}
VARP _Scalar(const void* ptr, halide_type_t type) {
Variable::Info info;
info.dim = {};
info.order = NHWC;
info.type = type;
info.ptr = (void*)ptr;
return (Variable::create(Expr::create(std::move(info))));
return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
/*create a constant variable.
Args:
@ -79,8 +77,7 @@ VARP _Const(const void* ptr, INTS shape, Dimensionformat format, halide_type_t t
info.dim = std::move(shape);
info.order = format;
info.type = type;
info.ptr = (void*)ptr;
return (Variable::create(Expr::create(std::move(info))));
return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
VARP _Const(float value, INTS shape, Dimensionformat format) {
@ -93,8 +90,8 @@ VARP _Const(float value, INTS shape, Dimensionformat format) {
for (int i = 0; i < info.size; ++i) {
values[i] = value;
}
info.ptr = (void*)values.data();
return (Variable::create(Expr::create(std::move(info))));
auto ptr = (void*)values.data();
return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format, halide_type_t type) {
@ -107,6 +104,23 @@ VARP _TrainableParam(float value, INTS dims, Dimensionformat format) {
v.fix(VARP::TRAINABLE);
return v;
}
VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape) {
std::unique_ptr<OpT> ipOp(new OpT);
ipOp->type = OpType_InnerProduct;
ipOp->main.type = OpParameter_InnerProduct;
ipOp->main.value = new InnerProductT;
auto ipParam = ipOp->main.AsInnerProduct();
ipParam->outputCount = outputShape[1];
if(!bias.empty()) {
ipParam->biasTerm = 1;
}
ipParam->weightSize = weight.size();
ipParam->weight = std::move(weight);
ipParam->bias = std::move(bias);
return (Variable::create(Expr::create(ipOp.get(), {x})));
}
VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads) {
std::unique_ptr<OpT> convOp(new OpT);
@ -183,7 +197,7 @@ VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS
return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6, int nbits) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_Convolution;
if (channel[0] == channel[1] && channel[0] == group) {
@ -285,6 +299,42 @@ VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS
return (Variable::create(Expr::create(std::move(convOp), {x, weight})));
}
VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_Deconvolution;
if (channel[0] == channel[1] && channel[0] == group) {
convOp->type = OpType_DeconvolutionDepthwise;
}
convOp->main.type = OpParameter_Convolution2D;
convOp->main.value = new Convolution2DT;
auto conv2D = convOp->main.AsConvolution2D();
conv2D->common.reset(new Convolution2DCommonT);
conv2D->common->padMode = _convertPadMode(pad);
if (pads.size() == 2) {
conv2D->common->padX = pads[0];
conv2D->common->padY = pads[1];
} else {
conv2D->common->pads = std::move(pads);
}
conv2D->common->strideX = stride[0];
conv2D->common->strideY = stride[1];
conv2D->common->group = group;
conv2D->common->outputCount = channel[1];
conv2D->common->inputCount = channel[0];
conv2D->common->dilateX = dilate[0];
conv2D->common->dilateY = dilate[1];
conv2D->common->kernelX = kernelSize[0];
conv2D->common->kernelY = kernelSize[1];
conv2D->common->relu6 = relu6;
conv2D->common->relu = relu;
MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]);
conv2D->weight = std::move(weight);
MNN_ASSERT(bias.size() == channel[1]);
conv2D->bias = std::move(bias);
return (Variable::create(Expr::create(convOp.get(), {x})));
}
static VARP _Pool(VARP x, INTS kernel, INTS stride, PoolType type, PaddingMode pad, INTS pads) {
std::unique_ptr<OpT> pool(new OpT);
pool->type = OpType_Pooling;
@ -381,9 +431,13 @@ x: A variable.
Returns:
output: A variable with the same type as `x`.
*/
VARP _Relu6(VARP x) {
VARP _Relu6(VARP x, float minValue, float maxValue) {
std::unique_ptr<OpT> relu(new OpT);
relu->type = OpType_ReLU6;
relu->main.value = new Relu6T;
relu->main.type = OpParameter_Relu6;
relu->main.AsRelu6()->maxValue = maxValue;
relu->main.AsRelu6()->minValue = minValue;
return (Variable::create(Expr::create(relu.get(), {x})));
}
/*Given an input value x, it computes the output as x if x > 0 and slopes * x if x <= 0.
@ -746,9 +800,12 @@ input: A variable.
Returns:
A variable of Halide_Type_Int.
*/
VARP _Shape(VARP input) {
VARP _Shape(VARP input, bool nchw) {
std::unique_ptr<OpT> shape(new OpT);
shape->type = OpType_Shape;
if (nchw) {
shape->defaultDimentionFormat = MNN_DATA_FORMAT_NCHW;
}
return (Variable::create(Expr::create(std::move(shape), {input})));
}
/*Stacks a list of rank-R variables into one rank-(R+1) variable.
@ -906,6 +963,21 @@ VARP _Elu(VARP features, float alpha) {
op->main.value = eluParam;
return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Given an input value x, it computes the output as 1.0 if x > threshold and 0.0 if x <= threshold.
features: A variable of type Halide_Type_Float
threshold: threshold value
Returns:
A variable. Has the same type as features.
*/
VARP _Threshold(VARP features, float threshold) {
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_Threshold;
auto eluParam = new ELUT;
op->main.type = OpParameter_ELU;
eluParam->alpha = threshold;
op->main.value = eluParam;
return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Computes the size of the variable
Args:
input: A variable of type Halide_Type_Float or Halide_Type_Int
@ -1049,7 +1121,6 @@ std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims) {
op->main.type = OpParameter_MomentsParam;
momentsParam->dim = axis;
momentsParam->keepDims = keepDims;
momentsParam->dType = (MNN::DataType)Utils::convertDataType(x->getInfo()->type);
op->main.value = momentsParam;
EXPRP expr = Expr::create(std::move(op), {x}, 2);
std::vector<VARP> res;
@ -1405,7 +1476,7 @@ VARP _ZeroGrad(VARP x) {
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu) {
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_ConvInt8;
if (channel[0] == channel[1] && channel[0] == group) {
@ -1433,9 +1504,16 @@ VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<fl
conv2D->symmetricQuan->bias = std::move(bias);
conv2D->symmetricQuan->scale = std::move(scale);
conv2D->symmetricQuan->weight = std::move(weight);
conv2D->symmetricQuan->nbits = nbits;
return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim) {
std::unique_ptr<MNN::OpT> cosineSimilarityOp(new MNN::OpT);
cosineSimilarityOp->type = MNN::OpType_CosineSimilarity;
return (Variable::create(Expr::create(std::move(cosineSimilarityOp), {input0, input1, inputDim})));
}
VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue/*For future*/) {
auto xInfo = x->getInfo();
auto scaleInfo = scale->getInfo();
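A short sketch exercising the extended _Relu6 above with explicit bounds (a hedged example assuming clamping semantics, not code from this commit):
#include <cstring>
#include <MNN/expr/ExprCreator.hpp>
using namespace MNN::Express;
int main() {
    auto x = _Input({4}, NCHW, halide_type_of<float>());
    const float data[4] = {-2.0f, 0.5f, 3.0f, 9.0f};
    ::memcpy(x->writeMap<float>(), data, sizeof(data));
    auto y = _Relu6(x, -1.0f, 6.0f);   // clamp to [-1, 6] instead of the default [0, 6]
    auto out = y->readMap<float>();    // expected: -1, 0.5, 3, 6
    (void)out;
    return 0;
}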

View File

@ -22,28 +22,7 @@ Optimizer::Parameters::~Parameters() {
}
}
std::shared_ptr<Optimizer> Optimizer::create(Config config) {
const int numThread = config.numThread;
auto forwardType = config.forwardType;
if (forwardType != MNN_FORWARD_ALL) {
if (MNNGetExtraBackendCreator(forwardType) == nullptr) {
return nullptr;
}
return std::shared_ptr<Optimizer>(new MergeOptimizer(config.forwardType, numThread, nullptr));
}
auto device = config.device;
if (CPU == device) {
return std::shared_ptr<Optimizer>(new MergeOptimizer(MNN_FORWARD_CPU, numThread, nullptr));
}
if (GPU == device) {
std::vector<MNNForwardType> types {MNN_FORWARD_METAL, MNN_FORWARD_OPENCL, MNN_FORWARD_VULKAN, MNN_FORWARD_OPENGL};
for (auto type : types) {
auto creator = MNNGetExtraBackendCreator(type);
if (nullptr != creator) {
return std::shared_ptr<Optimizer>(new MergeOptimizer(type, numThread, nullptr));
}
}
}
// Do nothing
return nullptr;
}

View File

@ -0,0 +1,45 @@
//
// RandomGenerator.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef RandomGenerator_hpp
#define RandomGenerator_hpp
#include <MNN/MNNDefine.h>
#include <random>
namespace MNN {
namespace Express {
class MNN_PUBLIC RandomGenerator {
private:
RandomGenerator(int seed = std::random_device()()) {
mSeed = seed;
mGenerator.seed(mSeed);
}
~RandomGenerator() = default;
RandomGenerator(RandomGenerator &);
RandomGenerator &operator=(const RandomGenerator &);
private:
int mSeed;
std::mt19937 mGenerator;
public:
static std::mt19937 &generator(int seed = std::random_device()()) {
static RandomGenerator rng(seed);
return rng.mGenerator;
}
};
} // namespace Express
} // namespace MNN
#endif // RandomGenerator_hpp
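One behavioural note on the class above, illustrated by a hedged sketch (not from this commit): the generator is a function-local static, so only the seed passed to the first generator() call takes effect.
#include <random>
#include "RandomGenerator.hpp"   // project-internal header under express/
int main() {
    auto& g1 = MNN::Express::RandomGenerator::generator(42); // seeds the singleton with 42
    auto& g2 = MNN::Express::RandomGenerator::generator(7);  // same object; the 7 is ignored
    std::uniform_int_distribution<int> pick(0, 9);
    int a = pick(g1);   // g1 and g2 alias one generator,
    int b = pick(g2);   // so b continues the stream that produced a
    (void)a; (void)b;
    return 0;
}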

View File

@ -10,8 +10,24 @@
#include <map>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/MNNMemoryUtils.h"
namespace MNN {
namespace Express {
Expr::Inside::Inside(int outputSize) {
mOutputInfos.resize(outputSize);
mOutputTensors.resize(outputSize);
for (int i=0; i<outputSize; ++i) {
mOutputTensors[i] = new Tensor;
TensorUtils::getDescribe(mOutputTensors[i])->memoryType = Tensor::InsideDescribe::MEMORY_HOST;
}
}
Expr::Inside::~Inside() {
for (auto t : mOutputTensors) {
delete t;
}
}
#define CONVERT(src, dst, f)\
if (f == src) return dst;
@ -61,7 +77,6 @@ void Utils::copyInfoToTensor(Tensor* dest, const Variable::Info* source) {
}
dest->buffer().dimensions = (int)source->dim.size();
dest->buffer().type = source->type;
dest->buffer().host = (uint8_t*)source->ptr;
TensorUtils::getDescribe(dest)->dimensionFormat = (MNN_DATA_FORMAT)Utils::convertFormat(source->order);
TensorUtils::setLinearLayout(dest);
}
@ -70,7 +85,31 @@ void Utils::copyTensorToInfo(Variable::Info* shape, const Tensor* tensor) {
shape->dim = tensor->shape();
shape->size = tensor->elementSize();
shape->order = Utils::revertFormat(TensorUtils::getDescribe(tensor)->dimensionFormat);
shape->ptr = tensor->host<float>();
}
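// The two helpers below manage host-side storage for MEMORY_HOST tensors:
// allocMemoryForHostTensor lazily allocates an aligned buffer sized from the tensor's current
// shape (and is a no-op when a buffer already exists), while releaseMemoryForHostTensor frees it.
// Both return false for tensors whose memory type is not MEMORY_HOST.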
bool Utils::allocMemoryForHostTensor(Tensor* dest) {
if (nullptr != dest->buffer().host) {
return true;
}
if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
return false;
}
auto size = dest->size();
if (0 >= size) {
return false;
}
dest->buffer().host = (uint8_t*)MNNMemoryAllocAlign(size, MNN_MEMORY_ALIGN_DEFAULT);
return dest->buffer().host != nullptr;
}
bool Utils::releaseMemoryForHostTensor(Tensor* dest) {
if (nullptr == dest->buffer().host) {
return true;
}
if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
return false;
}
MNNMemoryFreeAlign(dest->buffer().host);
dest->buffer().host = nullptr;
return true;
}
} // namespace Express

View File

@ -15,15 +15,16 @@
namespace MNN {
namespace Express {
struct Expr::Inside {
std::vector<const Variable::Info*> mInputInfos;
Inside(int outputSize);
~ Inside();
std::vector<Variable::Info> mOutputInfos;
std::vector<Tensor*> mOutputTensors;
Executor::Requirement mReq;
std::shared_ptr<Executor::ComputeCache::Unit> mUnit;
std::shared_ptr<Executor::Unit> mUnit;
std::shared_ptr<Executor::ComputeCache> mCache;
int mCacheOffset = 0;
bool mInfoDirty = true;
bool mContentDirty = true;
bool mLinkCache = false;
};
class Utils {
public:
@ -33,6 +34,8 @@ public:
static int convertFormat(Dimensionformat format);
static Express::Dimensionformat revertFormat(int format);
static halide_type_t revertDataType(DataType dataType);
static bool allocMemoryForHostTensor(Tensor* dest);
static bool releaseMemoryForHostTensor(Tensor* dest);
};
} // namespace Express
} // namespace MNN

View File

@ -10,7 +10,7 @@
#include <MNN/expr/ExprCreator.hpp>
using namespace MNN::Express;
namespace MNN {
namespace Train {
namespace Express {
FixModule::FixModule(std::vector<Express::VARP> output, std::vector<Express::VARP> parameters,
std::vector<std::pair<Express::VARP, Express::Dimensionformat>> inputs) {
for (auto p : parameters) {
@ -34,5 +34,19 @@ std::vector<Express::VARP> FixModule::onForward(const std::vector<Express::VARP>
}
return mOutput;
}
} // namespace Train
Module* FixModule::clone(CloneContext* ctx) const {
FixModule* module(new FixModule);
for (auto& it : mInputs) {
VARP v = ctx->getOrClone(it.first);
module->mInputs.push_back(std::make_pair(v, it.second));
}
for (auto& it : mOutput) {
VARP v = ctx->getOrClone(it);
module->mOutput.push_back(v);
}
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN

View File

@ -8,9 +8,9 @@
#ifndef FixModule_hpp
#define FixModule_hpp
#include "Module.hpp"
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Train {
namespace Express {
class FixModule : public Module {
public:
@ -20,10 +20,14 @@ public:
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
virtual void onClearCache() override;
private:
FixModule() = default;
Module* clone(CloneContext* ctx) const override;
std::vector<std::pair<Express::VARP, Express::Dimensionformat>> mInputs;
std::vector<Express::VARP> mOutput;
};
} // namespace Train
} // namespace Express
} // namespace MNN
#endif

express/module/IfModule.cpp Normal file (112 lines)
View File

@ -0,0 +1,112 @@
//
// IfModule.cpp
// MNN
//
// Created by MNN on 2020/09/01.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "IfModule.hpp"
#include "MNN_generated.h"
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
for (int i=0; i<names.size(); ++i) {
if (names[i] == key) {
return i;
}
}
return -1;
}
std::vector<Express::VARP> IfModule::onForward(const std::vector<Express::VARP>& inputs) {
std::vector<Express::VARP> outputs(mOutputFromElse.size());
MNN_ASSERT(mOutputFromThen.size() == mOutputFromElse.size());
if (inputs[0]->readMap<int>()[0] > 0) {
std::vector<Express::VARP> subInputs(mInputForThen.size());
for (auto& p : mInputForThen) {
subInputs[p.first] = inputs[p.second];
}
auto subOutputs = mThen->onForward(subInputs);
for (int i=0; i<mOutputFromThen.size(); ++i) {
outputs[i] = subOutputs[mOutputFromThen[i]];
}
} else {
std::vector<Express::VARP> subInputs(mInputForElse.size());
for (auto& p : mInputForElse) {
subInputs[p.first] = inputs[p.second];
}
auto subOutputs = mElse->onForward(subInputs);
for (int i=0; i<mOutputFromElse.size(); ++i) {
outputs[i] = subOutputs[mOutputFromElse[i]];
}
}
return outputs;
}
IfModule* IfModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
auto module = new IfModule;
auto ifParam = op->main_as_IfParam();
auto& thenG = subGraph.find(ifParam->then_graph()->str())->second;
auto& elseG = subGraph.find(ifParam->else_graph()->str())->second;
module->mElse = elseG.m;
module->mThen = thenG.m;
if (nullptr != op->name()) {
module->setName(op->name()->str());
}
/** Compute map index
std::vector<std::pair<int, int>> mInputForThen;
// First: mElse's index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForElse;
std::vector<int> mOutputFromThen;
std::vector<int> mOutputFromElse;
*/
// Map Inputs
for (int i=0; i<ifParam->aliases_inputs()->size(); ++i) {
auto index = i;
auto data = ifParam->aliases_inputs()->GetAs<StringVec>(i);
if (nullptr == data->data()) {
continue;
}
for (int s=0; s<data->data()->size(); ++s) {
auto name = data->data()->GetAsString(s)->str();
auto thenPos = _findPos(thenG.inputs, name);
if (thenPos >= 0) {
module->mInputForThen.emplace_back(std::make_pair(thenPos, i));
}
auto elsePos = _findPos(elseG.inputs, name);
if (elsePos >= 0) {
module->mInputForElse.emplace_back(std::make_pair(elsePos, i));
}
}
}
// Map outputs
auto output = ifParam->aliases_outputs();
module->mOutputFromThen.resize(output->size());
module->mOutputFromElse.resize(output->size());
for (int i=0; i<output->size(); ++i) {
auto data = output->GetAs<StringVec>(i);
MNN_ASSERT(data->data()->size() == 2);
auto thenPos = _findPos(thenG.outputs, data->data()->GetAsString(0)->str());
MNN_ASSERT(thenPos >= 0);
auto elsePos = _findPos(elseG.outputs, data->data()->GetAsString(1)->str());
module->mOutputFromThen[i] = thenPos;
module->mOutputFromElse[i] = elsePos;
}
return module;
}
Module* IfModule::clone(CloneContext* ctx) const {
IfModule* module(new IfModule);
module->mInputForThen = mInputForThen;
module->mInputForElse = mInputForElse;
module->mOutputFromThen = mOutputFromThen;
module->mOutputFromElse = mOutputFromElse;
module->mThen.reset(mThen->clone(ctx));
module->mElse.reset(mElse->clone(ctx));
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN

View File

@ -0,0 +1,43 @@
//
// IfModule.hpp
// MNN
//
// Created by MNN on 2020/09/01.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef IfModule_hpp
#define IfModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class IfModule : public Module {
public:
virtual ~ IfModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
private:
IfModule(){}
Module* clone(CloneContext* ctx) const override;
// First: mThen's index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForThen;
// First: mElse's index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForElse;
std::vector<int> mOutputFromThen;
std::vector<int> mOutputFromElse;
std::shared_ptr<Module> mThen;
std::shared_ptr<Module> mElse;
};
} // namespace Express
} // namespace MNN
#endif /* IfModule_hpp */

express/module/Module.cpp Normal file (182 lines)
View File

@ -0,0 +1,182 @@
//
// Module.cpp
// MNN
//
// Created by MNN on 2019/11/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <MNN/expr/Module.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include "FixModule.hpp"
#include "PipelineModule.hpp"
#include "core/FileLoader.hpp"
namespace MNN {
namespace Express {
class EmptyModule : public Module {
public:
EmptyModule(const std::vector<Express::VARP>& parameters) {
for (auto p : parameters) {
addParameter(p);
}
}
virtual ~EmptyModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
return {};
}
protected:
EmptyModule() = default;
Module* clone(Module::CloneContext* ctx) const override {
EmptyModule* module(new EmptyModule);
return this->cloneBaseTo(ctx, module);
}
};
Module* Module::createEmpty(const std::vector<Express::VARP>& parameters) {
return new EmptyModule(parameters);
}
Express::VARP Module::forward(Express::VARP input) {
return this->onForward({input})[0];
}
std::vector<Express::VARP> Module::parameters() const {
std::vector<Express::VARP> result;
_collectParameters(result);
return result;
}
bool Module::loadParameters(const std::vector<Express::VARP>& parameters) {
std::vector<Express::VARP> result;
_collectParameters(result);
if (parameters.empty() || parameters.size() != result.size()) {
MNN_ERROR("Error parameters, empty or parameter size not match \n");
return false;
}
for (int i=0; i<parameters.size(); ++i) {
if (nullptr != result[i].get()) {
// Check Origin parameter's size
auto dstInfo = result[i]->getInfo();
auto srcInfo = parameters[i]->getInfo();
if (dstInfo->dim.size() != srcInfo->dim.size() || dstInfo->order != srcInfo->order) {
MNN_ERROR("Error parameters %d, dim size or order not match \n", i);
return false;
}
if (dstInfo->size != srcInfo->size || dstInfo->type != srcInfo->type) {
MNN_ERROR("Error parameters %d, size or type not match \n", i);
return false;
}
}
Variable::replace(result[i], parameters[i]);
}
return true;
}
void Module::setIsTraining(const bool isTraining) {
mIsTraining = isTraining;
for (auto c : mChildren) {
c->setIsTraining(isTraining);
}
}
bool Module::getIsTraining() {
return mIsTraining;
}
void Module::registerModel(const std::vector<std::shared_ptr<Module>>& children) {
mChildren.insert(mChildren.begin(), children.begin(), children.end());
}
int Module::addParameter(VARP parameter) {
auto res = mParameters.size();
mParameters.emplace_back(parameter);
return (int)res;
}
void Module::setParameter(Express::VARP parameter, int index) {
if (index < 0 || index >= mParameters.size()) {
MNN_ERROR("Module error: index out of range: %d - %d:\n", index, (int)mParameters.size());
return;
}
mParameters[index] = parameter;
}
void Module::_collectParameters(std::vector<Express::VARP>& result) const {
for (auto p : mParameters) {
result.push_back(p);
}
for (auto c : mChildren) {
c->_collectParameters(result);
}
}
void Module::clearCache() {
for (auto c : mChildren) {
c->clearCache();
}
this->onClearCache();
}
Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic) {
AutoStorage<uint8_t> buffer;
{
FileLoader loader(fileName);
if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
return {};
}
loader.read();
if (!loader.valid()) {
return {};
}
loader.merge(buffer);
if (buffer.get() == nullptr) {
return {};
}
}
return load(inputs, outputs, buffer.get(), buffer.size(), dynamic);
}
Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
return PipelineModule::load(inputs, outputs, buffer, length, dynamic);
}
EXPRP Module::CloneContext::getOrClone(EXPRP expr) {
auto it = mExprMap.find(expr.get());
if (it == mExprMap.end()) {
// EXPRP replica = expr->clone(shareParams);
// TODO(hjchen2): Clone expr.
EXPRP replica = expr;
it = mExprMap.emplace(expr.get(), replica).first;
}
return it->second;
}
VARP Module::CloneContext::getOrClone(VARP var) {
auto it = mVarMap.find(var.get());
if (it == mVarMap.end()) {
// TODO(hjchen2): Clone variable.
VARP replica = var;
it = mVarMap.emplace(var.get(), replica).first;
}
return it->second;
}
Module* Module::clone(const Module* module, const bool shareParams) {
CloneContext context(shareParams);
return module->clone(&context);
}
Module* Module::cloneBaseTo(CloneContext* ctx, Module* module) const {
for (const Express::VARP& var : mParameters) {
module->mParameters.push_back(ctx->getOrClone(var));
}
module->mIsTraining = mIsTraining;
module->mName = mName;
module->mType = mType;
return module;
}
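// A usage sketch (the model path, tensor names, and input shape are placeholders): load a model
// as a Module, run one input through it, then clone a replica.
//     std::shared_ptr<Module> net(Module::load({"input"}, {"prob"}, "model.mnn", false));
//     auto prob = net->forward(_Input({1, 3, 224, 224}, NC4HW4));
//     std::shared_ptr<Module> replica(Module::clone(net.get(), /*shareParams=*/true));
// Note the TODOs above: CloneContext::getOrClone currently returns the original EXPRP/VARP
// unchanged, so a clone still shares the underlying expressions with its source.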
} // namespace Express
} // namespace MNN

View File

@ -6,9 +6,11 @@
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "NN.hpp"
#include <MNN/expr/NN.hpp>
#include "Distributions.hpp"
#include "FixModule.hpp"
#include "WhileModule.hpp"
#include "IfModule.hpp"
#include "Initializer.hpp"
#include "MNN_generated.h"
#include "RandomGenerator.hpp"
@ -17,7 +19,7 @@
using namespace MNN::Express;
namespace MNN {
namespace Train {
namespace Express {
static VARP _activate(VARP x, NN::ActivationFunctionType type) {
switch (type) {
case NN::None:
@ -58,6 +60,14 @@ public:
}
private:
DropoutModule() = default;
Module* clone(CloneContext* ctx) const override {
DropoutModule* module(new DropoutModule);
module->mDropRatio = mDropRatio;
return this->cloneBaseTo(ctx, module);
}
float mDropRatio;
};
@ -80,8 +90,8 @@ public:
mRunningVariance = _Const(bnPa->varData()->data(), {1, mChannels, 1, 1}, NCHW);
addParameter(mScale);
addParameter(mBias);
addParameter(mRunningVariance);
addParameter(mRunningMean);
mRunningVariancePos = addParameter(mRunningVariance);
mRunningMeanPos = addParameter(mRunningMean);
mReductionDims = {0, 2, 3};
setType("BatchNorm");
}
@ -110,8 +120,8 @@ public:
addParameter(mScale);
addParameter(mBias);
addParameter(mRunningVariance);
addParameter(mRunningMean);
mRunningVariancePos = addParameter(mRunningVariance);
mRunningMeanPos = addParameter(mRunningMean);
setType("BatchNorm");
}
@ -156,9 +166,8 @@ public:
mRunningVariance = _Const(mMomentum) * mRunningVariance + _Const(1 - mMomentum) * sampleVar;
outputData->setName(name());
outputData = _Convert(outputData, dimFormat);
Variable::prepareCompute({inputs[0], outputData, mRunningMean, mRunningVariance});
mRunningMean.fix(Express::VARP::CONSTANT);
mRunningVariance.fix(Express::VARP::CONSTANT);
setParameter(mRunningMean, mRunningMeanPos);
setParameter(mRunningVariance, mRunningVariancePos);
return {outputData};
}
auto rStd = _Const(1.0f) / _Sqrt(mRunningVariance + _Const(mEps));
@ -180,12 +189,31 @@ public:
}
private:
BatchNormModule() = default;
Module* clone(CloneContext* ctx) const override {
BatchNormModule* module(new BatchNormModule);
module->mMomentum = mMomentum;
module->mEps = mEps;
module->mScale = ctx->getOrClone(mScale);
module->mBias = ctx->getOrClone(mBias);
module->mRunningMean = ctx->getOrClone(mRunningMean);
module->mRunningVariance = ctx->getOrClone(mRunningVariance);
module->mRunningMeanPos = mRunningMeanPos;
module->mRunningVariancePos = mRunningVariancePos;
module->mChannels = mChannels;
module->mReductionDims = mReductionDims;
return this->cloneBaseTo(ctx, module);
}
float mMomentum = 0.99;
float mEps = 1e-5;
VARP mScale = nullptr;
VARP mBias = nullptr;
VARP mRunningMean = nullptr;
VARP mRunningVariance = nullptr;
int mRunningMeanPos = -1;
int mRunningVariancePos = -1;
int mChannels;
std::vector<int> mReductionDims;
};
@ -246,7 +274,18 @@ public:
tempOutput->setName(name());
return {tempOutput};
}
private:
ConvModule() = default;
Module* clone(CloneContext* ctx) const override {
ConvModule* module(new ConvModule);
module->mParameter = mParameter;
module->mParameter.weight = ctx->getOrClone(mParameter.weight);
module->mParameter.bias = ctx->getOrClone(mParameter.bias);
return this->cloneBaseTo(ctx, module);
}
NN::ConvParameters mParameter;
};
static std::tuple<VARP, VARP, int> _initParameters(const NN::ConvOption& option, bool hasBias,
@ -533,7 +572,23 @@ public:
}
private:
const NN::ConvOption mOption;
ConvOctaveModule() = default;
Module* clone(CloneContext* ctx) const override {
ConvOctaveModule* module(new ConvOctaveModule);
module->mOption = mOption;
module->mLLW = ctx->getOrClone(mLLW);
module->mLHW = ctx->getOrClone(mLHW);
module->mHLW = ctx->getOrClone(mHLW);
module->mHHW = ctx->getOrClone(mHHW);
module->mLBias = ctx->getOrClone(mLBias);
module->mHBias = ctx->getOrClone(mHBias);
module->mSplitInput = mSplitInput;
module->mGroup = mGroup;
return this->cloneBaseTo(ctx, module);
}
NN::ConvOption mOption;
VARP mLLW;
VARP mLHW;
VARP mHLW;
@ -555,7 +610,7 @@ Module* NN::ConvOctave(const ConvParameters& parameters,
module->setName(parameters.name);
return module;
}
Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs) {
if (nullptr == expr->get()) {
return nullptr;
}
@ -565,6 +620,12 @@ Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
if (expr->get()->type() == OpType_Dropout) {
return new DropoutModule(0.3f);
}
if (expr->get()->type() == OpType_While) {
return WhileModule::create(expr->get(), subgraphs);
}
if (expr->get()->type() == OpType_If) {
return IfModule::create(expr->get(), subgraphs);
}
return nullptr;
}
@ -621,6 +682,9 @@ public:
auto limit = (float)(1 << (bits - 1)) - 1.0f;
mLimitScale = _Scalar<float>(1.0f / limit);
mClampValue = _Scalar<float>(limit);
mInputScalePos = addParameter(mInputScale);
mOutputScalePos = addParameter(mOutputScale);
setType("ConvBNReluFused");
}
@ -632,31 +696,16 @@ public:
tempX = _Convert(tempX, NCHW);
}
auto originX = tempX;
VARP scale;
if (mFeatureScaleStatMethod == NN::PerTensor) {
scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
} else {
auto originSize = originX->getInfo()->size;
auto batch = originX->getInfo()->dim[0];
auto channel = originX->getInfo()->dim[1];
if (originSize / batch / channel < 10) {
// Too small data
//MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
std::vector<int> dims = {1, channel, 1, 1};
auto dimVar = _Const(dims.data(), {4}, NCHW, halide_type_of<int32_t>());
auto singleScale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
scale = _Fill(dimVar, singleScale);
} else {
//MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
scale = _Maximum(_ReduceMax(_Abs(tempX), {0, 2, 3}, true), _Scalar<float>(0.0001f)) * mLimitScale;
}
}
scale.fix(VARP::CONSTANT);
VARP scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
if (useScale == nullptr) {
tempX = _Round(tempX * _Reciprocal(scale)) * scale;
} else {
tempX = _Round(tempX * _Reciprocal(useScale)) * useScale;
}
// Break the grad by use cast
tempX = _Cast<float>(tempX);
// Move grad from tempX to originX
tempX = _Convert(tempX + _ZeroGrad(originX), originFormat);
return std::make_pair(tempX, scale);
}
@ -684,18 +733,16 @@ public:
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
VARP res;
if (getIsTraining()) {
Variable::prepareCompute({inputs[0]});
auto x = _Convert(inputs[0], NCHW);
// simulate weight quant
auto weightScale = _Maximum(_ReduceMax(_Abs(mWeight), {1, 2, 3}, true), _Scalar<float>(1E-6)) * mLimitScale;
weightScale.fix(VARP::CONSTANT);
auto weightTemp = _Round(mWeight * _Reciprocal(weightScale)) * weightScale;
weightTemp = weightTemp + _ZeroGrad(mWeight);
// simulate input quant to get original input scale
auto inputPair = fakeQuantFeature(x);
mInputScale = updateScale(mInputScale, inputPair.second);
mInputScale.fix(VARP::CONSTANT);
setParameter(mInputScale, mInputScalePos);
// simulate output quant to get original output scale
res = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@ -709,10 +756,9 @@ public:
res = _activate(res, mActivation);
Variable::prepareCompute({conv, res});
auto outputPair = fakeQuantFeature(res);
mOutputScale = updateScale(mOutputScale, outputPair.second);
mOutputScale.fix(VARP::CONSTANT);
setParameter(mOutputScale, mOutputScalePos);
res = outputPair.first;
} else {
if (nullptr == mInputScale) {
@ -725,6 +771,7 @@ public:
auto x = _Convert(inputs[0], NCHW);
auto inputPair = fakeQuantFeature(x);
mInputScale = inputPair.second;
setParameter(mInputScale, mInputScalePos);
inputPair.first.fix(VARP::CONSTANT);
auto simuRes = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@ -737,6 +784,7 @@ public:
Variable::prepareCompute({simuRes});
auto outputPair = fakeQuantFeature(simuRes);
mOutputScale = outputPair.second;
setParameter(mOutputScale, mOutputScalePos);
outputPair.first.fix(VARP::CONSTANT);
}
@ -772,12 +820,7 @@ public:
{
std::vector<int> dims = {x->getInfo()->dim[1]};
auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
VARP channelScale;
if (mFeatureScaleStatMethod == NN::PerTensor) {
channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
} else {
channelScale = _Reciprocal(mInputScale);
}
VARP channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
x = _FloatToInt8(x, channelScale, -127, 127);// TODO add clamp
}
@ -824,12 +867,7 @@ public:
{
std::vector<int> dims = {res->getInfo()->dim[1]};
auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
VARP channelScale;
if (mFeatureScaleStatMethod == NN::PerTensor) {
channelScale = _Fill(dimVar, mOutputScale);
} else {
channelScale = mOutputScale;
}
VARP channelScale = _Fill(dimVar, mOutputScale);
res = _Int8ToFloat(res, channelScale);
}
}
@ -838,6 +876,34 @@ public:
}
private:
ConvBNReluFusedModule() = default;
Module* clone(CloneContext* ctx) const override {
ConvBNReluFusedModule* module(new ConvBNReluFusedModule);
module->mConvParameter = mConvParameter;
module->mConvParameter.weight = ctx->getOrClone(mConvParameter.weight);
module->mConvParameter.bias = ctx->getOrClone(mConvParameter.bias);
module->mOption = mOption;
module->mGroup = mGroup;
module->mWeight = ctx->getOrClone(mWeight);
module->mBias = ctx->getOrClone(mBias);
module->mActivation = mActivation;
module->mLimitScale = ctx->getOrClone(mLimitScale);
module->mInputScalePos = mInputScalePos;
module->mOutputScalePos = mOutputScalePos;
module->mInputScale = ctx->getOrClone(mInputScale);
module->mOutputScale = ctx->getOrClone(mOutputScale);
module->mClampValue = ctx->getOrClone(mClampValue);
module->mMomentum = mMomentum;
module->mFeatureScaleStatMethod = mFeatureScaleStatMethod;
module->mScaleUpdateMethod = mScaleUpdateMethod;
if (mBatchNorm) {
module->mBatchNorm.reset(mBatchNorm->clone(ctx));
module->registerModel({module->mBatchNorm});
}
return this->cloneBaseTo(ctx, module);
}
NN::ConvParameters mConvParameter;
NN::ConvOption mOption;
int mGroup;
@ -846,6 +912,8 @@ private:
NN::ActivationFunctionType mActivation = NN::ActivationFunctionType::None;
std::shared_ptr<Module> mBatchNorm = nullptr;
VARP mLimitScale;
int mInputScalePos = -1;
int mOutputScalePos = -1;
VARP mInputScale = nullptr;
VARP mOutputScale = nullptr;
VARP mClampValue;
@ -870,5 +938,5 @@ Module* NN::ConvInt8(const ConvParameters& para, int bits, NN::FeatureScaleStatM
return new ConvBNReluFusedModule({conv}, featureMethod, method, bits);
}
} // namespace Train
} // namespace Express
} // namespace MNN

View File

@ -0,0 +1,761 @@
//
// PipelineModule.cpp
// MNN
//
// Created by MNN on 2020/01/09.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "PipelineModule.hpp"
#include "MNN_generated.h"
#include <set>
#include <vector>
#include "StaticModule.hpp"
#include "IfModule.hpp"
#include "WhileModule.hpp"
using namespace MNN::Express;
namespace MNN {
namespace Express {
//#define DYNAMIC
#define PIPELINE_MODULE "_pipeline_module__"
class ExprModule : public Module {
public:
ExprModule(EXPRP expr) {
mExpr = expr;
setName(expr->name());
mInputs = expr->inputs();
auto op = mExpr->get();
if (op) {
auto typeName = EnumNameOpType(op->type());
setType(typeName);
}
for (int i = 0; i < mInputs.size(); ++i) {
auto inputExpr = mInputs[i]->expr().first;
if (inputExpr->get() != nullptr) {
mInputs[i] = nullptr;
mInputIndexes.emplace_back(i);
continue;
}
switch (inputExpr->inputType()) {
case VARP::INPUT:
mInputs[i] = nullptr;
mInputIndexes.emplace_back(i);
break;
case VARP::CONSTANT:
break;
case VARP::TRAINABLE:
addParameter(mInputs[i]);
break;
default:
break;
}
}
}
virtual std::vector<VARP> onForward(const std::vector<VARP>& inputs) override {
MNN_ASSERT(mInputIndexes.size() == inputs.size());
if (nullptr == mExpr->get()) {
return {Variable::create(mExpr)};
}
std::vector<VARP> tempInputs = mInputs;
for (int i = 0; i < inputs.size(); ++i) {
tempInputs[mInputIndexes[i]] = inputs[i];
}
std::vector<VARP> outputVars;
auto newExpr = Expr::create(mExpr->extra(), std::move(tempInputs), mExpr->outputSize());
newExpr->setName(mExpr->name());
for (int i = 0; i < mExpr->outputSize(); ++i) {
outputVars.emplace_back(Variable::create(newExpr, i));
}
return outputVars;
}
const std::vector<int>& inputIndexes() const {
return mInputIndexes;
}
private:
Module* clone(CloneContext* ctx) const override {
ExprModule* module(new ExprModule(ctx->getOrClone(mExpr)));
for (const VARP& var : mInputs) {
module->mInputs.push_back(ctx->getOrClone(var));
}
module->mInputIndexes = mInputIndexes;
return this->cloneBaseTo(ctx, module);
}
EXPRP mExpr;
std::vector<VARP> mInputs;
std::vector<int> mInputIndexes;
};
Module* PipelineModule::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(EXPRP)> transformFunction;
if (fortrain) {
transformFunction =
[&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
auto convExtracted = NN::Utils::ExtractConvolution(source);
if (convExtracted.weight == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> module(NN::Conv(convExtracted));
module->setName(source->name());
return std::make_pair(std::vector<int>{0}, module);
};
} else {
transformFunction = [&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
};
}
return new PipelineModule(inputs, outputs, transformFunction);
}
PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outputs, const Transformer& transformFunction) {
setType(PIPELINE_MODULE);
std::vector<EXPRP> executeOrder;
std::set<EXPRP> inputExpr;
for (auto v : inputs) {
inputExpr.insert(v->expr().first);
}
for (auto output : outputs) {
Expr::visit(output->expr().first,
[&executeOrder, &inputExpr](EXPRP expr) {
if (expr->visited()) {
return false;
}
if (inputExpr.find(expr)!= inputExpr.end()) {
expr->setVisited(true);
executeOrder.emplace_back(expr);
return false;
}
return true;
},
[&executeOrder](EXPRP expr) {
//FUNC_PRINT_ALL(var->name().c_str(), s);
if (!expr->visited()) {
executeOrder.emplace_back(expr);
expr->setVisited(true);
}
return true;
});
}
for (auto expr : executeOrder) {
expr->setVisited(false);
}
// Set Indexes
std::map<EXPRP, int> indexes;
int currentIndexes = 0;
for (auto expr : executeOrder) {
indexes[expr] = currentIndexes;
currentIndexes += expr->outputSize();
}
std::set<EXPRP> inputSets;
mInputIndexes.clear();
mStackSize = currentIndexes;
for (auto v : inputs) {
auto inputExpr = v->expr();
mInputIndexes.emplace_back(indexes[inputExpr.first] + inputExpr.second);
inputSets.insert(inputExpr.first);
}
// Create All SubModule
for (auto expr : executeOrder) {
if (inputSets.find(expr) != inputSets.end()) {
continue;
}
std::pair<std::vector<int>, std::shared_ptr<Module> > moduleResult;
bool extracted = false;
if (!transformFunction) {
moduleResult = std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
} else {
moduleResult = transformFunction(expr);
}
if (moduleResult.second == nullptr) {
std::shared_ptr<Module> module(new ExprModule(expr));
moduleResult.first = ((ExprModule*)module.get())->inputIndexes();
moduleResult.second = module;
} else {
extracted = true;
}
auto subInputs = expr->inputs();
auto& exprInputIndexes = moduleResult.first;
std::vector<int> inputIndexes;
if (exprInputIndexes.empty() && extracted) {
inputIndexes.resize(subInputs.size());
for (int i = 0; i < inputIndexes.size(); ++i) {
auto inputExpr = subInputs[i]->expr();
inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
}
} else {
inputIndexes.resize(exprInputIndexes.size());
for (int i = 0; i < inputIndexes.size(); ++i) {
auto inputExpr = subInputs[exprInputIndexes[i]]->expr();
inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
}
}
std::vector<int> outputIndexes(expr->outputSize());
for (int i = 0; i < outputIndexes.size(); ++i) {
outputIndexes[i] = indexes[expr] + i;
}
mSubModules.emplace_back(std::make_tuple(moduleResult.second, inputIndexes, outputIndexes));
registerModel({moduleResult.second});
}
mOutputIndexes.clear();
for (auto output : outputs) {
auto outputExpr = output->expr();
mOutputIndexes.emplace_back(indexes[outputExpr.first] + outputExpr.second);
}
}
bool PipelineModule::turnQuantize(Module* module, const int bit, NN::FeatureScaleStatMethod featureScaleStatMethod, NN::ScaleUpdateMethod scaleUpdateMethod) {
if (nullptr == module || module->type() != PIPELINE_MODULE) {
MNN_ERROR("Invalide module for quantized\n");
return false;
}
((PipelineModule*)module)->toTrainQuant(bit, featureScaleStatMethod, scaleUpdateMethod);
return true;
}
std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputIndices) {
MNN_ASSERT(outputIndices.size() > 0);
std::vector<int> countResult(outputIndices.size(), 0);
for (int i = 0; i < mSubModules.size(); i++) {
auto &m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto name = theModule->name();
auto &inputIndices = std::get<1>(m);
for (int j = 0; j < inputIndices.size(); j++) {
int index = inputIndices[j];
for (int k = 0; k < countResult.size(); k++) {
if (index == outputIndices[k]) {
countResult[k]++;
}
}
}
}
return countResult;
}
void PipelineModule::toTrainQuant(const int bits, NN::FeatureScaleStatMethod featureScaleStatMethod,
NN::ScaleUpdateMethod scaleUpdateMethod) {
std::vector<int> needEraseIndices;
for (int i = 0; i < mSubModules.size(); i++) {
auto& m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto moduleType = theModule->type();
//auto& inputIndices = std::get<1>(m);
auto& outputIndices = std::get<2>(m);
if (moduleType == "Conv" && i < mSubModules.size() - 1) {
auto& p1 = mSubModules[i+1];
auto p1Module = std::get<0>(p1);
auto& p1ModuleType = p1Module->type();
auto& p1InputIndices = std::get<1>(p1);
auto& p1OutputIndices = std::get<2>(p1);
auto convOutputCount = countOutputReference(outputIndices);
bool convSingleOutputReference = ((outputIndices.size() == 1) && (convOutputCount[0] == 1));
// only conv
if ((!convSingleOutputReference) || (p1ModuleType == "Conv") ||
(p1ModuleType != "BatchNorm" && p1ModuleType != "ReLU" && p1ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// conv + bn + ?
if (p1ModuleType == "BatchNorm") {
bool convBnConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convBnConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// last conv + bn
if (i == mSubModules.size() - 2) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
// maybe there is a relu or relu6 after conv + bn
auto& p2 = mSubModules[i+2];
auto& p2Module = std::get<0>(p2);
auto p2ModuleType = p2Module->type();
auto& p2InputIndices = std::get<1>(p2);
auto& p2OutputIndices = std::get<2>(p2);
auto bnOutputCount = countOutputReference(p1OutputIndices);
bool bnSingleOutputReference = ((p1OutputIndices.size() == 1) && (bnOutputCount[0] == 1));
// only conv + bn
if ((!bnSingleOutputReference) || (p2ModuleType != "ReLU" && p2ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
} else { // conv + bn + relu or conv + bn + relu6
bool convBnReluConnected = ((bnSingleOutputReference) && (p2InputIndices.size() == 1) && (p2InputIndices[0] == p1OutputIndices[0]));
if (!convBnReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
theModule.reset(NN::ConvBNReluFused({theModule, p1Module, p2Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p2OutputIndices;
needEraseIndices.emplace_back(i + 1);
needEraseIndices.emplace_back(i + 2);
continue;
}
}
// conv + relu or conv + relu6
if (p1ModuleType == "ReLU" || p1ModuleType == "ReLU6") {
bool convReluConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
}
if (i == mSubModules.size() - 1 && moduleType == "Conv") {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
}
}
// erase useless submodules
const int eraseSize = needEraseIndices.size();
int alreadyErasedCount = 0;
for (int i = 0; i < eraseSize; i++) {
auto position = needEraseIndices[i] - alreadyErasedCount;
auto type = std::get<0>(mSubModules[position])->type();
MNN_ASSERT(type == "BatchNorm" || type == "ReLU" || type == "ReLU6");
mSubModules.erase(mSubModules.begin() + position);
alreadyErasedCount++;
}
}
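// onForward below runs the sub-modules over a flat value stack: each input VARP is written to its
// reserved stack slot, every sub-module reads its input slots and writes its output slots in
// order, and the requested outputs are finally gathered from the stack.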
std::vector<VARP> PipelineModule::onForward(const std::vector<VARP>& inputs) {
std::vector<VARP> mStack(mStackSize);
for (int i = 0; i < mInputIndexes.size(); ++i) {
mStack[mInputIndexes[i]] = inputs[i];
}
for (int index = 0; index < mSubModules.size(); ++index) {
auto& m = mSubModules[index];
std::vector<VARP> tempInputs(std::get<1>(m).size());
for (int i = 0; i < tempInputs.size(); ++i) {
tempInputs[i] = mStack[std::get<1>(m)[i]];
MNN_ASSERT(nullptr != tempInputs[i]);
}
std::vector<VARP> tempOutputs = std::get<0>(m)->onForward(tempInputs);
MNN_ASSERT(tempOutputs.size() == std::get<2>(m).size());
for (int i = 0; i < tempOutputs.size(); ++i) {
mStack[std::get<2>(m)[i]] = tempOutputs[i];
MNN_ASSERT(nullptr != tempOutputs[i]);
}
}
std::vector<VARP> outputs(mOutputIndexes.size());
for (int i = 0; i < mOutputIndexes.size(); ++i) {
outputs[i] = mStack[mOutputIndexes[i]];
}
return outputs;
}
void PipelineModule::onClearCache() {
// Do nothing
}
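// _createSubGraph repacks every subgraph of the net into a standalone Net buffer and loads it as
// a Module (a nested PipelineModule when dynamic, otherwise a StaticModule), keyed by the
// subgraph's name so that If/While ops can resolve their branch and body graphs later.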
static std::map<std::string, SubGraph> _createSubGraph(const MNN::Net* net, bool dynamic) {
std::map<std::string, SubGraph> subGraphMap;
auto subGraphs = net->subgraphs();
if (nullptr == subGraphs) {
return subGraphMap;
}
for (int i=0; i<subGraphs->size(); ++i) {
auto graph = subGraphs->GetAs<SubGraphProto>(i);
std::vector<std::string> subInputs;
std::vector<std::string> subOutputs;
if (nullptr != graph->inputs()) {
for (int v=0; v<graph->inputs()->size(); ++v) {
auto index = graph->inputs()->data()[v];
subInputs.emplace_back(graph->tensors()->GetAsString(index)->str());
}
}
for (int v=0; v<graph->outputs()->size(); ++v) {
auto index = graph->outputs()->data()[v];
subOutputs.emplace_back(graph->tensors()->GetAsString(index)->str());
}
// Pack to Net for loading
std::shared_ptr<Module> submodule;
{
std::unique_ptr<SubGraphProtoT> _tempInfo(graph->UnPack());
std::unique_ptr<NetT> _tempNet(new NetT);
_tempNet->oplists = std::move(_tempInfo->nodes);
_tempNet->tensorName = std::move(_tempInfo->tensors);
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = Net::Pack(builder, _tempNet.get());
builder.Finish(offset);
if (dynamic) {
submodule.reset(PipelineModule::load(subInputs, subOutputs, (const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), dynamic));
} else {
submodule.reset(new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), subInputs, subOutputs));
}
if (graph->name() != nullptr) {
submodule->setName(graph->name()->str());
}
}
auto key = graph->name()->str();
SubGraph subgraph;
subgraph.inputs = std::move(subInputs);
subgraph.outputs = std::move(subOutputs);
subgraph.m = submodule;
subGraphMap.insert(std::make_pair(key, subgraph));
}
return subGraphMap;
}
struct SubModuleInfo {
std::vector<int> opList;
std::vector<int> inputs;
std::vector<int> outputs;
std::vector<uint8_t> tensorMask;
};
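// _createSubModuleInfo splits the op list into segments: every If/While op becomes its own
// single-op segment while consecutive ordinary ops are grouped together; the per-tensor mask is
// then used to derive each segment's external inputs and outputs, promoting internal tensors to
// outputs when a later segment (or the caller) needs them.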
static std::vector<SubModuleInfo> _createSubModuleInfo(const MNN::Net* net, const std::set<int>& inputIndexes, const std::set<int>& outputIndexes) {
std::vector<SubModuleInfo> submodule;
SubModuleInfo current;
std::vector<int> inputOps;
// Separate the graph into several submodules
for (int i=0; i<net->oplists()->size(); ++i) {
auto op = net->oplists()->GetAs<Op>(i);
// Collect Input
if (op->type() == OpType_Input) {
inputOps.emplace_back(i);
continue;
}
if (op->type() == OpType_If || op->type() == OpType_While) {
if (current.opList.size() > 0) {
// Not empty
submodule.emplace_back(std::move(current));
}
SubModuleInfo controlOp;
controlOp.opList = {i};
submodule.emplace_back(std::move(controlOp));
continue;
}
current.opList.emplace_back(i);
}
if (!current.opList.empty()) {
submodule.emplace_back(std::move(current));
}
/**Compute All SubModule's inputs and outputs*/
// Per-tensor mask: 0: not used, 1: read only (segment input), 2: written only (segment output), 3: read and written (internal)
for (int moduleIndex=0; moduleIndex < submodule.size(); ++moduleIndex) {
auto& m = submodule[moduleIndex];
if (1 == m.opList.size()) {
// Fast way to determine
auto op = net->oplists()->GetAs<Op>(m.opList[0]);
if (nullptr != op->inputIndexes()) {
m.inputs.resize(op->inputIndexes()->size());
::memcpy(m.inputs.data(), op->inputIndexes()->data(), m.inputs.size() * sizeof(int));
}
if (nullptr != op->outputIndexes()) {
m.outputs.resize(op->outputIndexes()->size());
::memcpy(m.outputs.data(), op->outputIndexes()->data(), m.outputs.size() * sizeof(int));
}
} else {
m.tensorMask = std::vector<uint8_t>(net->tensorName()->size(), 0);
auto& tensorMask = m.tensorMask;
for (auto opIndex : m.opList) {
auto op = net->oplists()->GetAs<Op>(opIndex);
if (nullptr != op->inputIndexes()) {
for (int v=0; v<op->inputIndexes()->size(); ++v) {
auto index = op->inputIndexes()->data()[v];
tensorMask[index] = tensorMask[index] | 1;
}
}
if (nullptr != op->outputIndexes()) {
for (int v=0; v<op->outputIndexes()->size(); ++v) {
auto index = op->outputIndexes()->data()[v];
tensorMask[index] = tensorMask[index] | 2;
}
}
}
for (int i=0; i<tensorMask.size(); ++i) {
if (0 == tensorMask[i]) {
continue;
}
if (1 == tensorMask[i]) {
m.inputs.emplace_back(i);
continue;
}
if (2 == tensorMask[i]) {
m.outputs.emplace_back(i);
continue;
}
if (3 == tensorMask[i]) {
if (outputIndexes.find(i) != outputIndexes.end()) {
m.outputs.emplace_back(i);
}
}
}
}
// Check if the module's input is valid
for (int i=0; i<m.inputs.size(); ++i) {
auto index = m.inputs[i];
if (inputIndexes.find(index) != inputIndexes.end()) {
continue;
}
bool find = false;
for (int sub=0; sub < moduleIndex; ++sub) {
for (auto out : submodule[sub].outputs) {
if (out == index) {
find = true;
break;
}
}
if (find) {
break;
}
}
if (find) {
continue;
}
// Find from module
for (int sub=0; sub < moduleIndex; ++sub) {
if (submodule[sub].tensorMask.empty()) {
continue;
}
if (submodule[sub].tensorMask[index] == 2) {
find = true;
break;
}
if (submodule[sub].tensorMask[index] == 3) {
submodule[sub].outputs.emplace_back(index);
submodule[sub].tensorMask[index] = 2;
find = true;
break;
}
}
MNN_ASSERT(find);
}
}
for (auto& m : submodule) {
m.tensorMask.clear();
}
return submodule;
}
static Module* _createSubModule(const MNN::Net* net, const SubModuleInfo& info, const std::map<std::string, SubGraph>& subs) {
if (1 == info.opList.size()) {
auto op = net->oplists()->GetAs<Op>(info.opList[0]);
if (OpType_If == op->type()) {
return IfModule::create(op, subs);
}
if (OpType_While == op->type()) {
return WhileModule::create(op, subs);
}
MNN_ASSERT(false);
}
std::unique_ptr<NetT> _tempNet(new NetT);
// Copy Tensor Name
_tempNet->tensorName.resize(net->tensorName()->size());
for (int i=0; i<net->tensorName()->size(); ++i) {
_tempNet->tensorName[i] = net->tensorName()->GetAsString(i)->str();
}
// Create Input node
std::vector<std::string> inputNames;
for (auto index : info.inputs) {
std::unique_ptr<OpT> inputOp(new OpT);
inputOp->outputIndexes = {index};
inputOp->type = OpType_Input;
inputOp->main.type = OpParameter_Input;
inputOp->main.value = new InputT;
inputOp->main.AsInput()->dims = {0, 0, -1, -1};
_tempNet->oplists.emplace_back(std::move(inputOp));
inputNames.emplace_back(_tempNet->tensorName[index]);
}
// Create compute node
for (auto opIndex : info.opList) {
std::unique_ptr<OpT> op(net->oplists()->GetAs<Op>(opIndex)->UnPack());
_tempNet->oplists.emplace_back(std::move(op));
}
// Get output names
std::vector<std::string> outputNames;
for (auto index : info.outputs) {
outputNames.emplace_back(_tempNet->tensorName[index]);
}
// Create Net Buffer
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = Net::Pack(builder, _tempNet.get());
builder.Finish(offset);
_tempNet.reset();
return new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), inputNames, outputNames);
}
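// load() below builds the full module: with no subgraphs and dynamic == false it degenerates to a
// single StaticModule; otherwise it creates the subgraph map, splits the main graph into
// sub-modules, and wires them together through a shared stack of tensor slots.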
Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
// Create Subgraph
auto net = GetNet(buffer);
auto subGraphs = net->subgraphs();
if (nullptr == net->oplists() || nullptr == net->tensorName()) {
MNN_ERROR("Invalid net, for null oplist or tensorName\n");
return nullptr;
}
if (!dynamic) {
if (nullptr == subGraphs) {
// Has no control flow, can just use static module
return new StaticModule(buffer, length, inputs, outputs);
}
}
auto subGraphMap = _createSubGraph(net, dynamic);
if (dynamic) {
// For dynamic mode
auto varMaps = Variable::loadMap(buffer, length);
std::vector<VARP> inputVars(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputVars[i] = varMaps[inputs[i]];
}
std::vector<VARP> outputVars(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputVars[i] = varMaps[outputs[i]];
}
return extract(inputVars, outputVars, false, subGraphMap);
}
std::set<int> inputIndexes;
std::set<int> outputIndexes;
std::map<std::string, int> inputsMap;
std::map<std::string, int> outputsMap;
for (int i=0; i<net->tensorName()->size(); ++i) {
auto tname = net->tensorName()->GetAsString(i)->str();
for (auto& s : inputs) {
if (tname == s) {
inputIndexes.emplace(i);
inputsMap.insert(std::make_pair(s, i));
break;
}
}
for (auto& s : outputs) {
if (tname == s) {
outputIndexes.emplace(i);
outputsMap.insert(std::make_pair(s, i));
break;
}
}
}
std::vector<int> inputIndexesVec(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputIndexesVec[i] = inputsMap[inputs[i]];
}
std::vector<int> outputIndexesVec(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputIndexesVec[i] = outputsMap[outputs[i]];
}
auto subModulesInfo = _createSubModuleInfo(net, inputIndexes, outputIndexes);
std::vector<std::shared_ptr<Module>> subModules(subModulesInfo.size());
for (int i=0; i<subModulesInfo.size(); ++i) {
subModules[i].reset(_createSubModule(net, subModulesInfo[i], subGraphMap));
}
auto result = new PipelineModule;
/**
Compute:
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
*/
// Build the stack index map: key is the original tensor index, value is its new stack slot
std::map<int, int> stackMap;
int stackIndex = 0;
for (auto& m : subModulesInfo) {
for (auto index : m.inputs) {
if (stackMap.find(index) == stackMap.end()) {
stackMap.insert(std::make_pair(index, stackIndex));
stackIndex++;
}
}
for (auto index : m.outputs) {
if (stackMap.find(index) == stackMap.end()) {
stackMap.insert(std::make_pair(index, stackIndex));
stackIndex++;
}
}
}
result->mStackSize = stackMap.size();
for (int i=0; i<subModulesInfo.size(); ++i) {
auto& info = subModulesInfo[i];
// Reindex stack index
std::vector<int> subInputs(info.inputs.size());
for (int i=0; i<info.inputs.size(); ++i) {
subInputs[i] = stackMap[info.inputs[i]];
}
std::vector<int> subOutputs(info.outputs.size());
for (int i=0; i<info.outputs.size(); ++i) {
subOutputs[i] = stackMap[info.outputs[i]];
}
result->mSubModules.emplace_back(std::make_tuple(subModules[i], subInputs, subOutputs));
}
for (int i=0; i<inputIndexesVec.size(); ++i) {
inputIndexesVec[i] = stackMap[inputIndexesVec[i]];
}
for (int i=0; i<outputIndexesVec.size(); ++i) {
outputIndexesVec[i] = stackMap[outputIndexesVec[i]];
}
result->mInputIndexes = std::move(inputIndexesVec);
result->mOutputIndexes = std::move(outputIndexesVec);
return result;
}
Module* PipelineModule::clone(CloneContext* ctx) const {
PipelineModule* module(new PipelineModule);
for (const auto& it : mSubModules) {
const std::shared_ptr<Module>& submodule = std::get<0>(it);
const std::vector<int>& input_indices = std::get<1>(it);
const std::vector<int>& output_indices = std::get<2>(it);
std::shared_ptr<Module> replica_submodule(submodule->clone(ctx));
module->mSubModules.push_back(
std::make_tuple(replica_submodule, input_indices, output_indices));
module->registerModel({replica_submodule});
}
module->mInputIndexes = mInputIndexes;
module->mOutputIndexes = mOutputIndexes;
module->mStackSize = mStackSize;
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN

View File

@ -8,16 +8,20 @@
#ifndef PipelineModule_hpp
#define PipelineModule_hpp
#include "Module.hpp"
#include "NN.hpp"
#include <MNN/expr/Module.hpp>
#include <MNN/expr/NN.hpp>
#include <MNN/expr/ExprCreator.hpp>
namespace MNN {
namespace Train {
namespace Express {
class MNN_PUBLIC PipelineModule : public Module {
public:
typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph = {});
static Module* extractOrigin(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain) {
return extract(inputs, outputs, fortrain);
}
static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
@ -26,14 +30,18 @@ public:
std::vector<int> countOutputReference(std::vector<int> outputIndices);
private:
PipelineModule(){}
PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
const Transformer& transformFunction = {});
Module* clone(CloneContext* ctx) const override;
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
std::vector<Express::VARP> mStack;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
};
} // namespace Train
} // namespace Express
} // namespace MNN
#endif

View File

@ -0,0 +1,186 @@
//
// StaticModule.cpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "StaticModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/AutoTime.hpp>
#include "core/TensorUtils.hpp"
#include "core/Session.hpp"
#include <MNN/expr/Executor.hpp>
#include <MNN/AutoTime.hpp>
#include <MNN/expr/ExecutorScope.hpp>
namespace MNN {
namespace Express {
StaticModule::StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix) : mInputs(inputs), mOutputs(outputs) {
mShapeFix = shapeFix;
mOutputNumbers = (int)outputs.size();
/** Compute:
std::vector<int> mOutputFromTensor;
std::vector<std::pair<int, int>> mOutputFromInput;
*/
for (int i=0; i<outputs.size(); ++i) {
auto& t = outputs[i];
bool fromInput = false;
for (int j=0; j<inputs.size(); ++j) {
if (inputs[j] == t) {
fromInput = true;
mOutputFromInput.emplace_back(std::make_pair(i, j));
break;
}
}
if (fromInput) {
continue;
}
mOutputFromTensor.emplace_back(i);
}
if (mOutputFromTensor.empty()) {
return;
}
mNet.reset(Interpreter::createFromBuffer(buffer, length));
#ifdef MNN_EXPR_ENABLE_PROFILER
mNet->setSessionMode(Interpreter::Session_Debug);
#else
mNet->setSessionMode(Interpreter::Session_Release);
#endif
if (mShapeFix) {
mNet->setSessionMode(Interpreter::Session_Input_Inside);
} else {
mNet->setSessionMode(Interpreter::Session_Input_User);
}
auto rt = Express::ExecutorScope::Current()->getRuntime();
// TODO: Add Config
ScheduleConfig config;
config.numThread = 1;
config.type = rt.first.begin()->first;
config.saveTensors = outputs;
mSession = mNet->createSession(config, rt);
mInputTensors.resize(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
mInputTensors[i] = mNet->getSessionInput(mSession, inputs[i].c_str());
}
mOutputTensors.resize(mOutputFromTensor.size());
for (int i=0; i<mOutputFromTensor.size(); ++i) {
mOutputTensors[i] = mNet->getSessionOutput(mSession, outputs[mOutputFromTensor[i]].c_str());
}
}
StaticModule:: ~ StaticModule() {
// Do nothing
}
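// onForward copies pass-through outputs straight from the inputs, binds the remaining inputs to
// session tensors (by host pointer when shapes are not fixed, by memcpy after resize otherwise),
// resizes and runs the session, and finally wraps each session output tensor in a constant VARP.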
std::vector<Express::VARP> StaticModule::onForward(const std::vector<Express::VARP>& inputs) {
AUTOTIME;
std::vector<Express::VARP> outputs(mOutputNumbers);
for (auto& iter : mOutputFromInput) {
outputs[iter.first] = inputs[iter.second];
}
if (mOutputFromTensor.empty()) {
return outputs;
}
MNN_ASSERT(inputs.size() == mInputTensors.size());
for (int i=0; i<inputs.size(); ++i) {
auto info = inputs[i]->getInfo();
mInputTensors[i]->buffer().type = info->type;
auto des = TensorUtils::getDescribe(mInputTensors[i]);
if (info->order == Express::NCHW) {
des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
}
if (info->order == Express::NHWC) {
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
}
if (info->order == Express::NC4HW4) {
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
}
mNet->resizeTensor(mInputTensors[i], info->dim);
}
if (!mShapeFix) {
for (int i=0; i<inputs.size(); ++i) {
mInputTensors[i]->buffer().host = (uint8_t*)inputs[i]->readMap<void>();
}
// FIXME: Use Interpreter's API
mSession->setNeedResize();
}
mNet->resizeSession(mSession);
if (mShapeFix) {
for (int i=0; i<inputs.size(); ++i) {
// Inputs used only for shape inference have no allocated host memory, so skip the copy
if (nullptr != mInputTensors[i]->host<void>()) {
::memcpy(mInputTensors[i]->host<void>(), inputs[i]->readMap<void>(), mInputTensors[i]->size());
}
}
}
#ifdef MNN_EXPR_ENABLE_PROFILER
auto globalExecutor = ExecutorScope::Current();
Timer cost;
TensorCallBackWithInfo beforeCallBack = [&cost] (const std::vector<Tensor*>&, const OperatorInfo* info) {
cost.reset();
return true;
};
TensorCallBackWithInfo afterCallBack = [&cost, globalExecutor] (const std::vector<Tensor*>&, const OperatorInfo* info) {
auto costTimes = (float)cost.durationInUs() / 1000.0f;
globalExecutor->addOpCostTime(info->type(), costTimes);
globalExecutor->addOpFlops(info->type(), info->flops());
return true;
};
mNet->runSessionWithCallBackInfo(mSession, beforeCallBack, afterCallBack);
#else
mNet->runSession(mSession);
#endif
for (int i=0; i<mOutputTensors.size(); ++i) {
Express::Variable::Info info;
info.dim = mOutputTensors[i]->shape();
info.type = mOutputTensors[i]->getType();
auto format = TensorUtils::getDescribe(mOutputTensors[i])->dimensionFormat;
info.order = Express::NHWC;
if (format == MNN_DATA_FORMAT_NCHW) {
info.order = Express::NCHW;
} else if (format == MNN_DATA_FORMAT_NC4HW4) {
info.order = Express::NC4HW4;
}
outputs[mOutputFromTensor[i]] = Express::Variable::create(Express::Expr::create(std::move(info), mOutputTensors[i]->host<void>(), Express::VARP::CONSTANT, true), 0);
//::memcpy(outputs[i]->writeMap<void>(), mOutputTensors[i]->host<void>(), mOutputTensors[i]->size());
}
return outputs;
}
Module* StaticModule::clone(CloneContext* ctx) const {
StaticModule* module(new StaticModule);
module->mInputs = mInputs;
module->mOutputs = mOutputs;
module->mShapeFix = mShapeFix;
module->mOutputNumbers = mOutputNumbers;
module->mOutputFromInput = mOutputFromInput;
module->mOutputFromTensor = mOutputFromTensor;
if (mOutputFromTensor.empty()) {
return this->cloneBaseTo(ctx, module);
}
module->mNet = mNet;
auto rt = Express::ExecutorScope::Current()->getRuntime();
ScheduleConfig config;
config.numThread = 1;
config.type = rt.first.begin()->first;
config.saveTensors = mOutputs;
module->mSession = module->mNet->createSession(config, rt);
module->mInputTensors.resize(mInputs.size());
module->mOutputTensors.resize(mOutputFromTensor.size());
for (int i=0; i<mInputs.size(); ++i) {
module->mInputTensors[i] =
module->mNet->getSessionInput(module->mSession, mInputs[i].c_str());
}
for (int i=0; i<mOutputFromTensor.size(); ++i) {
module->mOutputTensors[i] = module->mNet->getSessionOutput(
module->mSession, mOutputs[mOutputFromTensor[i]].c_str());
}
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN

View File

@ -0,0 +1,44 @@
//
// StaticModule.hpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef StaticModule_hpp
#define StaticModule_hpp
#include <MNN/expr/Module.hpp>
#include <MNN/Interpreter.hpp>
namespace MNN {
namespace Express {
class StaticModule : public Module {
public:
StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix = false);
virtual ~ StaticModule();
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
private:
StaticModule() = default;
Module* clone(CloneContext* ctx) const override;
std::vector<std::string> mInputs;
std::vector<std::string> mOutputs;
std::shared_ptr<Interpreter> mNet;
Session* mSession;
std::vector<Tensor*> mInputTensors;
std::vector<Tensor*> mOutputTensors;
bool mShapeFix;
int mOutputNumbers;
// Output indexes whose values come from the session's output tensors
std::vector<int> mOutputFromTensor;
// First: outputIndex, Second: input var index
std::vector<std::pair<int, int>> mOutputFromInput;
};
} // namespace Express
} // namespace MNN
#endif

View File

@ -0,0 +1,186 @@
//
// WhileModule.cpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "WhileModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
for (int i=0; i<names.size(); ++i) {
if (names[i] == key) {
return i;
}
}
return -1;
}
WhileModule* WhileModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
auto module = new WhileModule;
auto whileParam = op->main_as_WhileParam();
auto& body = subGraph.find(whileParam->body_graph()->str())->second;
auto& cond = subGraph.find(whileParam->cond_graph()->str())->second;
module->mBody = body.m;
module->mCond = cond.m;
/** Compute map index
int mCondInputNumber;
int mBodyInputNumber;
// First: mCondInputs' index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForCond;
// First: mBodyInputs' index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForBody;
std::vector<int> mOutputFromBody;
std::vector<std::pair<int, int>> mUpdateForCond;
std::vector<std::pair<int, int>> mUpdateForBody;
std::vector<std::pair<int, int>> mCondUpdateForCond;
std::vector<std::pair<int, int>> mCondUpdateForBody;
*/
// Map Inputs
module->mBodyInputNumber = body.inputs.size();
module->mCondInputNumber = cond.inputs.size();
for (int i=0; i<whileParam->aliases_inputs()->size(); ++i) {
auto index = i;
auto data = whileParam->aliases_inputs()->GetAs<StringVec>(i);
for (int s=0; s<data->data()->size(); ++s) {
auto name = data->data()->GetAsString(s)->str();
auto bodyInputPos = _findPos(body.inputs, name);
if (bodyInputPos >= 0) {
module->mInputForBody.emplace_back(std::make_pair(bodyInputPos, i));
}
auto condInputPos = _findPos(cond.inputs, name);
if (condInputPos >= 0) {
module->mInputForCond.emplace_back(std::make_pair(condInputPos, i));
}
}
}
// Map update
auto update = whileParam->aliases_updates();
std::map<int, int> replaceOutputs;
for (int i=0; i<update->size(); ++i) {
auto data = update->GetAs<StringVec>(i);
int bodyInputPos = -1;
int condInputPos = -1;
int bodyOutputPos = -1;
int condOutputPos = -1;
MNN_ASSERT(2 == data->data()->size());
auto outputName = data->data()->GetAsString(0)->str();
auto inputName = data->data()->GetAsString(1)->str();
bodyInputPos = _findPos(body.inputs, inputName);
condInputPos = _findPos(cond.inputs, inputName);
bodyOutputPos = _findPos(body.outputs, outputName);
condOutputPos = _findPos(cond.outputs, outputName);
auto updateBodyOutputPos = _findPos(body.outputs, inputName);
MNN_ASSERT(bodyOutputPos == -1 || condOutputPos == -1);
if (condOutputPos >= 0) {
if (bodyInputPos >= 0) {
module->mCondUpdateForBody.emplace_back(std::make_pair(bodyInputPos, condOutputPos));
}
if (condInputPos >= 0) {
module->mCondUpdateForCond.emplace_back(std::make_pair(condInputPos, condOutputPos));
}
}
if (bodyOutputPos >= 0) {
if (bodyInputPos >= 0) {
module->mUpdateForBody.emplace_back(std::make_pair(bodyInputPos, bodyOutputPos));
}
if (condInputPos >= 0) {
module->mUpdateForCond.emplace_back(std::make_pair(condInputPos, bodyOutputPos));
}
if (updateBodyOutputPos >= 0) {
replaceOutputs.insert(std::make_pair(updateBodyOutputPos, bodyOutputPos));
}
}
}
// Map outputs
auto output = whileParam->aliases_outputs();
for (int i=0; i<output->size(); ++i) {
auto data = output->GetAsString(i);
auto pos = _findPos(body.outputs, data->str());
MNN_ASSERT(pos >= 0);
if (replaceOutputs.find(pos) != replaceOutputs.end()) {
pos = replaceOutputs[pos];
}
module->mOutputFromBody.emplace_back(pos);
}
return module;
}
std::vector<Express::VARP> WhileModule::onForward(const std::vector<Express::VARP>& inputsI) {
std::vector<Express::VARP> condInputs(mCondInputNumber);
std::vector<Express::VARP> bodyInputs(mBodyInputNumber);
auto& inputs = inputsI;
for (auto& p : mInputForCond) {
condInputs[p.first] = inputs[p.second];
}
for (auto& p : mInputForBody) {
bodyInputs[p.first] = inputs[p.second];
}
std::vector<Express::VARP> outputs(mOutputFromBody.size());
while (true) {
auto res = mCond->onForward(condInputs)[0];
auto resPtr = res->readMap<int>();
if (resPtr[0] <= 0) {
break;
}
auto bodyOutputs = mBody->onForward(bodyInputs);
Express::Variable::prepareCompute(bodyOutputs);
for (int i=0; i<bodyOutputs.size(); ++i) {
auto p = bodyOutputs[i];
if (p->expr().first->get() != nullptr) {
auto ptr = p->readMap<void>();
auto info = p->getInfo();
auto newV = Express::_Input(info->dim, info->order, info->type);
if (nullptr != ptr) {
::memcpy(newV->writeMap<void>(), ptr, info->type.bytes() * info->size);
}
bodyOutputs[i] = newV;
}
}
for (int i=0; i<mOutputFromBody.size(); ++i) {
outputs[i] = bodyOutputs[mOutputFromBody[i]];
}
for (auto& p : mUpdateForCond) {
condInputs[p.first] = bodyOutputs[p.second];
}
for (auto& p : mUpdateForBody) {
bodyInputs[p.first] = bodyOutputs[p.second];
}
for (auto& p : mCondUpdateForCond) {
condInputs[p.first] = res;
}
for (auto& p : mCondUpdateForBody) {
bodyInputs[p.first] = res;
}
}
return outputs;
}
Module* WhileModule::clone(CloneContext* ctx) const {
WhileModule* module(new WhileModule);
module->mCondInputNumber = mCondInputNumber;
module->mBodyInputNumber = mBodyInputNumber;
module->mInputForCond = mInputForCond;
module->mInputForBody = mInputForBody;
module->mOutputFromBody = mOutputFromBody;
module->mUpdateForCond = mUpdateForCond;
module->mUpdateForBody = mUpdateForBody;
module->mCondUpdateForCond = mCondUpdateForCond;
module->mCondUpdateForBody = mCondUpdateForBody;
module->mCond.reset(mCond->clone(ctx));
module->mBody.reset(mBody->clone(ctx));
return this->cloneBaseTo(ctx, module);
}
};
};
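
WhileModule::onForward above is essentially a generic loop driver: evaluate the cond subgraph, and while it yields a positive scalar, run the body subgraph and copy selected body outputs back into the next iteration's cond/body inputs according to the alias maps built in create(). A standalone sketch of that control flow, with plain lambdas standing in for the two subgraphs (the i < limit counter is purely illustrative, not taken from the model format):

#include <cstdio>
#include <utility>
#include <vector>

int main() {
    // State vector playing the role of the loop-carried inputs: {i, limit, acc}
    std::vector<int> state = {0, 5, 0};
    auto cond = [](const std::vector<int>& s) { return s[0] < s[1] ? 1 : 0; };
    auto body = [](const std::vector<int>& s) {
        // Body outputs: {i + 1, acc + i}
        return std::vector<int>{s[0] + 1, s[2] + s[0]};
    };
    // In the spirit of mUpdateForBody / mUpdateForCond:
    // first = input slot to overwrite, second = body output index to read.
    std::vector<std::pair<int, int>> update = {{0, 0}, {2, 1}};
    while (cond(state) > 0) {
        auto out = body(state);
        for (auto& p : update) {
            state[p.first] = out[p.second];
        }
    }
    std::printf("i=%d acc=%d\n", state[0], state[2]); // prints i=5 acc=10
    return 0;
}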

View File

@ -0,0 +1,46 @@
//
// WhileModule.hpp
// MNN
//
// Created by MNN on 2020/09/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef WhileModule_hpp
#define WhileModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class WhileModule : public Module {
public:
virtual ~ WhileModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
private:
WhileModule(){}
Module* clone(CloneContext* ctx) const override;
int mCondInputNumber;
int mBodyInputNumber;
// First: mCondInputs' index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForCond;
// First: mBodyInputs' index, Second: inputs' index
std::vector<std::pair<int, int>> mInputForBody;
std::vector<int> mOutputFromBody;
std::vector<std::pair<int, int>> mUpdateForCond;
std::vector<std::pair<int, int>> mUpdateForBody;
std::vector<std::pair<int, int>> mCondUpdateForCond;
std::vector<std::pair<int, int>> mCondUpdateForBody;
std::shared_ptr<Module> mCond;
std::shared_ptr<Module> mBody;
};
}
}
#endif

View File

@ -11,6 +11,7 @@
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <MNN/ErrorCode.hpp>
#include <MNN/MNNForwardType.h>
@ -67,6 +68,7 @@ class Session;
struct Content;
class Tensor;
class Backend;
class Runtime;
class MNN_PUBLIC OperatorInfo {
struct Info;
@ -89,6 +91,7 @@ protected:
typedef std::function<bool(const std::vector<Tensor*>&, const std::string& /*opName*/)> TensorCallBack;
typedef std::function<bool(const std::vector<Tensor*>&, const OperatorInfo*)> TensorCallBackWithInfo;
typedef std::pair<std::map<MNNForwardType, std::shared_ptr<Runtime>>, std::shared_ptr<Runtime>> RuntimeInfo;
/** net data holder. multiple sessions could share same net. */
class MNN_PUBLIC Interpreter {
@ -108,7 +111,43 @@ public:
static Interpreter* createFromBuffer(const void* buffer, size_t size);
~Interpreter();
enum SessionMode {
/** About CallBack, Default Session_Debug*/
/** runSessionWithCallBack is allowed and can get internal op info*/
Session_Debug = 0,
/** runSessionWithCallBack is not valid and can't get any info of op in session*/
Session_Release = 1,
/** About input tensor, Default Session_Input_Inside*/
/** The input tensor is allocated by the session; set input data after the session is resized*/
Session_Input_Inside = 2,
/** The input tensor is allocated by the user; set input data before the session is resized*/
Session_Input_User = 3,
};
/**
* @brief This API should be called before creating a session.
* @param mode session mode
* @return void
*/
void setSessionMode(SessionMode mode);
/**
* @brief This API should be called before creating a session.
* If the cache file exists, try to load the cache from it.
* After createSession, try to save the cache to the file.
* @param cacheFile cache file name
* @param keySize the first `keySize` bytes used as the key to check if the `cacheFile` exists.
* @return void
*/
void setCacheFile(const char* cacheFile, size_t keySize = 128);
public:
/**
* @brief create runtimeInfo separately with schedule configs.
* @param config session schedule configs.
*/
static RuntimeInfo createRuntime(const std::vector<ScheduleConfig>& configs);
/**
* @brief create session with schedule config. created session will be managed in net.
* @param config session schedule config.
@ -116,6 +155,13 @@ public:
*/
Session* createSession(const ScheduleConfig& config);
/**
* @brief create session with schedule config and user-specified runtime.
* @param config session schedule config, runtime runtimeInfo used by the created session.
* @return created session if success, NULL otherwise.
*/
Session* createSession(const ScheduleConfig& config, const RuntimeInfo& runtime);
/**
* @brief create multi-path session with schedule configs. created session will be managed in net.
* @param configs session schedule configs.
@ -123,6 +169,14 @@ public:
*/
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs);
/**
* @brief create multi-path session with schedule configs and user-specified runtime.
created session will be managed in net.
* @param configs session schedule configs.
* @return created session if success, NULL otherwise.
*/
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime);
/**
* @brief release session.
* @param session given session.
@ -204,17 +258,39 @@ public:
*/
Tensor* getSessionOutput(const Session* session, const char* name);
enum SessionInfoCode {
/** memory used by the session, in MB, float* */
MEMORY = 0,
/** float operations needed by the session, in M (millions), float* */
FLOPS = 1,
/** backends used in the session, int*, length >= the number of configs used to create the session */
BACKENDS = 2,
ALL
};
/**
* @brief get all input tensors.
* @brief get session info
* @param session given session.
* @return all input tensors mapped with name.
* @param code given info code.
* @param ptr given info pointer, see SessionInfoCode for details
* @return true if the code is supported, false otherwise.
*/
const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
bool getSesionInfo(const Session* session, SessionInfoCode code, void* ptr);
/**
* @brief get all output tensors.
* @param session given session.
* @return all output tensors mapped with name.
*/
const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
/**
* @brief get all input tensors.
* @param session given session.
* @return all input tensors mapped with name.
*/
const std::map<std::string, Tensor*>& getSessionInputAll(const Session* session) const;
public:

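For context, a minimal usage sketch of the session-control additions above (setSessionMode, setCacheFile, the SessionInfoCode query); the model path and cache file name are placeholders:

#include <MNN/Interpreter.hpp>
#include <cstdio>

int main() {
    auto net = MNN::Interpreter::createFromFile("model.mnn"); // placeholder path
    // Both calls must happen before the session is created.
    net->setSessionMode(MNN::Interpreter::Session_Release);
    net->setCacheFile(".mnn_cache"); // loaded if present, written back after createSession
    MNN::ScheduleConfig config;
    config.type = MNN_FORWARD_CPU;
    auto session = net->createSession(config);
    float memoryMB = 0.0f;
    if (net->getSesionInfo(session, MNN::Interpreter::MEMORY, &memoryMB)) {
        std::printf("session memory: %.2f MB\n", memoryMB);
    }
    net->releaseSession(session);
    delete net;
    return 0;
}
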
View File

@ -38,13 +38,7 @@
} \
}
#else
#define MNN_ASSERT(x) \
{ \
int res = (x); \
if (!res) { \
MNN_ERROR("Error for %d\n", __LINE__); \
} \
}
#define MNN_ASSERT(x)
#endif
#define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__);

View File

@ -23,8 +23,8 @@ typedef enum {
/*Hand write metal*/
MNN_FORWARD_METAL = 1,
/*Use IOS's MPS instead of hand-write metal, Not Support yet*/
MNN_FORWARD_MPS = 2,
/*NVIDIA GPU API*/
MNN_FORWARD_CUDA = 2,
/*Android / Common Device GPU API*/
MNN_FORWARD_OPENCL = 3,
@ -41,13 +41,13 @@ typedef enum {
MNN_FORWARD_USER_3 = 11,
MNN_FORWARD_ALL,
/* Apply the ARM extension instruction set to accelerate some ops. This forward type
is only used internally by MNN, and is activated automatically when the user sets the forward
type to MNN_FORWARD_CPU and the extension instruction set is available on the hardware.
*/
MNN_FORWARD_CPU_EXTENSION
} MNNForwardType;
#ifdef __cplusplus
namespace MNN {

View File

@ -12,6 +12,7 @@
#include <vector>
#include <MNN/HalideRuntime.h>
#include <MNN/MNNDefine.h>
#define MNN_MAX_TENSOR_DIM 6
namespace MNN {

View File

@ -10,6 +10,7 @@
#include <MNN/ErrorCode.hpp>
#include <MNN/expr/Expr.hpp>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include <vector>
#include <mutex>
#include <set>
@ -17,41 +18,19 @@
namespace MNN {
class Backend;
class Execution;
class Runtime;
struct Op;
namespace Express {
class MNN_PUBLIC Executor {
public:
class ComputeCache {
public:
void setShapeDirty(int offset, Variable::Info* info);
void setContentDirty();
void setContentReady();
void syncInput(int offset, const Variable::Info* info);
void syncOutput(int offset, Variable::Info* info);
struct TensorContent {
std::shared_ptr<Tensor> tensor;
int refCount = 0;
void reset();
bool aliveOutside = false;
};
struct Unit;
virtual ~ ComputeCache() {}
ComputeCache() {}
virtual ErrorCode compute() = 0;
virtual ErrorCode resize() = 0;
protected:
// Get the index tensor with the need of needBackend
// If the Tensor don't belong to the backend, need use needBackend to alloc it and return
virtual Tensor* getTensor(int index, bool host) = 0;
void _setShapeDirty();
friend class Executor;
bool mContentDirty = true;
bool mShapeDirty = true;
};
class ComputeCache;
struct Unit;
static void setShapeDirty(ComputeCache* cache);
static void setContentDirty(ComputeCache* cache);
static void* mapOutput(ComputeCache* cache, int offset, Tensor* dest);
struct Requirement {
std::vector<bool> contentNeedContent;
std::vector<bool> shapeNeedContent;
std::vector<bool> supportError;
};
~Executor();
Requirement getRequirement(Expr* expr) const;
@ -65,25 +44,27 @@ public:
};
void gc(GCFlag flag = FULL);
static std::shared_ptr<Executor> getGlobalExecutor();
static std::shared_ptr<Executor> newExecutor(MNNForwardType type,
const BackendConfig& config,
int numberThread);
void resetProfile();
void dumpProfile();
void addOpCostTime(int op, float costTime);
void addOpCostTime(const std::string& type, float costTime);
void addOpFlops(const std::string& type, float flops);
class Profiler;
static RuntimeInfo getRuntime();
private:
void _createSingle(EXPRP expr);
void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::vector<ComputeCache::TensorContent>&& tensors, bool forceCPU);
void _makeCache(const std::vector<EXPRP>& outputs, bool forceCPU);
void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::set<std::shared_ptr<Expr::Inside>>&& inputNode, bool forceCPU);
void _addToCache(const std::vector<std::shared_ptr<ComputeCache>>& caches);
void _resetCache();
void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::vector<ComputeCache::TensorContent>& tensors);
void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::set<std::shared_ptr<Expr::Inside>>& inputNode);
Executor(std::shared_ptr<Backend> backend);
std::shared_ptr<Backend> mBackend;
std::shared_ptr<Backend> mBackupBackend;
Executor(std::shared_ptr<Runtime> backend, MNNForwardType type);
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mRuntime;
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mBackupRuntime;
std::mutex mMutex;
std::vector<std::shared_ptr<Tensor>> mStack;
std::vector<Tensor*> mStackInputs;
std::vector<Tensor*> mStackOutputs;
std::shared_ptr<Profiler> mProfiler;
};
} // namespace Express

View File

@ -0,0 +1,33 @@
//
// ExecutorScope.hpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MNN_EXPR_EXECUTOR_SCOPE_HPP_
#define MNN_EXPR_EXECUTOR_SCOPE_HPP_
#include <MNN/expr/Executor.hpp>
namespace MNN {
namespace Express {
struct ExecutorScope final {
public:
ExecutorScope() = delete;
explicit ExecutorScope(const ExecutorScope&) = delete;
explicit ExecutorScope(const std::shared_ptr<Executor>& current);
explicit ExecutorScope(const std::string& scope_name,
const std::shared_ptr<Executor>& current);
virtual ~ExecutorScope();
static const std::shared_ptr<Executor> Current();
};
} // namespace Express
} // namespace MNN
#endif // MNN_EXPR_EXECUTOR_SCOPE_HPP_
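
A minimal sketch of how the new ExecutorScope is meant to be used together with Executor::newExecutor from the Executor.hpp changes above; the backend type and thread count are just example values:

#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExecutorScope.hpp>
#include <thread>

int main() {
    MNN::BackendConfig bnConfig;
    auto executor = MNN::Express::Executor::newExecutor(MNN_FORWARD_CPU, bnConfig, 1);
    std::thread worker([&]() {
        // Everything built inside this scope uses the dedicated executor
        // instead of the global one; the scope is popped when it is destroyed.
        MNN::Express::ExecutorScope scope(executor);
        auto current = MNN::Express::ExecutorScope::Current();
        (void)current;
        // ... build and run expressions / modules here ...
    });
    worker.join();
    return 0;
}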

View File

@ -87,6 +87,7 @@ public:
};
bool fix(InputType type) const;
private:
friend class Variable;
std::shared_ptr<Variable> mContent;
};
inline bool operator==(Variable* src, VARP dst) {
@ -107,7 +108,6 @@ public:
INTS dim;
halide_type_t type;
int size;
void* ptr = nullptr;
void syncSize();
};
const std::string& name() const;
@ -173,7 +173,7 @@ private:
class MNN_PUBLIC Expr {
public:
struct Inside;
static EXPRP create(Variable::Info&& info);
static EXPRP create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy = true);
static EXPRP create(const OpT* op, std::vector<VARP> inputs, int outputSize = 1);
static EXPRP create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize = 1);
static EXPRP create(std::unique_ptr<OpT>&& op, std::vector<VARP> inputs, int outputSize = 1) {
@ -188,7 +188,7 @@ public:
return mInputs;
}
int outputSize() const {
return mOutputNames.size();
return (int)mOutputNames.size();
}
static void replace(EXPRP oldExpr, EXPRP newExpr);
bool requireInfo();

View File

@ -8,9 +8,14 @@
#ifndef MNN_Train_Module_hpp
#define MNN_Train_Module_hpp
#include <vector>
#include <unordered_map>
#include <MNN/expr/Expr.hpp>
namespace MNN {
namespace Train {
namespace Express {
class MNN_PUBLIC Module {
public:
Module() = default;
@ -21,9 +26,6 @@ public:
bool loadParameters(const std::vector<Express::VARP>& parameters);
void setIsTraining(const bool isTraining);
bool getIsTraining();
static std::shared_ptr<Module> transform(const std::vector<Express::VARP>& inputs,
const std::vector<Express::VARP>& outputs);
void clearCache();
const std::string& name() const {
@ -38,12 +40,45 @@ public:
void setType(std::string type) {
mType = std::move(type);
}
// Return the parameter index
int addParameter(Express::VARP parameter);
void setParameter(Express::VARP parameter, int index);
static Module* createEmpty(const std::vector<Express::VARP>& parameters);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic = false);
static Module* clone(const Module* module, const bool shareParams = false);
class CloneContext {
public:
CloneContext() = default;
explicit CloneContext(const bool shareParams)
: mShareParams(shareParams) {}
virtual ~CloneContext() = default;
const bool shareParams() const { return mShareParams; }
EXPRP getOrClone(const EXPRP expr);
VARP getOrClone(const VARP var);
private:
bool mShareParams = false;
std::unordered_map<const Expr*, EXPRP> mExprMap;
std::unordered_map<const Variable*, VARP> mVarMap;
};
virtual Module* clone(CloneContext* ctx) const {
return nullptr;
}
protected:
void registerModel(const std::vector<std::shared_ptr<Module>>& children);
void addParameter(Express::VARP parameter);
virtual void onClearCache() {
}
Module* cloneBaseTo(CloneContext* ctx, Module* module) const;
private:
void _collectParameters(std::vector<Express::VARP>& result) const;
std::vector<std::shared_ptr<Module>> mChildren;
@ -52,6 +87,13 @@ private:
std::string mName;
std::string mType;
};
struct SubGraph {
std::vector<std::string> inputs;
std::vector<std::string> outputs;
std::shared_ptr<Module> m;
};
} // namespace Train
} // namespace MNN
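
The new static entry points above (Module::load, Module::clone) can be exercised roughly as follows; the file name, tensor names, and input shape are placeholders for a real converted model:

#include <MNN/expr/Module.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <memory>

using namespace MNN::Express;

int main() {
    // Placeholder tensor names; use the real input/output names of your model.
    std::shared_ptr<Module> net(Module::load({"input"}, {"output"}, "model.mnn"));
    if (nullptr == net) {
        return 1;
    }
    // shareParams = true yields a second module that reuses the same weights,
    // e.g. for running the model from another thread.
    std::shared_ptr<Module> netCopy(Module::clone(net.get(), true));
    auto x = _Input({1, 3, 224, 224}, NC4HW4, halide_type_of<float>());
    // ... fill x->writeMap<float>() with input data ...
    auto outputs = net->onForward({x});
    return 0;
}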

View File

@ -9,11 +9,10 @@
#ifndef MNN_Train_NN_hpp
#define MNN_Train_NN_hpp
#include <MNN/expr/ExprCreator.hpp>
#include "Distributions.hpp"
#include "Module.hpp"
#include <MNN/expr/Module.hpp>
#include <vector>
namespace MNN {
namespace Train {
namespace Express {
class Initializer;
class MNN_PUBLIC NN {
@ -29,7 +28,7 @@ public:
};
enum FeatureScaleStatMethod {
PerTensor = 0,
PerChannel = 1
PerChannel = 1 // Deprecated
};
/* Unlike an enum inside a class, a class inside a class needs to be dllimport or dllexport explicitly.
Compilation on other systems is not affected.
@ -86,7 +85,7 @@ public:
static ConvParameters ExtractConvolution(Express::EXPRP expr);
// Extract BatchNormal and Dropout
static Module* ExtractNotRunableOp(Express::EXPRP expr);
static Module* ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs);
};
};

View File

@ -31,25 +31,30 @@ MNN_PUBLIC VARP _Const(const void* ptr, INTS shape = {}, Dimensionformat format
MNN_PUBLIC VARP _TrainableParam(float value, INTS dims, Dimensionformat format);
MNN_PUBLIC VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format,
halide_type_t type = halide_type_of<float>());
MNN_PUBLIC VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape);
MNN_PUBLIC VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
MNN_PUBLIC VARP _Conv(float weight, float bias, VARP x, INTS channel, INTS kernelSize, PaddingMode pad = VALID,
INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1);
MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false, int nbits = 8);
MNN_PUBLIC VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
MNN_PUBLIC VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
MNN_PUBLIC VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
MNN_PUBLIC VARP _MaxPool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
MNN_PUBLIC VARP _AvePool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NHWC);
MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NCHW);
MNN_PUBLIC VARP _Reshape(VARP x, VARP shape);
MNN_PUBLIC VARP _Scale(VARP x, int channels, std::vector<float>&& scales, std::vector<float>&& bias);
MNN_PUBLIC VARP _Relu(VARP x, float slope = 0.0f);
MNN_PUBLIC VARP _Relu6(VARP x);
MNN_PUBLIC VARP _Relu6(VARP x, float minValue = 0.0f, float maxValue = 6.0f);
MNN_PUBLIC VARP _PRelu(VARP x, std::vector<float> &&slopes);
MNN_PUBLIC VARP _Softmax(VARP logits, int axis = -1);
MNN_PUBLIC VARP _Softplus(VARP features);
@ -76,7 +81,7 @@ MNN_PUBLIC VARP _Pad(VARP x, VARP paddings, PadValueMode mode = CONSTANT);
MNN_PUBLIC VARP _ExpandDims(VARP input, int axis);
MNN_PUBLIC VARP _ExpandDims(VARP input, VARP axis);
MNN_PUBLIC VARP _Shape(VARP input);
MNN_PUBLIC VARP _Shape(VARP input, bool nchw = false);
MNN_PUBLIC VARP _Stack(VARPS values, int axis=0);
enum InterpolationMethod {BILINEAR, NEAREST};
MNN_PUBLIC VARP _CropAndResize(VARP image, VARP boxes, VARP box_ind, VARP crop_size,
@ -92,6 +97,7 @@ MNN_PUBLIC VARP _GatherND(VARP params, VARP indices);
MNN_PUBLIC VARP _Selu(VARP features, float scale, float alpha);
MNN_PUBLIC VARP _Size(VARP input);
MNN_PUBLIC VARP _Elu(VARP features, float alpha=1.0);
MNN_PUBLIC VARP _Threshold(VARP features, float alpha=1.0);
MNN_PUBLIC VARP _MatrixBandPart(VARP input, VARP num_lower, VARP num_upper);
MNN_PUBLIC std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims);
MNN_PUBLIC VARP _SetDiff1D(VARP x, VARP y);
@ -123,7 +129,8 @@ MNN_PUBLIC VARP _ZeroGrad(VARP x);
// Int8 Inference
MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu);
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits = 8);
MNN_PUBLIC VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim);
MNN_PUBLIC VARP _FloatToInt8(VARP x, VARP scale, char minValue, char maxValue);
MNN_PUBLIC VARP _Int8ToFloat(VARP x, VARP scale);
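
A small sketch of the updated creator signatures in this header (the shapes and clamp bounds are arbitrary examples); note that _Reshape's default original_format changes from NHWC to NCHW in this release:

#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

int main() {
    auto x = _Input({1, 8, 4, 4}, NCHW, halide_type_of<float>());
    // _Relu6 now takes explicit clamp bounds; the defaults keep the old 0..6 behaviour.
    auto y = _Relu6(x, 0.0f, 6.0f);
    // Uses the new default original_format (NCHW).
    auto z = _Reshape(y, {1, 8, 16});
    return 0;
}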

102
include/MNN/expr/Scope.hpp Normal file
View File

@ -0,0 +1,102 @@
//
// Scope.hpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MNN_EXPR_SCOPE_HPP_
#define MNN_EXPR_SCOPE_HPP_
#include <cstdio>
#include <vector>
#include <string>
#include <mutex>
#include <MNN/Interpreter.hpp>
namespace MNN {
namespace Express {
template <typename T>
class Scope {
public:
Scope();
virtual ~Scope() = default;
struct ScopedContent {
std::string scope_name;
T content;
};
void EnterScope(const ScopedContent& current);
void EnterScope(const T& current);
void EnterScope(const std::string& scope_name, const T& current);
void ExitScope();
const ScopedContent& Current() const;
int ScopedLevel() const { return scoped_level_; }
private:
std::string MakeScopeName(const std::string& prefix, int level) const;
mutable std::mutex mutex_;
int scoped_level_ = 0;
std::vector<ScopedContent> scoped_contents_;
};
template <typename T>
Scope<T>::Scope() : scoped_level_(0) {
}
template <typename T>
void Scope<T>::EnterScope(const ScopedContent& current) {
std::lock_guard<std::mutex> lock(mutex_);
++scoped_level_;
scoped_contents_.push_back(current);
}
template <typename T>
void Scope<T>::EnterScope(const T& current) {
EnterScope("scope", current);
}
template <typename T>
void Scope<T>::EnterScope(const std::string& scope_name,
const T& current) {
std::lock_guard<std::mutex> lock(mutex_);
int scoped_level = ScopedLevel();
std::string name = MakeScopeName(scope_name, scoped_level++);
ScopedContent content{name, current};
++scoped_level_;
scoped_contents_.push_back(content);
}
template <typename T>
void Scope<T>::ExitScope() {
std::lock_guard<std::mutex> lock(mutex_);
--scoped_level_;
scoped_contents_.resize(scoped_level_);
}
template <typename T>
const typename Scope<T>::ScopedContent& Scope<T>::Current() const {
std::lock_guard<std::mutex> lock(mutex_);
MNN_CHECK(scoped_contents_.size() > 0, "Scope level should not be 0.");
return scoped_contents_.back();
}
template <typename T>
std::string Scope<T>::MakeScopeName(const std::string& prefix,
int level) const {
char s[16];
snprintf(s, 16, "%d", level);
return prefix + "/" + std::string(s);
}
} // namespace Express
} // namespace MNN
#endif // MNN_EXPR_SCOPE_HPP_
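
Scope<T> above is a small mutex-guarded stack of named values, and presumably backs the ExecutorScope introduced earlier. A standalone usage sketch (the scope name and payload are arbitrary):

#include <MNN/expr/Scope.hpp>
#include <cstdio>
#include <string>

int main() {
    MNN::Express::Scope<std::string> scope;
    scope.EnterScope("device", std::string("cpu"));
    scope.EnterScope("device", std::string("opencl"));
    // Current() returns the most recently entered content.
    std::printf("inner: %s\n", scope.Current().content.c_str());
    scope.ExitScope();
    std::printf("outer: %s\n", scope.Current().content.c_str());
    scope.ExitScope();
    return 0;
}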

View File

@ -1,12 +1,14 @@
# MNN_Windows
# |------- MNN_Windows_lib
# |---------- Dynamic_Library
# |---------- Static_Library
# |------- MNN_Windows_tools
# MNN
# |-- Debug
# | |--- MD
# | |--- MT
# |-- Release
# |--- MD
# |--- MT
$erroractionpreference = "stop"
Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN_Windows"
Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN"
#clear and create package directory
powershell ./schema/generate.ps1
@ -14,32 +16,50 @@ Set-Variable -Name WINDOWS_PACKAGE_PATH -Value "$(pwd)\$WINDOWS_PACKAGE_NAME"
Remove-Item $WINDOWS_PACKAGE_PATH -Recurse -ErrorAction Ignore
mkdir $WINDOWS_PACKAGE_PATH\
cd $WINDOWS_PACKAGE_PATH
mkdir -p MNN_Windows_lib\Dynamic_Library
mkdir -p MNN_Windows_lib\Static_Library
mkdir MNN_Windows_tools
mkdir -p Debug\MD
mkdir -p Debug\MT
mkdir -p Release\MD
mkdir -p Release\MT
cd ..
Remove-Item build -Recurse -ErrorAction Ignore
mkdir build
cd build
pushd build
# tools without dependency, static library without sep_build
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
ninja
pushd $WINDOWS_PACKAGE_PATH
cp ..\build\*.exe MNN_Windows_tools
cp ..\build\*.pdb MNN_Windows_tools
cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
popd
#cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
#ninja
#pushd $WINDOWS_PACKAGE_PATH
#cp ..\build\*.exe MNN_Windows_tools
#cp ..\build\*.pdb MNN_Windows_tools
#cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
#popd
#dynamic library without sep_build
rm .\CMakeCache.txt
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF ..
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
ninja
cd $WINDOWS_PACKAGE_PATH
cp ..\build\MNN.lib MNN_Windows_lib\Dynamic_Library
cp ..\build\MNN.dll MNN_Windows_lib\Dynamic_Library
cp ..\build\MNN.pdb MNN_Windows_lib\Dynamic_Library
cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MT
cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MT
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MT
# Compress MNN_Windows_lib and MNN_Windows_tools
Compress-Archive -Path MNN_Windows_lib -DestinationPath MNN_Windows_lib.zip -Update -CompressionLevel Optimal
Compress-Archive -Path MNN_Windows_tools -DestinationPath MNN_Windows_tools.zip -Update -CompressionLevel Optimal
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MD
cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MD
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MD
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MT
cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MT
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MT
Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MD
cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MD
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MD
popd

View File

@ -8,15 +8,14 @@ set_target_properties(
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN.so
)
add_library( MNN_Arm82 SHARED IMPORTED GLOBAL)
set_target_properties(
MNN_Arm82
PROPERTIES IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Arm82.so
)
add_library( MNN_CL SHARED IMPORTED GLOBAL )
set_target_properties( MNN_CL
PROPERTIES IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_CL.so
)
)
add_library( MNN_Express SHARED IMPORTED GLOBAL )
set_target_properties( MNN_Express
PROPERTIES IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Express.so
)

View File

@ -5,7 +5,6 @@ adb push ./libMNN_CL.so /data/local/tmp/MNN/libMNN_CL.so
adb push ./libMNN_Vulkan.so /data/local/tmp/MNN/libMNN_Vulkan.so
adb push ./libMNN_GL.so /data/local/tmp/MNN/libMNN_GL.so
adb push ./libMNN_Express.so /data/local/tmp/MNN/libMNN_Express.so
adb push ./libMNN_Arm82.so /data/local/tmp/MNN/libMNN_Arm82.so
adb push ./MNNV2Basic.out /data/local/tmp/MNN/MNNV2Basic.out
adb shell "cd /data/local/tmp/MNN && rm -r output"
adb shell "cd /data/local/tmp/MNN && mkdir output"
@ -18,3 +17,4 @@ adb push ./timeProfile.out /data/local/tmp/MNN/timeProfile.out
adb push ./train.out /data/local/tmp/MNN/train.out
adb push ./benchmark.out /data/local/tmp/MNN/benchmark.out
adb push ./benchmarkExprModels.out /data/local/tmp/MNN/benchmarkExprModels.out
adb push ./run_test.out /data/local/tmp/MNN/run_test.out

File diff suppressed because it is too large

View File

@ -4,6 +4,8 @@
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>$(DEVELOPMENT_LANGUAGE)</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>

View File

@ -1,57 +0,0 @@
#!bin/sh
echo "Register Op Begin"
function read_dir(){
str1=`grep -e $2 $1/*.$4|sed s/[[:space:]]//g`
array=(${str1//\;/ })
for var in ${array[@]}; do
`echo $var|awk -F $3 '{
a="___";
b="__();";
c="extern void ";
print(c""a""$3"__"$4""b) >> "extern";
print (a""$3"__"$4""b) >> "call"
}'`
done
}
start=$(date +%s)
SEP='[:(,)]'
FILE_EXTERN_CPP='cpp'
FILE_EXTERN_MM='mm'
SHELL_FOLDER=$(dirname $0)'/../../..'
# handle CPU
CPUFILE=$SHELL_FOLDER/source/backend/cpu/CPUOPRegister.cpp
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $CPUFILE
echo "Start Register CPU"
CPU=$SHELL_FOLDER/source/backend/cpu
CPU_KEY='REGISTER_CPU_OP_CREATOR'
read_dir $CPU $CPU_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $CPUFILE
rm extern
echo '\nvoid registerCPUOps() {' >> $CPUFILE
cat call >> $CPUFILE
echo '}\n#endif\n}' >> $CPUFILE
rm call
# handle Shape
echo "Start Register Shape"
SHAPEFILE=$SHELL_FOLDER/source/shape/ShapeRegister.cpp
SHAPE=$SHELL_FOLDER/source/shape
SHAPE_KEY="REGISTER_SHAPE"
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $SHAPEFILE
read_dir $SHAPE $SHAPE_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $SHAPEFILE
rm extern
echo '\nvoid registerShapeOps() {' >> $SHAPEFILE
cat call >> $SHAPEFILE
echo '}\n#endif\n}' >> $SHAPEFILE
rm call
echo "Register Op End"
dur=$(echo "$(date +%s) - $start" | bc)
printf "Execution time: %.6f seconds" $dur

View File

@ -8,10 +8,14 @@
#import "AppDelegate.h"
#import "MNNTestSuite.h"
#import <MNN/expr/Executor.hpp>
@implementation AppDelegate
- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
MNN::BackendConfig config;
// If want to test metal, change MNN_FORWARD_CPU to MNN_FORWARD_METAL
MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1);
MNNTestSuite::runAll();
return YES;
}

View File

@ -8,6 +8,9 @@ import cv2
def inference():
""" inference mobilenet_v1 using a specific picture """
interpreter = MNN.Interpreter("mobilenet_v1.mnn")
interpreter.setCacheFile('.tempcache')
config = {}
config['precision'] = 'low'
session = interpreter.createSession()
input_tensor = interpreter.getSessionInput(session)
image = cv2.imread('ILSVRC2012_val_00049999.JPEG')

View File

@ -96,8 +96,7 @@ def demo():
train_dataloader = MNN.data.DataLoader(train_dataset, batch_size = 64, shuffle = True)
test_dataloader = MNN.data.DataLoader(test_dataset, batch_size = 100, shuffle = False)
opt = MNN.optim.SGD(0.01, 0.9, 0.0005)
opt.append(model.parameters)
opt = MNN.optim.SGD(model, 0.01, 0.9, 0.0005)
F.set_thread_number(4)

View File

@ -125,8 +125,7 @@ def demo():
net = Net(feature_extractor, num_classes)
opt = MNN.optim.SGD(1e-3, 0.9, 0.00004)
opt.append(net.parameters)
opt = MNN.optim.SGD(net, 1e-3, 0.9, 0.00004)
for epoch in range(10):
train_func(net, train_dataloader, opt, num_classes)

View File

@ -0,0 +1,15 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr
v0 = F.const([0.3,0.1, -0.3,0.4], [4])
v2 = F.const([0.3,0.1, -0.3,0.4], [4])
v1 = v0 * v0
outputDiff = F.const([0.05, 0.03, 0.02, 0.01], [4])
v0Grad = nn.grad(v1, [v0, v2], [outputDiff], "")
print(v0Grad)
print(v0Grad[0].read())
F.save(v0Grad, "temp.grad")

View File

@ -0,0 +1,36 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr
class Net(nn.Module):
"""construct a lenet 5 model"""
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.conv(1, 20, [5, 5])
self.conv2 = nn.conv(20, 50, [5, 5])
self.fc1 = nn.linear(800, 500)
self.fc2 = nn.linear(500, 10)
self.step = F.const([10], [], F.NCHW, F.int)
self.lr = F.const([0.0004],[], F.NCHW, F.float)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool(x, [2, 2], [2, 2])
x = F.relu(self.conv2(x))
x = F.max_pool(x, [2, 2], [2, 2])
x = F.reshape(x, [0, -1])
x = F.relu(self.fc1(x))
x = self.fc2(x)
x = F.softmax(x, 1)
return x
model = Net()
F.save(model.parameters, 'mnist.snapshot')
model2 = Net()
model2.load_parameters(F.load_as_list('mnist.snapshot'))
print(model2.lr.read())
print(model2.step.read())

Some files were not shown because too many files have changed in this diff