mirror of https://github.com/alibaba/MNN.git
GitHub release 1.1.0
This commit is contained in:
parent 939a80dba8
commit d6795ad031

@@ -330,7 +330,6 @@ project/android/.idea/caches/build_file_checksums.ser
# FIXME(haijing): Xcode pre-build stage breaks compilation of flatbuffers by setting envs that do cmake cross-compilation for iOS
# schema/current
schema/private
schema/current
tools/converter/source/IR
benchmark/benchmark.txt

@@ -345,18 +344,13 @@ pymnn/android/.idea/modules.xml
pymnn/android/.idea/runConfigurations.xml
pymnn/android/.idea/vcs.xml
pymnn/android/.idea/caches/build_file_checksums.ser
pymnn/src/pybind_private/

buildios
build*/
include/MNN/VCS.h
source/backend/opencl/execution/cl/codegen/opencl_program.cc
source/backend/opencl/execution/cl/opencl_program.cc
# FIXME(haijing): MTL issues.....
# source/backend/metal/MetalOPRegister.mm
source/backend/opengl/AllShader.cpp
include/MNN/backend/opengl/shaders/AllShader.h
source/backend/vulkan/compiler/AllShader.cpp
include/MNN/backend/vulkan/shaders/AllShader.h
.idea
project/ios/ios_64
project/ios/ios_32
CMakeLists.txt
@@ -49,6 +49,7 @@ include(FindPythonInterp REQUIRED)
option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen" OFF)
option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF)
option(MNN_BUILD_SHARED_LIBS "MNN build shared or static lib" ON)
option(MNN_WIN_RUNTIME_MT "MNN use /MT on Windows dll" OFF)
option(MNN_FORBID_MULTI_THREAD "Disable Multi Thread" OFF)
option(MNN_OPENMP "Use OpenMP's thread pool implementation. Does not work on iOS or Mac OS" OFF)
option(MNN_USE_THREAD_POOL "Use MNN's own thread pool implementation" ON)
@@ -62,14 +63,14 @@ option(MNN_SUPPORT_TFLITE_QUAN "Enable MNN's tflite quantized op" ON)
option(MNN_DEBUG_MEMORY "MNN Debug Memory Access" OFF)
option(MNN_DEBUG_TENSOR_SIZE "Enable Tensor Size" OFF)
option(MNN_GPU_TRACE "Enable MNN Gpu Debug" OFF)
option(MNN_OPENCL_LWS_TUNE "Enable MNN OpenCL Lws Tuning" ON)
option(MNN_PORTABLE_BUILD "Link the static version of third party libraries where possible to improve the portability of built executables" OFF)
option(MNN_SEP_BUILD "Build MNN Backends and expression separately. Only works with MNN_BUILD_SHARED_LIBS=ON" ON)
option(NATIVE_LIBRARY_OUTPUT "Native Library Path" OFF)
option(NATIVE_INCLUDE_OUTPUT "Native Include Path" OFF)
option(MNN_AAPL_FMWK "Build MNN.framework instead of traditional .a/.dylib" OFF)
option(MNN_FMA_ENABLE "x86 routine use fma extension" OFF)
option(MNN_WITH_PLUGIN "Build with plugin op support." OFF)
option(MNN_BUILD_MINI "Build MNN-MINI that just supports fixed shape models." OFF)
option(MNN_USE_SSE "Use SSE optimization for x86 if possible" ON)

IF(NOT MNN_BUILD_SHARED_LIBS)
  message(WARNING "Close MNN_SEP_BUILD for static library")
@@ -79,13 +80,14 @@ IF(APPLE AND MNN_AAPL_FMWK AND MNN_SEP_BUILD)
  message(WARNING "MNN_SEP_BUILD AND MNN_AAPL_FMWK can't coexist. Turning off MNN_SEP_BUILD")
  SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF()
IF(MSVC OR WIN32)
IF(WIN32)
  IF(MNN_SEP_BUILD)
    message(WARNING "MNN_SEP_BUILD IS TROUBLESOME ON Windows. Forcing OFF...")
    SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
  ENDIF()
  SET(MNN_USE_SYSTEM_LIB ON CACHE BOOL "<docstring>" FORCE)
  add_definitions(-D_CRT_SECURE_NO_WARNINGS)

  IF(MSVC)
    # generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946
    SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
    SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
@@ -94,12 +96,13 @@ IF(MSVC OR WIN32)
  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
  ENDIF()
ENDIF()

include(${CMAKE_CURRENT_LIST_DIR}/cmake/macros.cmake)

IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT MNN_BUILD_SHARED_LIBS AND NOT (MSVC OR WIN32))
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
  SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
  IF(MNN_BUILD_CONVERTER)
    SET(MNN_PORTABLE_BUILD ON CACHE BOOL "<docstring>" FORCE)
@@ -117,6 +120,9 @@ endif()
if(MNN_SUPPORT_TFLITE_QUAN)
  add_definitions(-DMNN_SUPPORT_TFLITE_QUAN)
endif()
if(MNN_BUILD_MINI)
  add_definitions(-DMNN_BUILD_MINI)
endif()

# debug options
if(MNN_DEBUG_MEMORY)
@@ -128,9 +134,6 @@ endif()
if(MNN_GPU_TRACE)
  add_definitions(-DMNN_GPU_FORCE_FINISH)
endif()
if(MNN_OPENCL_LWS_TUNE)
  add_definitions(-DMNN_OPENCL_LWS_TUNE)
endif()

# backend options
option(MNN_METAL "Enable Metal" OFF)
@@ -138,11 +141,8 @@ option(MNN_OPENCL "Enable OpenCL" OFF)
option(MNN_OPENGL "Enable OpenGL" OFF)
option(MNN_VULKAN "Enable Vulkan" OFF)
option(MNN_ARM82 "Enable ARM82" OFF)

# codegen register ops
if (MNN_METAL)
  add_definitions(-DMNN_CODEGEN_REGISTER)
endif()
option(MNN_CUDA "Enable CUDA" OFF)
option(MNN_TENSORRT "Enable TensorRT" OFF)

# target options
option(MNN_BUILD_BENCHMARK "Build benchmark or not" OFF)
@@ -165,11 +165,13 @@ message(STATUS "\tOpenCL: ${MNN_OPENCL}")
message(STATUS "\tOpenGL: ${MNN_OPENGL}")
message(STATUS "\tVulkan: ${MNN_VULKAN}")
message(STATUS "\tARM82: ${MNN_ARM82}")
message(STATUS "\tTensorRT: ${MNN_TENSORRT}")
message(STATUS "\tCUDA: ${MNN_CUDA}")
message(STATUS "\tOpenMP: ${MNN_OPENMP}")
message(STATUS "\tHidden: ${MNN_HIDDEN}")
message(STATUS "\tBuild Path: ${CMAKE_CURRENT_BINARY_DIR}")

if(WIN32)
  if(MSVC)
    if(${CMAKE_VERSION} VERSION_LESS "3.14.0")
      message(FATAL_ERROR "MNN requires CMake 3.14+ to build on Windows!")
    endif()
@@ -178,14 +180,14 @@ if(WIN32)
    CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
    CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
    CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    if (MNN_BUILD_SHARED_LIBS)
      if(${flag_var} MATCHES "/MT")
        string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
      endif()
    else ()
      if (MNN_WIN_RUNTIME_MT)
        if(${flag_var} MATCHES "/MD")
          string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
        endif()
      else ()
        if(${flag_var} MATCHES "/MT")
          string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
        endif()
      endif ()
  endforeach()
elseif(CMAKE_SYSTEM_NAME MATCHES "^Android" OR CMAKE_SYSTEM_NAME MATCHES "^Linux")
@@ -270,6 +272,8 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "^Linux")
endif()
include_directories(${CMAKE_CURRENT_LIST_DIR}/include/
                    ${CMAKE_CURRENT_LIST_DIR}/source/
                    ${CMAKE_CURRENT_LIST_DIR}/express/
                    ${CMAKE_CURRENT_LIST_DIR}/tools/
                    ${CMAKE_CURRENT_LIST_DIR}/schema/current/
                    ${CMAKE_CURRENT_LIST_DIR}/3rd_party/
                    ${CMAKE_CURRENT_LIST_DIR}/3rd_party/flatbuffers/include
@@ -293,13 +297,13 @@ FILE(GLOB MNN_CV_SRC ${CMAKE_CURRENT_LIST_DIR}/source/cv/*)
add_library(MNNCV OBJECT ${MNN_CV_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCV>)
list(APPEND MNN_TARGETS MNNCV)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
  if(WIN32 OR MSVC)
    target_compile_options(MNNCV PRIVATE /arch:AVX)
  else()
if (MNN_USE_SSE)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
    if (NOT MSVC)
      target_compile_options(MNNCV PRIVATE -msse3)
      target_compile_options(MNNCV PRIVATE -mavx)
    endif()
  endif()
endif()

# Math
@@ -308,11 +312,19 @@ add_library(MNNMath OBJECT ${MNN_Math_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMath>)
list(APPEND MNN_TARGETS MNNMath)

# Shape
FILE(GLOB MNN_Shape_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/*)
add_library(MNNShape OBJECT ${MNN_Shape_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNShape>)
list(APPEND MNN_TARGETS MNNShape)
# Transform
FILE(GLOB MNN_Transform_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/* ${CMAKE_CURRENT_LIST_DIR}/source/geometry/*)
add_library(MNNTransform OBJECT ${MNN_Transform_SRC})
IF (NOT MNN_BUILD_MINI)
  list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTransform>)
ENDIF()
list(APPEND MNN_TARGETS MNNTransform)

# Utils
FILE(GLOB MNN_Utils_SRC ${CMAKE_CURRENT_LIST_DIR}/source/utils/*)
add_library(MNNUtils OBJECT ${MNN_Utils_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNUtils>)
list(APPEND MNN_TARGETS MNNUtils)

# Compute
FILE(GLOB MNN_Compute_SRC ${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/compute/*)
@@ -327,7 +339,9 @@ list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCPU>)
list(APPEND MNN_TARGETS MNNCPU)

# X86_64 AVX/SSE
if (MNN_USE_SSE)
  include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/x86_x64/CMakeLists.txt)
endif()

# AArch32/64 Assemblies
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/arm/CMakeLists.txt)
@@ -377,7 +391,7 @@ if (NOT APPLE)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
  if (WIN32)
  if (MSVC)
    set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}")
    set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}")
  endif()
@@ -387,20 +401,22 @@ endif()

set(CMAKE_CXX_FLAGS_ORIGIN ${CMAKE_CXX_FLAGS})
set(CMAKE_C_FLAGS_ORIGIN ${CMAKE_C_FLAGS})
if ((NOT (MSVC OR WIN32)) AND MNN_HIDDEN)
if ((NOT MSVC) AND MNN_HIDDEN)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden -fvisibility=hidden")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
  if (NOT APPLE)
  # Omitting the frame pointer can make debugging difficult
  if ((NOT APPLE) AND (NOT WIN32))
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer")
  endif()
endif()
if (NOT (MSVC OR WIN32))
if (NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions ")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif()

# Metal
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
set(MNN_DEPS "")
set(MNN_EXTRA_DEPENDS "")
list(APPEND MNN_DEPS MNN)

# Plugin
@@ -409,6 +425,14 @@ if(MNN_WITH_PLUGIN)
  include(${CMAKE_CURRENT_LIST_DIR}/source/plugin/CMakeLists.txt)
endif()

# Metal
if(MNN_METAL AND APPLE)
  add_definitions(-DMNN_METAL_ENABLED=1)
  include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
  list(APPEND MNN_TARGETS MNNMetal)
  list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMetal>)
endif()

# Vulkan
IF(MNN_VULKAN)
  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/vulkan/)
@@ -446,22 +470,34 @@ IF(MNN_OPENGL)
  ENDIF()
ENDIF()

# CUDA
IF(MNN_CUDA)
  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/cuda/)
  list(APPEND MNN_TARGETS MNN_CUDA)
  list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_CUDA>)
  list(APPEND MNN_EXTRA_DEPENDS ${MNN_CUDA_LIBS})
ENDIF()

IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR IOS_ARCH STREQUAL "arm64")
  # ARM82 Assemblies
  IF(MNN_ARM82)
    add_definitions(-DENABLE_ARMV82)
    add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/arm82/)
    IF(MNN_SEP_BUILD)
      list(APPEND MNN_DEPS MNN_Arm82)
    ELSE()
      list(APPEND MNN_TARGETS MNN_Arm82)
      list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_Arm82>)
    ENDIF()
  ENDIF()
ENDIF()
# Express
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/express/)

# TensorRT
IF(MNN_TENSORRT)
  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/tensorrt/)
  list(APPEND MNN_TARGETS MNN_TRT)
  list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_TRT>)
  list(APPEND MNN_EXTRA_DEPENDS ${MNN_TRT_LIBS})
ENDIF()

IF(MNN_SEP_BUILD)
  add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
  target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
@@ -471,7 +507,7 @@ ELSE()
  list(APPEND MNN_TARGETS MNNExpress)
  IF(MNN_BUILD_SHARED_LIBS)
    add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
    if (MSVC OR WIN32)
    if (WIN32)
      foreach(TARGET ${MNN_TARGETS})
        target_compile_definitions(${TARGET} PRIVATE "-DBUILDING_MNN_DLL")
        target_compile_definitions(${TARGET} INTERFACE "-DUSING_MNN_DLL")
@@ -484,7 +520,7 @@ ELSE()
  ENDIF()
  target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
ENDIF()
if (MSVC OR WIN32)
if (MSVC)
  target_link_options(MNN PRIVATE "/IGNORE:4049,4217")
endif()
@@ -504,9 +540,11 @@ if(APPLE)
  target_link_libraries(MNN PUBLIC ${FOUNDATION})
  find_library(METAL Metal REQUIRED)
  target_link_libraries(MNN PUBLIC ${METAL})
  find_library(GRAPHIC CoreGraphics)
  target_link_libraries(MNN PUBLIC ${GRAPHIC})
ENDIF()
endif()
add_dependencies(MNN MNNCore MNNCV MNNShape MNNMath MNNCompute MNNCPU GenVCSHDR)
add_dependencies(MNN MNNCore MNNCV MNNTransform MNNMath MNNCompute MNNCPU GenVCSHDR)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/converter)

if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
@@ -532,12 +570,6 @@ if (NOT MNN_BUILD_SHARED_LIBS)
  endif()
endif()
list(APPEND MNN_TARGETS MNN)
FOREACH(TARGET ${MNN_TARGETS})
  IF((NOT MSVC) AND (NOT WIN32))
  else()
    target_compile_definitions(${TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS)
  endif()
ENDFOREACH()
list(REMOVE_ITEM MNN_TARGETS MNN)
IF(MNN_BUILD_DEMO)
  include(${CMAKE_CURRENT_LIST_DIR}/demo/exec/CMakeLists.txt)
@@ -46,6 +46,7 @@ Pod::Spec.new do |s|
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/geometry/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
@@ -66,7 +66,7 @@ The Interpreter consists of the Engine and Backends. The former is responsible for model loading and computation-graph
Group 3:

<img src="doc/DingTalkQR3.png" height="256"/>
<img src="doc/DingTalkQR23.png" height="256"/>

## License
Apache 2.0
@@ -0,0 +1,89 @@
//
//  CPUBatchMatMul.cpp
//  MNN
//
//  Created by MNN on 2019/03/25.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUBatchMatMul.hpp"
#include "backend/cpu/CPUBackend.hpp"
#include "math/Matrix.hpp"

namespace MNN {

CPUBatchMatMul::CPUBatchMatMul(Backend* backend, bool adjX, bool adjY) : Execution(backend) {
    mMatMul.reset(new CPUMatMul(backend, adjX, adjY, true));
}

ErrorCode CPUBatchMatMul::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input0 = inputs[0];
    auto input1 = inputs[1];
    auto output = outputs[0];
    // Nothing to prepare if one of the inputs is empty; onExecute zero-fills the output.
    if (input0->elementSize() == 0 || input1->elementSize() == 0) {
        return NO_ERROR;
    }
    auto dimensions = input0->dimensions();
    mMatrixA.reset(Tensor::createDevice<float>({input0->length(input0->dimensions() - 2), input0->length(input0->dimensions() - 1)}));
    mMatrixB.reset(Tensor::createDevice<float>({input1->length(input1->dimensions() - 2), input1->length(input1->dimensions() - 1)}));
    mMatrixC.reset(Tensor::createDevice<float>({output->length(output->dimensions() - 2), output->length(output->dimensions() - 1)}));
    mTempInputs  = {mMatrixA.get(), mMatrixB.get()};
    mTempOutputs = {mMatrixC.get()};
    auto res = backend()->onAcquireBuffer(mMatrixA.get(), Backend::DYNAMIC);
    res = res && backend()->onAcquireBuffer(mMatrixB.get(), Backend::DYNAMIC);
    res = res && backend()->onAcquireBuffer(mMatrixC.get(), Backend::DYNAMIC);

    if (!res) {
        return OUT_OF_MEMORY;
    }
    int batch = 1;
    for (int i = 0; i < dimensions - 2; ++i) {
        batch *= input0->length(i);
    }
    mBatch = batch;
    auto code = mMatMul->onResize(mTempInputs, mTempOutputs);
    backend()->onReleaseBuffer(mMatrixA.get(), Backend::DYNAMIC);
    backend()->onReleaseBuffer(mMatrixB.get(), Backend::DYNAMIC);
    backend()->onReleaseBuffer(mMatrixC.get(), Backend::DYNAMIC);
    return code;
}

ErrorCode CPUBatchMatMul::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input0 = inputs[0];
    auto input1 = inputs[1];
    auto output = outputs[0];
    // Fill the output with zeros if one of the inputs is empty.
    if (input0->elementSize() == 0 || input1->elementSize() == 0) {
        ::memset(output->host<float>(), 0, output->size());
        return NO_ERROR;
    }
    const int dimensions = input0->dimensions();
    MNN_ASSERT(dimensions >= 3);
    const int input0Stride = input0->length(dimensions - 1) * input0->length(dimensions - 2);
    const int input1Stride = input1->length(dimensions - 1) * input1->length(dimensions - 2);
    const int outputStride = output->length(dimensions - 1) * output->length(dimensions - 2);
    const auto input0Ptr   = input0->host<float>();
    const auto input1Ptr   = input1->host<float>();
    float* const outputPtr = output->host<float>();

    for (int i = 0; i < mBatch; ++i) {
        ::memcpy(mMatrixA->host<float>(), input0Ptr + i * input0Stride, input0Stride * sizeof(float));
        ::memcpy(mMatrixB->host<float>(), input1Ptr + i * input1Stride, input1Stride * sizeof(float));
        mMatMul->onExecute(mTempInputs, mTempOutputs);
        ::memcpy(outputPtr + i * outputStride, mMatrixC->host<float>(), outputStride * sizeof(float));
    }
    return NO_ERROR;
}

class CPUBatchMatMulCreator : public CPUBackend::Creator {
public:
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        return new CPUBatchMatMul(backend, op->main_as_BatchMatMulParam()->adjX(), op->main_as_BatchMatMulParam()->adjY());
    }
};

REGISTER_CPU_OP_CREATOR(CPUBatchMatMulCreator, OpType_BatchMatMul);

} // namespace MNN
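For intuition, BatchMatMul computes one independent matrix product per batch slice. A minimal standalone reference (a sketch with a hypothetical helper name, not MNN API) of the same semantics:

// batchMatMulRef: hypothetical reference for what CPUBatchMatMul computes.
// For each batch b: C[b] = A[b] * B[b], with A[b] of shape (M x K) and B[b] of shape (K x N).
void batchMatMulRef(const float* A, const float* B, float* C,
                    int batch, int M, int K, int N) {
    for (int b = 0; b < batch; ++b) {
        const float* a = A + b * M * K; // per-batch slice, like the memcpy into mMatrixA
        const float* w = B + b * K * N; // per-batch slice, like the memcpy into mMatrixB
        float* c       = C + b * M * N; // per-batch output slice, copied back from mMatrixC
        for (int m = 0; m < M; ++m) {
            for (int n = 0; n < N; ++n) {
                float sum = 0.f;
                for (int k = 0; k < K; ++k) {
                    sum += a[m * K + k] * w[k * N + n];
                }
                c[m * N + n] = sum;
            }
        }
    }
}

The real execution delegates each slice to CPUMatMul, which also handles the adjX/adjY transpose flags; the sketch ignores those for brevity.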
@@ -0,0 +1,35 @@
//
//  CPUBatchMatMul.hpp
//  MNN
//
//  Created by MNN on 2019/03/25.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef CPUBatchMatMul_hpp
#define CPUBatchMatMul_hpp

#include "backend/cpu/CPUMatMul.hpp"

namespace MNN {

class CPUBatchMatMul : public Execution {
public:
    CPUBatchMatMul(Backend *backend, bool adjX, bool adjY);
    virtual ~CPUBatchMatMul() = default;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
    int mBatch;
    std::shared_ptr<Execution> mMatMul;
    std::vector<Tensor*> mTempInputs;
    std::vector<Tensor*> mTempOutputs;
    std::shared_ptr<Tensor> mMatrixA;
    std::shared_ptr<Tensor> mMatrixB;
    std::shared_ptr<Tensor> mMatrixC;
};

} // namespace MNN

#endif /* CPUBatchMatMul_hpp */
@@ -18,7 +18,6 @@
#include "backend/cpu/compute/ConvOpt.h"
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/ConvolutionFloatFactory.h"
#include "math/Vec4.hpp"

#define MIN_CON_PLANESIZE 256
@@ -10,7 +10,9 @@
#include <math.h>
#include "backend/cpu/CPUBackend.hpp"
#include "core/Macro.h"
#include "math/Vec4.hpp"
#include "math/Vec.hpp"

using Vec4 = MNN::Math::Vec<float, 4>;

namespace MNN {
@@ -39,12 +41,12 @@ ErrorCode CPUCosineSimilarity::onExecute(const std::vector<Tensor*>& inputs, con
const auto x1ChannelPtr = x1DataBatchPtr + j;
const auto x2ChannelPtr = x2DataBatchPtr + j;

Math::Vec4 innerProduct(.0f);
Math::Vec4 x1Square(.0f);
Math::Vec4 x2Square(.0f);
Vec4 innerProduct(.0f);
Vec4 x1Square(.0f);
Vec4 x2Square(.0f);
for (int c = 0; c < channel; ++c) {
    Math::Vec4 x1Data = Math::Vec4::load(x1ChannelPtr + c * channleStride);
    Math::Vec4 x2Data = Math::Vec4::load(x2ChannelPtr + c * channleStride);
    Vec4 x1Data = Vec4::load(x1ChannelPtr + c * channleStride);
    Vec4 x2Data = Vec4::load(x2ChannelPtr + c * channleStride);
    auto x1Xx2 = x1Data * x2Data;
    innerProduct = innerProduct + x1Xx2;
    x1Square = x1Square + x1Data * x1Data;
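These accumulators realize the standard cosine-similarity identity, evaluated four lanes at a time:

$$\cos(x_1, x_2) = \frac{\sum_c x_{1,c}\, x_{2,c}}{\sqrt{\sum_c x_{1,c}^2}\,\sqrt{\sum_c x_{2,c}^2}}$$

innerProduct accumulates the numerator, while x1Square and x2Square accumulate the two squared norms of the denominator.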
@@ -12,8 +12,8 @@
#include "core/Concurrency.h"
#include "core/Macro.h"

#include "math/Vec4.hpp"
using MNN::Math::Vec4;
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;

namespace MNN {
@@ -21,7 +21,7 @@ public:
auto parameter = op->main_as_InnerProduct();
int outputCount = parameter->outputCount();
int srcCount = parameter->weight()->size() / outputCount;
mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4));
mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4, 4));
if (mWeight.get() == nullptr) {
    mValid = false;
    return;
@@ -180,6 +180,14 @@ ErrorCode CPULSTM::onResize(const std::vector<Tensor *> &inputs, const std::vect
    ::memcpy(mBiasC->host<float>(), mLSTM->bias()->float32s()->data(), mBiasC->size());
    ::memcpy(mWeightH->host<float>(), mLSTM->weightH()->float32s()->data(), mWeightH->size());
}
if (mGateHaveBias) {
    // Merge bias
    auto biasPtr = mBiasC->host<float>();
    auto biasPtr2 = biasPtr + 4 * numUnits;
    for (int i = 0; i < 4 * numUnits; ++i) {
        biasPtr[i] = biasPtr[i] + biasPtr2[i];
    }
}

if (inputs.size() > 1) {
@@ -260,16 +268,8 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
MNN_CONCURRENCY_END();

float* biasStartPtr = mBiasC->host<float>();
if (!mGateHaveBias) {
    biasStartPtr = nullptr;
}
mRetriveOutputFunction(mGates.host<float>(), biasStartPtr);

float* recurrenceBiasStartPtr = mBiasC->host<float>();
if (mGateHaveBias) {
    recurrenceBiasStartPtr += 4 * numUnits;
}

// transform
const float *contData = nullptr;
if (inputs.size() > 1) {
@@ -330,14 +330,11 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
}

// add bias
auto biasPtr = recurrenceBiasStartPtr + oc;
I = sigmoid(*biasPtr + I);
biasPtr = biasPtr + numUnits;
F = sigmoid(*biasPtr + F);
biasPtr = biasPtr + numUnits;
O = sigmoid(*biasPtr + O);
biasPtr = biasPtr + numUnits;
G = tanhf(*biasPtr + G);
//MNN_PRINT("%f, %f, %f, %f\n", I, O, F, G);
I = sigmoid(I);
F = sigmoid(F);
O = sigmoid(O);
G = tanhf(G);

auto newCell = F * cellData[oc] + I * G;
cellData[oc] = newCell;
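For reference, this gate update is the standard LSTM cell, with the bias terms already folded into the pre-activations I, F, O, and G by the merge done in onResize:

$$i = \sigma(\hat{i}),\quad f = \sigma(\hat{f}),\quad o = \sigma(\hat{o}),\quad g = \tanh(\hat{g}),\qquad c_t = f \cdot c_{t-1} + i \cdot g$$

where $\hat{i}, \hat{f}, \hat{o}, \hat{g}$ are the gate values accumulated from the input and recurrent matrix products.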
@@ -0,0 +1,311 @@
//
//  CPUSoftmax.cpp
//  MNN
//
//  Created by MNN on 2018/07/16.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUSoftmax.hpp"
#include <math.h>
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/Concurrency.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#ifdef MNN_USE_NEON
#include <arm_neon.h>
#endif

namespace MNN {

int CPUSoftmax::_softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum) {
    // Max and sub
    MNN_CONCURRENCY_BEGIN(tId, threadNum)
    {
        const float *srcY = srcData + tId * channel;
        float *dstY       = dstData + tId * channel;
        for (int y = (int)tId; y < outside; y += threadNum, srcY += channel * threadNum, dstY += channel * threadNum) {
            float maxValue = srcY[0];
            {
                int c = 1;
#ifdef MNN_USE_NEON
#if !(defined(__ARM_FEATURE_FMA) && defined(__aarch64__))
#define vmaxvq_f32(v)                 \
    ({                                \
        float __m = v[0];             \
        for (int i = 1; i < 4; i++) { \
            if (v[i] > __m)           \
                __m = v[i];           \
        }                             \
        __m;                          \
    })
#endif
                if (c + 3 < channel) {
                    float32x4_t maxx4 = vld1q_f32(srcY + c);
                    c += 4;
                    for (; c + 3 < channel; c += 4) {
                        maxx4 = vmaxq_f32(maxx4, vld1q_f32(srcY + c));
                    }
                    float value = vmaxvq_f32(maxx4);
                    if (value > maxValue)
                        maxValue = value;
                }
#endif
                for (; c < channel; ++c) {
                    float value = srcY[c];
                    if (value > maxValue)
                        maxValue = value;
                }
            }

            for (int c = 0; c < channel; ++c) {
                dstY[c] = -srcY[c] + maxValue;
            }
        }
    }
    MNN_CONCURRENCY_END();

    // Exp
    auto schedule = ((CPUBackend*)backend())->multiThreadDivide(channel * outside);
    int sizeDivide = schedule.first;
    int scheduleNumber = schedule.second;

    MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
        int start    = sizeDivide * (int)tId;
        int realSize = sizeDivide;
        if (tId == scheduleNumber - 1) {
            realSize = channel * outside - start;
        }
        if (realSize > 0) {
            MNNExp(dstData + start, dstData + start, realSize);
        }
    }
    MNN_CONCURRENCY_END();

    // Sum and div
    MNN_CONCURRENCY_BEGIN(tId, threadNum);
    {
        float *dstY = dstData + tId * channel;
        for (int y = (int)tId; y < outside; y += threadNum, dstY += channel * threadNum) {
            // sum
            float sumValue = 0;

            for (int c = 0; c < channel; ++c) {
                sumValue += dstY[c];
            }

            // div
            {
                int c = 0;
#ifdef MNN_USE_NEON
                float div = 1.f / sumValue;
                for (; c + 3 < channel; c += 4) {
                    vst1q_f32(dstY + c, vmulq_n_f32(vld1q_f32(dstY + c), div));
                }
#endif
                for (; c < channel; ++c) {
                    dstY[c] /= sumValue;
                }
            }
        }
    }
    MNN_CONCURRENCY_END();

    return 0;
}

int CPUSoftmax::_softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel,
                               float *maxValue, float *sumValue, int threadNum) {
    if (inside == 1)
        return _softmax1(srcData, dstData, outside, channel, threadNum);

    const int stepY = inside * channel;
    MNN_CONCURRENCY_BEGIN(tId, threadNum);
    {
        const float *srcY  = srcData + tId * stepY;
        float *dstY        = dstData + tId * stepY;
        float *maxValueSub = maxValue + tId * inside;

        for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
            memcpy(maxValueSub, srcY, sizeof(float) * inside);
            const float *src = srcY + inside;
            for (int c = 1; c < channel; ++c, src += inside) {
                for (int x = 0; x < inside; ++x) {
                    if (src[x] > maxValueSub[x])
                        maxValueSub[x] = src[x];
                }
            }
            src        = srcY;
            float *dst = dstY;
            for (int c = 0; c < channel; ++c, src += inside, dst += inside) {
                for (int x = 0; x < inside; ++x) {
                    dst[x] = -src[x] + maxValueSub[x];
                }
            }
        }
    }
    MNN_CONCURRENCY_END();

    auto totalSize = channel * inside * outside;
    // Exp
    auto schedule = ((CPUBackend*)backend())->multiThreadDivide(totalSize);
    int sizeDivide = schedule.first;
    int scheduleNumber = schedule.second;

    MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
        int start    = sizeDivide * (int)tId;
        int realSize = sizeDivide;
        if (tId == scheduleNumber - 1) {
            realSize = totalSize - start;
        }
        if (realSize > 0) {
            MNNExp(dstData + start, dstData + start, realSize);
        }
    }
    MNN_CONCURRENCY_END();

    MNN_CONCURRENCY_BEGIN(tId, threadNum);
    {
        const float *srcY  = srcData + tId * stepY;
        float *dstY        = dstData + tId * stepY;
        float *sumValueSub = sumValue + tId * inside;
        for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
            memset(sumValueSub, 0, sizeof(float) * inside);
            float *dst = dstY;
            for (int c = 0; c < channel; ++c, dst += inside) {
                for (int x = 0; x < inside; ++x) {
                    sumValueSub[x] += dst[x];
                }
            }
            dst = dstY;
            for (int c = 0; c < channel; ++c, dst += inside) {
                for (int x = 0; x < inside; ++x) {
                    dst[x] /= sumValueSub[x];
                }
            }
        }
    }
    MNN_CONCURRENCY_END();
    return 0;
}

ErrorCode CPUSoftmax::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto input           = inputs[0];
    const int dimensions = input->buffer().dimensions;

    const auto layout = TensorUtils::getDescribe(input)->dimensionFormat;
    mNeedUnpackC4     = layout == MNN_DATA_FORMAT_NC4HW4;

    if (mNeedUnpackC4) {
        int totalSize = 1;
        for (int i = 1; i < dimensions; ++i) {
            totalSize *= input->length(i);
        }
        mStorage.buffer().dim[0].extent = input->length(0);
        mStorage.buffer().dim[1].extent = totalSize;
        TensorUtils::getDescribe(&mStorage)->dimensionFormat = MNN_DATA_FORMAT_NHWC;
        mStorage.buffer().dimensions = 2;
        mStorage.buffer().type       = input->getType();
        backend()->onAcquireBuffer(&mStorage, Backend::DYNAMIC);
    }

    int inside = 1;
    int dims   = input->buffer().dimensions;
    for (int i = mAxis + 1; i < dims; ++i) {
        inside *= input->length(i);
    }

    if (inside != 1) { // _softmax1 will not run, so the maxValue and sumValue tensors are needed.
        int threadNum = ((CPUBackend *)backend())->threadNumber();

        mMaxValue.buffer().dim[0].extent = inside * threadNum;
        mMaxValue.buffer().dimensions    = 1;
        mMaxValue.setType(DataType_DT_FLOAT);
        backend()->onAcquireBuffer(&mMaxValue, Backend::DYNAMIC);

        mSumValue.buffer().dim[0].extent = inside * threadNum;
        mSumValue.buffer().dimensions    = 1;
        mSumValue.setType(DataType_DT_FLOAT);
        backend()->onAcquireBuffer(&mSumValue, Backend::DYNAMIC);

        backend()->onReleaseBuffer(&mMaxValue, Backend::DYNAMIC);
        backend()->onReleaseBuffer(&mSumValue, Backend::DYNAMIC);
    }

    if (mNeedUnpackC4) {
        backend()->onReleaseBuffer(&mStorage, Backend::DYNAMIC);
    }

    return NO_ERROR;
}

ErrorCode CPUSoftmax::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    MNN_ASSERT(1 == inputs.size());
    MNN_ASSERT(1 == outputs.size());
    auto inputTensor        = inputs[0];
    auto outputTensor       = outputs[0];
    const auto inputDataPtr = inputTensor->host<float>();
    auto outputDataPtr      = outputTensor->host<float>();
    const int batch         = inputTensor->batch();
    const auto dims         = inputTensor->buffer().dimensions;

    float *tempData = nullptr;
    if (mNeedUnpackC4) {
        tempData = mStorage.host<float>();
    }

    int areaInput = 1;
    for (int i = 2; i < dims; ++i) {
        areaInput *= inputTensor->length(i);
    }
    int inside  = 1;
    int outside = 1;
    int channel = 1;
    for (int i = 0; i < mAxis; ++i) {
        outside *= inputTensor->length(i);
    }
    channel = inputTensor->length(mAxis);
    for (int i = mAxis + 1; i < dims; ++i) {
        inside *= inputTensor->length(i);
    }

    int threadNum = ((CPUBackend *)backend())->threadNumber();
    if (!mNeedUnpackC4) {
        _softmaxCommon(inputDataPtr, outputDataPtr, inside, outside, channel, mMaxValue.host<float>(),
                       mSumValue.host<float>(), threadNum);
        return NO_ERROR;
    }
    auto outputSize = outputTensor->elementSize();
    int batchSize   = outputSize / batch;
    for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
        auto inputData = inputDataPtr + batchIndex * batchSize;
        MNNUnpackC4(outputDataPtr + batchIndex * mStorage.length(1), inputData, areaInput, inputTensor->channel());
    }
    _softmaxCommon(outputDataPtr, tempData, inside, outside, channel, mMaxValue.host<float>(), mSumValue.host<float>(), threadNum);
    for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
        auto outputData = outputDataPtr + batchIndex * batchSize;
        auto tempPtr    = tempData + batchIndex * mStorage.length(1);
        MNNPackC4(outputData, tempPtr, areaInput, outputTensor->channel());
    }
    return NO_ERROR;
}

CPUSoftmax::CPUSoftmax(Backend *b, int axis) : MNN::Execution(b), mAxis(axis), mStorage(2), mNeedUnpackC4(false) {
    // nothing to do
}

class CPUSoftmaxCreator : public CPUBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        auto axis = op->main_as_Axis()->axis();
        if (axis < 0) {
            axis = inputs[0]->dimensions() + axis;
        }
        return new CPUSoftmax(backend, axis);
    }
};

REGISTER_CPU_OP_CREATOR(CPUSoftmaxCreator, OpType_Softmax);

} // namespace MNN
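Both _softmax1 and _softmaxCommon implement the numerically stable softmax: the per-row maximum is subtracted before exponentiation so the largest exponent is exp(0) = 1 and overflow is avoided:

$$\mathrm{softmax}(x)_c = \frac{\exp(x_c - \max_k x_k)}{\sum_j \exp(x_j - \max_k x_k)}$$

Subtracting a constant does not change the result, since the common factor cancels between numerator and denominator.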
@@ -0,0 +1,35 @@
//
//  CPUSoftmax.hpp
//  MNN
//
//  Created by MNN on 2018/07/16.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef CPUSoftmax_hpp
#define CPUSoftmax_hpp

#include "core/Execution.hpp"

namespace MNN {
class CPUSoftmax : public Execution {
public:
    CPUSoftmax(Backend *b, int axis);
    virtual ~CPUSoftmax() = default;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
    int _softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel, float *maxValue,
                       float *sumValue, int threadNum);
    int _softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum);

    int mAxis;
    Tensor mStorage;
    Tensor mMaxValue;
    Tensor mSumValue;
    bool mNeedUnpackC4;
};
} // namespace MNN

#endif /* CPUSoftmax_hpp */
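As a plain scalar reference for the two kernels declared above (a standalone sketch, not MNN API), with the data viewed as [outside, channel, inside] and softmax taken along the channel axis:

#include <algorithm>
#include <cmath>

// softmaxRef: hypothetical scalar reference for CPUSoftmax's kernels.
void softmaxRef(const float* src, float* dst, int outside, int channel, int inside) {
    for (int o = 0; o < outside; ++o) {
        for (int x = 0; x < inside; ++x) {
            const float* s = src + o * channel * inside + x;
            float* d       = dst + o * channel * inside + x;
            float maxValue = s[0];
            for (int c = 1; c < channel; ++c) {
                maxValue = std::max(maxValue, s[c * inside]); // pass 1: max
            }
            float sum = 0.f;
            for (int c = 0; c < channel; ++c) {
                d[c * inside] = std::exp(s[c * inside] - maxValue); // pass 2: stable exp
                sum += d[c * inside];
            }
            for (int c = 0; c < channel; ++c) {
                d[c * inside] /= sum; // pass 3: normalize
            }
        }
    }
}

The real implementation runs the same three passes, but splits rows across threads and vectorizes the max and divide steps with NEON when available.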
@@ -13,10 +13,8 @@
#include "backend/cpu/compute/ConvOpt.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "math/Vec4.hpp"
using namespace MNN::Math;

typedef Vec4 float4;
#include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;

#define SOURCE_BLOCK 64
#define WEIGHT_BLOCK 256
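Several hunks in this commit migrate from the fixed Vec4 class to the Vec<float, 4> template alias. A minimal sketch of that alias pattern (a hypothetical scalar illustration; MNN's real math/Vec.hpp specializes the template with SIMD intrinsics):

#include <cstddef>

namespace sketch {
template <typename T, std::size_t N>
struct Vec {
    T value[N];
    Vec() = default;
    explicit Vec(T v) {                   // splat a scalar, as in Vec4 x(.0f)
        for (std::size_t i = 0; i < N; ++i) value[i] = v;
    }
    static Vec load(const T* src) {       // load N contiguous lanes
        Vec r;
        for (std::size_t i = 0; i < N; ++i) r.value[i] = src[i];
        return r;
    }
    Vec operator+(const Vec& rhs) const { // lane-wise add
        Vec r;
        for (std::size_t i = 0; i < N; ++i) r.value[i] = value[i] + rhs.value[i];
        return r;
    }
    Vec operator*(const Vec& rhs) const { // lane-wise multiply
        Vec r;
        for (std::size_t i = 0; i < N; ++i) r.value[i] = value[i] * rhs.value[i];
        return r;
    }
};
using Vec4 = Vec<float, 4>;               // the alias the updated code relies on
} // namespace sketch

Parameterizing the lane type and width lets the same kernels instantiate other widths without duplicating the class.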
@@ -0,0 +1,128 @@
//
//  GeometryCropAndResize.cpp
//  MNN
//
//  Created by MNN on 2020/08/5.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "ConvertUtils.hpp"

namespace MNN {
class GeometryCropAndResize : public GeometryComputer {
public:
    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
        MNN_ASSERT(4 == inputs.size());
        MNN_ASSERT(1 == outputs.size());
        auto img       = inputs[0];
        auto boxes     = inputs[1];
        auto box_ind   = inputs[2];
        auto crop_size = inputs[3];
        auto output    = outputs[0];
        auto extrapolation = op->main_as_CropAndResize()->extrapolationValue();
        auto method        = op->main_as_CropAndResize()->method();
        // resizeType of Interp: 1 - NEAREST, 2 - BILINEAR
        const int resizeType = method == CropAndResizeMethod_BILINEAR ? 2 : 1;

        int batch = img->length(0), ih = img->length(1), iw = img->length(2),
            depth = img->length(3), boxNum = boxes->length(0);
        const int cropHeight = crop_size->host<uint32_t>()[0],
                  cropWidth  = crop_size->host<uint32_t>()[1];

        auto des = TensorUtils::getDescribe(output);
        des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL;
        des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
        des->regions.clear();
        des->regions.reserve(boxNum);
        for (int i = 0; i < boxNum; i++) {
            const float y1 = boxes->host<float>()[i * 4];
            const float x1 = boxes->host<float>()[i * 4 + 1];
            const float y2 = boxes->host<float>()[i * 4 + 2];
            const float x2 = boxes->host<float>()[i * 4 + 3];
            const int ind  = box_ind->host<uint32_t>()[i];
            const float ch = (y2 - y1) * (ih - 1), cw = (x2 - x1) * (iw - 1);
            const float yScale  = ch / static_cast<float>(cropHeight - 1);
            const float xScale  = cw / static_cast<float>(cropWidth - 1);
            const float yOffset = y1 * (ih - 1), xOffset = x1 * (iw - 1);
            // select the cropped image from the batch and convert its format from NHWC to NC4HW4
            std::shared_ptr<Tensor> cropValue(new Tensor);
            {
                cropValue->buffer().type       = halide_type_of<float>();
                cropValue->buffer().dimensions = 4;
                cropValue->setLength(0, 1);
                cropValue->setLength(1, depth);
                cropValue->setLength(2, ih);
                cropValue->setLength(3, iw);
                auto des = TensorUtils::getDescribe(cropValue.get());
                des->memoryType      = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
                des->regions.clear();
                Tensor::InsideDescribe::Region region;
                region.origin        = img;
                region.size[1]       = depth;
                region.size[2]       = ih * iw;
                region.src.offset    = ind * ih * iw * depth;
                region.dst.offset    = 0;
                region.src.stride[1] = 1;
                region.src.stride[2] = depth;
                region.dst.stride[1] = ih * iw;
                region.dst.stride[2] = 1;
                des->regions.emplace_back(std::move(region));
                res.extras.emplace_back(cropValue);
            }
            // use an Interp op to crop and resize the selected image
            std::shared_ptr<Tensor> resizeValue;
            {
                resizeValue.reset(Tensor::createDevice<float>({1, depth, cropHeight, cropWidth}));
                auto des = TensorUtils::getDescribe(resizeValue.get());
                des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
                std::unique_ptr<OpT> interp(new OpT);
                interp->type       = OpType_Interp;
                interp->main.type  = OpParameter_Interp;
                interp->main.value = new InterpT;
                interp->main.AsInterp()->widthScale   = xScale;
                interp->main.AsInterp()->heightScale  = yScale;
                interp->main.AsInterp()->widthOffset  = xOffset;
                interp->main.AsInterp()->heightOffset = yOffset;
                interp->main.AsInterp()->alignCorners = false;
                interp->main.AsInterp()->resizeType   = resizeType;
                auto cmd = GeometryComputerUtils::makeCommand(interp.get(), {cropValue.get()}, {resizeValue.get()});
                res.extras.emplace_back(resizeValue);
                res.command.emplace_back(cmd);
            }
            // convert the resized image's format from NC4HW4 to NHWC and add it to the output's batch
            {
                Tensor::InsideDescribe::Region region;
                region.origin        = resizeValue.get();
                region.size[1]       = cropHeight * cropWidth;
                region.size[2]       = depth;
                region.src.offset    = 0;
                region.dst.offset    = i * cropHeight * cropWidth * depth;
                region.src.stride[1] = 1;
                region.src.stride[2] = cropHeight * cropWidth;
                region.dst.stride[1] = depth;
                region.dst.stride[2] = 1;
                des->regions.emplace_back(std::move(region));
            }
        }

        return true;
    }
    virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
                                                 const std::vector<Tensor*>& outputs) const override {
        // return {false};
        return {true};
    }
};

static void _create() {
    std::shared_ptr<GeometryComputer> comp(new GeometryCropAndResize);
    // GeometryComputer::registerGeometryComputer(comp, {OpType_CropAndResize});
}

REGISTER_GEOMETRY(GeometryCropAndResize, _create);

} // namespace MNN
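The scale and offset fed to the Interp op come directly from the normalized box corners. With a source image of H x W and a crop of H_c x W_c, this matches the definitions used above:

$$s_y = \frac{(y_2 - y_1)(H - 1)}{H_c - 1},\qquad s_x = \frac{(x_2 - x_1)(W - 1)}{W_c - 1},\qquad o_y = y_1 (H - 1),\qquad o_x = x_1 (W - 1)$$

so, under the usual interpretation of an interp scale/offset pair, output row $j$ samples source row $o_y + j \cdot s_y$, and likewise for columns.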
@@ -0,0 +1,304 @@
//
//  GeometryGather.cpp
//  MNN
//
//  Created by MNN on 2020/06/09.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
namespace MNN {

class GeometryGather : public DefaultGeometryComputer {
public:
    virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
                                                 const std::vector<Tensor*>& outputs) const override {
        MNN_ASSERT(inputs.size() == 2);
        MNN_ASSERT(1 == outputs.size());
        auto embedding = inputs[0];
        auto indices   = inputs[1];
        auto output    = outputs[0];

        const int firstDimStride = embedding->buffer().dim[0].stride;
        if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && firstDimStride != 0) {
            std::vector<bool> res(outputs.size(), true);
            return res;
        }
        return std::vector<bool>(outputs.size(), false);
    }

    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        MNN_ASSERT(2 == inputs.size());
        MNN_ASSERT(1 == outputs.size());
        auto embedding = inputs[0];
        auto indices   = inputs[1];
        auto output    = outputs[0];

        const int firstDimStride = embedding->buffer().dim[0].stride;
        if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || firstDimStride == 0) {
            Command cmd;
            cmd.op      = op;
            cmd.inputs  = std::move(inputs);
            cmd.outputs = std::move(outputs);
            res.command.emplace_back(std::move(cmd));
            return true;
        }

        auto bytes = embedding->buffer().type.bytes();

        const size_t indicesCount = indices->elementSize();
        const auto limit          = embedding->length(0);
        const int* indicesData    = indices->host<int32_t>();

        auto outputDes = TensorUtils::getDescribe(output);
        outputDes->regions.clear();
        outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
        for (int i = 0; i < indicesCount; i++) {
            if (indicesData[i] < 0 || indicesData[i] >= limit) {
                MNN_PRINT("Gather index error\n");
                return false;
            }

            Tensor::InsideDescribe::Region slice;
            slice.origin        = embedding;
            slice.size[0]       = 1;
            slice.size[1]       = 1;
            slice.size[2]       = firstDimStride;
            slice.src.offset    = firstDimStride * indicesData[i];
            slice.dst.offset    = i * firstDimStride;
            slice.src.stride[0] = 1;
            slice.src.stride[1] = 1;
            slice.src.stride[2] = 1;
            slice.dst.stride[0] = 1;
            slice.dst.stride[1] = 1;
            slice.dst.stride[2] = 1;
            outputDes->regions.emplace_back(std::move(slice));
        }
        return true;
    }
};

class GeometryGatherND : public DefaultGeometryComputer {
public:
    virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
                                                 const std::vector<Tensor*>& outputs) const override {
        MNN_ASSERT(inputs.size() == 2);
        MNN_ASSERT(1 == outputs.size());
        auto params  = inputs[0];
        auto indices = inputs[1];
        auto output  = outputs[0];

        int mSliceN    = 1;
        int mSliceSize = 1;
        for (int i = 0; i < indices->dimensions() - 1; ++i) {
            mSliceN *= indices->length(i);
        }
        auto indiceNd = indices->length(indices->dimensions() - 1);
        std::vector<int> mDimsToCount;
        mDimsToCount.resize(indiceNd);
        for (int i = indiceNd; i < params->dimensions(); ++i) {
            mSliceSize *= params->length(i);
        }

        if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && mSliceSize != 0) {
            std::vector<bool> res(outputs.size(), true);
            return res;
        } else {
            std::vector<bool> res(outputs.size(), false);
            return res;
        }
    }

    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        MNN_ASSERT(2 == inputs.size());
        MNN_ASSERT(1 == outputs.size());
        auto params = inputs[0];
        auto indice = inputs[1];
        auto output = outputs[0];

        int mSliceN    = 1;
        int mSliceSize = 1;
        for (int i = 0; i < indice->dimensions() - 1; ++i) {
            mSliceN *= indice->length(i);
        }
        auto indiceNd = indice->length(indice->dimensions() - 1);
        std::vector<int> mDimsToCount;
        mDimsToCount.resize(indiceNd);
        for (int i = indiceNd; i < params->dimensions(); ++i) {
            mSliceSize *= params->length(i);
        }

        if (TensorUtils::getDescribe(indice)->usage != MNN::Tensor::InsideDescribe::CONSTANT || mSliceSize == 0) {
            Command cmd;
            cmd.op      = op;
            cmd.inputs  = std::move(inputs);
            cmd.outputs = std::move(outputs);
            res.command.emplace_back(std::move(cmd));
            return true;
        }

        auto paramSize = params->elementSize();
        for (int i = 0; i < indiceNd; ++i) {
            mDimsToCount[i] = paramSize / params->length(i);
            paramSize       = mDimsToCount[i];
        }
        mDimsToCount.resize(indiceNd);
        auto indiceData = indice->host<int32_t>();

        auto outputDes = TensorUtils::getDescribe(output);
        outputDes->regions.clear();
        outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
        for (int i = 0; i < mSliceN; i++) {
            int fromPos = 0;
            for (int j = 0; j < indiceNd; ++j) {
                fromPos += mDimsToCount[j] * indiceData[i * indiceNd + j];
            }

            Tensor::InsideDescribe::Region slice;
            slice.origin        = params;
            slice.size[0]       = 1;
            slice.size[1]       = 1;
            slice.size[2]       = mSliceSize;
            slice.src.offset    = fromPos;
            slice.dst.offset    = i * mSliceSize;
            slice.src.stride[0] = 1;
            slice.src.stride[1] = 1;
            slice.src.stride[2] = 1;
            slice.dst.stride[0] = 1;
            slice.dst.stride[1] = 1;
            slice.dst.stride[2] = 1;
            outputDes->regions.emplace_back(std::move(slice));
        }
        return true;
    }
};

class GeometryGatherV2 : public DefaultGeometryComputer {
public:
    virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
                                                 const std::vector<Tensor*>& outputs) const override {
        MNN_ASSERT(inputs.size() >= 2);
        MNN_ASSERT(1 == outputs.size());
        auto params  = inputs[0];
        auto indices = inputs[1];
        auto output  = outputs[0];

        int axis = 0;
        if (inputs.size() == 3) {
            const Tensor* axisTensor = inputs[2];
            axis = axisTensor->host<int32_t>()[0];
        }

        MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);

        if (axis < 0) {
            axis = params->buffer().dimensions + axis;
        }
        const int gatherDimSize = params->buffer().dim[axis].extent;
        const int N             = indices->elementSize();
        MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());

        int inside = 1;
        for (int i = axis + 1; i < params->dimensions(); ++i) {
            inside *= params->length(i);
        }

        if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && inside != 0) {
            std::vector<bool> res(outputs.size(), true);
            return res;
        }
        return std::vector<bool>(outputs.size(), false);
    }

    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        MNN_ASSERT(inputs.size() >= 2);
        MNN_ASSERT(1 == outputs.size());
        auto params  = inputs[0];
        auto indices = inputs[1];
        auto output  = outputs[0];

        int axis = 0;
        if (inputs.size() == 3) {
            const Tensor* axisTensor = inputs[2];
            axis = axisTensor->host<int32_t>()[0];
        }
        MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);

        if (axis < 0) {
            axis = params->buffer().dimensions + axis;
        }
        const int gatherDimSize = params->buffer().dim[axis].extent;
        const int N             = indices->elementSize();
        MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());

        int inside  = 1;
        int outside = 1;
        for (int i = 0; i < axis; ++i) {
            outside *= params->length(i);
        }
        for (int i = axis + 1; i < params->dimensions(); ++i) {
            inside *= params->length(i);
        }

        if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || inside == 0) {
            Command cmd;
            cmd.op      = op;
            cmd.inputs  = std::move(inputs);
            cmd.outputs = std::move(outputs);
            res.command.emplace_back(std::move(cmd));
            return true;
        }

        const int limit               = params->length(axis);
        auto bytes                    = output->buffer().type.bytes();
        const int insideStride        = inside;
        const int outputOutsideStride = inside * N;
        const int inputOutsideStride  = inside * inputs[0]->length(axis);
        const int* indicesPtr         = indices->host<int32_t>();

        auto outputDes = TensorUtils::getDescribe(output);
        outputDes->regions.clear();
        outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
        for (int o = 0; o < outside; ++o) {
            for (int i = 0; i < N; i++) {
                if (indicesPtr[i] < 0 || indicesPtr[i] >= limit) {
                    continue;
                }
                Tensor::InsideDescribe::Region slice;
                slice.origin        = params;
                slice.size[0]       = 1;
                slice.size[1]       = 1;
                slice.size[2]       = insideStride;
                slice.src.offset    = inputOutsideStride * o + insideStride * indicesPtr[i];
                slice.dst.offset    = outputOutsideStride * o + i * insideStride;
                slice.src.stride[0] = 1;
                slice.src.stride[1] = 1;
                slice.src.stride[2] = 1;
                slice.dst.stride[0] = 1;
                slice.dst.stride[1] = 1;
                slice.dst.stride[2] = 1;
                outputDes->regions.emplace_back(std::move(slice));
            }
        }
        return true;
    }
};

static void _create() {
    // std::shared_ptr<GeometryComputer> comp(new GeometryGather);
    // GeometryComputer::registerGeometryComputer(comp, {OpType_Gather});
    //
    // std::shared_ptr<GeometryComputer> comp2(new GeometryGatherND);
    // GeometryComputer::registerGeometryComputer(comp2, {OpType_GatherND});
    //
    // std::shared_ptr<GeometryComputer> comp3(new GeometryGatherV2);
    // GeometryComputer::registerGeometryComputer(comp3, {OpType_GatherV2});
}

REGISTER_GEOMETRY(GeometryGather, _create);

} // namespace MNN
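The regions emitted by GeometryGatherV2 are equivalent to the following scalar reference (a standalone sketch with a hypothetical name, not MNN API), with params flattened to [outside, L, inside] and the output to [outside, N, inside]:

// gatherV2Ref: hypothetical reference for the regions built above.
// out[o][i][x] = params[o][indices[i]][x] for in-range indices; rows with
// out-of-range indices are skipped, matching the `continue` above.
void gatherV2Ref(const float* params, const int* indices, float* out,
                 int outside, int L /* params->length(axis) */,
                 int N /* indices->elementSize() */, int inside) {
    for (int o = 0; o < outside; ++o) {
        for (int i = 0; i < N; ++i) {
            const int idx = indices[i];
            if (idx < 0 || idx >= L) {
                continue; // no region is emitted for this row
            }
            const float* src = params + (o * L + idx) * inside; // inputOutsideStride * o + insideStride * idx
            float* dst       = out + (o * N + i) * inside;      // outputOutsideStride * o + i * insideStride
            for (int x = 0; x < inside; ++x) {
                dst[x] = src[x];
            }
        }
    }
}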
@@ -0,0 +1,214 @@
//
//  GeometrySoftmax.cpp
//  MNN
//
//  Created by MNN on 2020/06/28.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"

namespace MNN {
class GeometrySoftmax : public GeometryComputer {
public:
    virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
                                                 const std::vector<Tensor*>& outputs) const override {
        auto axis = op->main_as_Axis()->axis();
        if (axis < 0) {
            axis = inputs[0]->dimensions() + axis;
        }

        if (axis == 1) {
            return std::vector<bool>(outputs.size(), false);
        }
        return std::vector<bool>(outputs.size(), true);
    }

    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs,
                           const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
        MNN_ASSERT(1 == inputs.size());
        MNN_ASSERT(1 == outputs.size());

        auto input  = inputs[0];
        auto output = outputs[0];
        auto dims   = input->buffer().dimensions;

        auto axis = op->main_as_Axis()->axis();
        if (axis < 0) {
            axis = inputs[0]->dimensions() + axis;
        }

        if (axis == 1) {
            Command cmd;
            cmd.op      = op;
            cmd.inputs  = std::move(inputs);
            cmd.outputs = std::move(outputs);
            res.command.emplace_back(std::move(cmd));
            return true;
        }

        int inside  = 1;
        int outside = 1;
        int channel = 1;
        for (int i = 0; i < axis; ++i) {
            outside *= input->length(i);
        }
        channel = input->length(axis);
        for (int i = axis + 1; i < dims; ++i) {
            inside *= input->length(i);
        }

        // input transform to NCHW format
        std::shared_ptr<Tensor> tmpInput;
        {
            tmpInput.reset(Tensor::createDevice<float>({outside, channel, inside}));
            auto outputDes = TensorUtils::getDescribe(tmpInput.get());
            outputDes->regions.clear();
            outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;

            Tensor::InsideDescribe::Region desReg;
            desReg.size[0] = outside;
            desReg.size[1] = channel;
            desReg.size[2] = inside;
            desReg.dst.offset = 0;
            desReg.dst.stride[0] = channel * inside;
            desReg.dst.stride[1] = inside;
            desReg.dst.stride[2] = 1;
            desReg.src.offset = 0;
            desReg.src.stride[0] = channel * inside;
            desReg.src.stride[1] = inside;
            desReg.src.stride[2] = 1;
            desReg.origin = input;
            outputDes->regions.emplace_back(std::move(desReg));

            res.extras.emplace_back(tmpInput);
        }

        // reduction max, axis=1
        std::shared_ptr<Tensor> maxValue;
        {
            maxValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
            res.extras.emplace_back(maxValue);
            res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_MAXIMUM, tmpInput.get(), maxValue.get()));
        }

        // broadcast reduction axis dim
        std::shared_ptr<Tensor> maxBroadValue;
        {
            maxBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
            auto outputDes = TensorUtils::getDescribe(maxBroadValue.get());
            outputDes->regions.clear();
            outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;

            Tensor::InsideDescribe::Region desReg;
            desReg.size[0] = outside;
            desReg.size[1] = channel;
            desReg.size[2] = inside;
            desReg.dst.offset = 0;
            desReg.dst.stride[0] = channel * inside;
            desReg.dst.stride[1] = inside;
            desReg.dst.stride[2] = 1;
            desReg.src.offset = 0;
            desReg.src.stride[0] = inside;
            desReg.src.stride[1] = 0;
            desReg.src.stride[2] = 1;
            desReg.origin = maxValue.get();
            outputDes->regions.emplace_back(std::move(desReg));

            res.extras.emplace_back(maxBroadValue);
        }

        // sub
        std::shared_ptr<Tensor> subMaxValue;
        {
            subMaxValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
            auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_SUB, tmpInput.get(), maxBroadValue.get(), subMaxValue.get());
            res.extras.emplace_back(subMaxValue);
            res.command.emplace_back(std::move(cmd));
        }
        // exp
        std::shared_ptr<Tensor> expValue;
        {
            expValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
            auto cmd = GeometryComputerUtils::makeUnary(UnaryOpOperation_EXP, subMaxValue.get(), expValue.get());
            res.extras.emplace_back(expValue);
            res.command.emplace_back(std::move(cmd));
        }

        // reduction sum, axis=2, only support NCHW
        std::shared_ptr<Tensor> sumValue;
        {
            sumValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
            res.extras.emplace_back(sumValue);
            res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_SUM, expValue.get(), sumValue.get()));
        }

        // broadcast reduction axis dim
        std::shared_ptr<Tensor> sumBroadValue;
        {
            sumBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
            auto outputDes = TensorUtils::getDescribe(sumBroadValue.get());
            outputDes->regions.clear();
            outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;

            Tensor::InsideDescribe::Region desReg;
            desReg.size[0] = outside;
            desReg.size[1] = channel;
            desReg.size[2] = inside;
            desReg.dst.offset = 0;
            desReg.dst.stride[0] = channel * inside;
            desReg.dst.stride[1] = inside;
            desReg.dst.stride[2] = 1;
            desReg.src.offset = 0;
            desReg.src.stride[0] = inside;
            desReg.src.stride[1] = 0;
            desReg.src.stride[2] = 1;
            desReg.origin = sumValue.get();
            outputDes->regions.emplace_back(std::move(desReg));

            res.extras.emplace_back(sumBroadValue);
        }

        // div
        std::shared_ptr<Tensor> tmpOutput;
        {
            tmpOutput.reset(Tensor::createDevice<float>({outside, channel, inside}));
            auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_REALDIV, expValue.get(), sumBroadValue.get(), tmpOutput.get());
            res.extras.emplace_back(tmpOutput);
            res.command.emplace_back(std::move(cmd));
        }

        // transform to output
        {
            auto outputDes = TensorUtils::getDescribe(output);
            outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
            Tensor::InsideDescribe::Region desReg;
            desReg.size[0] = outside;
            desReg.size[1] = channel;
            desReg.size[2] = inside;
            desReg.dst.offset = 0;
            desReg.dst.stride[0] = channel * inside;
            desReg.dst.stride[1] = inside;
            desReg.dst.stride[2] = 1;
            desReg.src.offset = 0;
            desReg.src.stride[0] = channel * inside;
            desReg.src.stride[1] = inside;
            desReg.src.stride[2] = 1;
            desReg.origin = tmpOutput.get();
            outputDes->regions.emplace_back(std::move(desReg));
        }
        return true;
    }
};

static void _create() {
//    std::shared_ptr<GeometryComputer> comp(new GeometrySoftmax);
//    GeometryComputer::registerGeometryComputer(comp, {OpType_Softmax});
}

REGISTER_GEOMETRY(GeometrySoftmax, _create);

} // namespace MNN
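Taken together, the commands above lower Softmax into reduce-max, broadcast, subtract, exp, reduce-sum, broadcast and divide. A self-contained sketch of the same numerically stable computation on an (outside, channel, inside) layout, for illustration only (not code from this commit):

    #include <algorithm>
    #include <cmath>

    // Numerically stable softmax over the "channel" axis of data laid out as
    // (outside, channel, inside), matching the decomposition emitted above.
    void softmaxOCI(const float* in, float* out, int outside, int channel, int inside) {
        for (int o = 0; o < outside; ++o) {
            for (int i = 0; i < inside; ++i) {
                auto at = [&](int c) { return (o * channel + c) * inside + i; };
                float maxV = in[at(0)];                         // reduce max, axis=1
                for (int c = 1; c < channel; ++c) maxV = std::max(maxV, in[at(c)]);
                float sum = 0.f;                                // sub + exp + reduce sum
                for (int c = 0; c < channel; ++c) {
                    out[at(c)] = std::exp(in[at(c)] - maxV);
                    sum += out[at(c)];
                }
                for (int c = 0; c < channel; ++c) out[at(c)] /= sum;  // div
            }
        }
    }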
@@ -7,7 +7,7 @@ add_executable(benchmarkExprModels.out ${CMAKE_CURRENT_LIST_DIR}/benchmarkExprMo
target_include_directories(benchmarkExprModels.out PRIVATE "${CMAKE_CURRENT_LIST_DIR}/exprModels" ${CMAKE_CURRENT_SOURCE_DIR}/)
target_link_libraries(benchmarkExprModels.out ${MNN_DEPS})

if ((MSVC OR WIN32) AND NOT MNN_BUILD_SHARED_LIBS)
if (MSVC AND NOT MNN_BUILD_SHARED_LIBS)
    foreach (DEPEND ${MNN_DEPS})
        target_link_options(benchmark.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)
        target_link_options(benchmarkExprModels.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)
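The /WHOLEARCHIVE link option matters here because MNN registers ops and backends through static initializers (the REGISTER_GEOMETRY macro above is one example); when linking a static library, MSVC otherwise drops object files that nothing references, so those registrations never run. A hedged sketch of the general pattern, with hypothetical names:

    // registry.cpp -- lives in a static library; nothing references this file
    // directly, so without /WHOLEARCHIVE (or --whole-archive on GNU ld) the
    // linker may omit it and sRegistered below is never constructed.
    #include <functional>
    #include <map>
    #include <string>

    std::map<std::string, std::function<void()>>& registry() {
        static std::map<std::string, std::function<void()>> sMap;
        return sMap;
    }

    struct Register {
        Register(const char* name, std::function<void()> f) { registry()[name] = std::move(f); }
    };
    static Register sRegistered("demo", [] { /* create the backend here */ });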
@@ -124,6 +124,7 @@ std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward
    const auto bufferSize = revertor->getBufferSize();
    auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
    revertor.reset();
    net->setSessionMode(MNN::Interpreter::Session_Release);
    MNN::ScheduleConfig config;
    config.numThread = numberThread;
    config.type      = static_cast<MNNForwardType>(forward);
@@ -90,6 +90,7 @@ static std::vector<float> runNet(VARP netOutput, const ScheduleConfig& config, i
    const void* buf = builder.GetBufferPointer();
    size_t size     = builder.GetSize();
    std::unique_ptr<Interpreter> net(Interpreter::createFromBuffer(buf, size));
    net->setSessionMode(MNN::Interpreter::Session_Release);
    auto session = net->createSession(config);
    net->releaseModel();
    auto inputTensor = net->getSessionInput(session, NULL);
@@ -1,84 +0,0 @@
import os
import sys
major_py_ver = sys.version_info.major

def convert_string_to_hex_list(code_str):
    hex_list = []
    for i in range(len(code_str)):
        hex_ = hex(ord(code_str[i]))
        hex_list.append(hex_)
    return hex_list

def opencl_codegen():
    cl_kernel_dir = sys.argv[1]
    output_path = sys.argv[2]
    print("Generating OpenCL Kernels in "+cl_kernel_dir+" to "+output_path)
    if not os.path.exists(cl_kernel_dir):
        print(cl_kernel_dir + " doesn't exist!")

    #common.h
    common_header_code = ""
    #quantized_common.h
    quantized_common_header_code = ""
    #activation_common.h
    activation_common_header_code = ""
    for file_name in os.listdir(cl_kernel_dir):
        file_path = os.path.join(cl_kernel_dir, file_name)
        if file_path[-2:] == ".h" and file_name[:-2] == "quantized_common":
            with open(file_path, "r") as f:
                quantized_common_header_code += f.read()
        elif file_path[-2:] == ".h" and file_name[:-2] == "activation_common":
            with open(file_path, "r") as f:
                activation_common_header_code += f.read()

    opencl_code_maps = {}
    for file_name in os.listdir(cl_kernel_dir):
        file_path = os.path.join(cl_kernel_dir, file_name)
        if file_path[-3:] == ".cl":
            with open(file_path, "r") as f:
                code_str = ""
                for line in f.readlines():
                    if "#include <activation_common.h>" in line:
                        code_str += common_header_code
                        code_str += activation_common_header_code
                    elif "#include <quantized_common.h>" in line:
                        code_str += common_header_code
                        code_str += quantized_common_header_code
                    elif "#include <common.h>" in line:
                        code_str += common_header_code
                    else:
                        code_str += line
                opencl_code_maps[file_name[:-3]] = convert_string_to_hex_list(code_str)

    #source model
    opencl_source_map = "#include <map> \n"
    opencl_source_map += "#include <string> \n"
    opencl_source_map += "#include <vector> \n"
    opencl_source_map += "namespace MNN { \n"
    opencl_source_map += "extern const std::map<std::string, std::vector<unsigned char>> OpenCLProgramMap = \n { \n"

    if major_py_ver == 2:
        items = opencl_code_maps.iteritems()
    else:
        items = opencl_code_maps.items()
    for file_name, file_source in items:
        opencl_source_map += "{\n \""
        opencl_source_map += file_name
        opencl_source_map += "\", \n"
        opencl_source_map += " { "
        for source_hex in file_source:
            opencl_source_map += source_hex
            opencl_source_map += ","
        opencl_source_map += " } "
        opencl_source_map += "\n }, \n"

    opencl_source_map += " }; \n"
    opencl_source_map += "} \n"

    with open(output_path, "w") as w_file:
        w_file.write(opencl_source_map)

    print("Generate OpenCL Source done !!! \n")

if __name__ == '__main__':
    opencl_codegen()
@@ -0,0 +1,140 @@
#!/usr/bin/env bash
. ./parse_options.sh || exit 1;

CMAKE=cmake
MAKE=make
ANDROID_NDK=/home/android-ndk-r18b

BUILD_ROOT=`pwd`

# Clean the existing directory instead of removing it, to avoid the
# problem "Current working directory cannot be established".
function make_or_clean_dir {
  if [ -d $1 ]; then
    rm -rf $1/*
  else
    mkdir $1
  fi
}

function build_arm_android_32 {
  make_or_clean_dir build_arm_android_32 && cd build_arm_android_32
  $CMAKE ../.. \
      -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DANDROID_ABI="armeabi-v7a" \
      -DANDROID_STL=c++_static \
      -DANDROID_NATIVE_API_LEVEL=android-21 \
      -DANDROID_TOOLCHAIN=clang \
      -DMNN_USE_LOGCAT=true \
      -DMNN_BUILD_FOR_ANDROID_COMMAND=true \
      -DNATIVE_LIBRARY_OUTPUT=. \
      -DNATIVE_INCLUDE_OUTPUT=. \
      -DMNN_VULKAN=$USE_VULKAN \
      -DMNN_OPENCL=$USE_OPENCL \
      -DMNN_OPENGL=$USE_OPENGL \
      -DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
  $MAKE -j $build_threads || exit 1;
  cd $BUILD_ROOT; true;
}

function build_arm_android_64 {
  make_or_clean_dir build_arm_android_64 && cd build_arm_android_64
  $CMAKE ../.. \
      -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DANDROID_ABI="arm64-v8a" \
      -DANDROID_STL=c++_static \
      -DANDROID_NATIVE_API_LEVEL=android-21 \
      -DANDROID_TOOLCHAIN=clang \
      -DMNN_USE_LOGCAT=true \
      -DMNN_BUILD_FOR_ANDROID_COMMAND=true \
      -DNATIVE_LIBRARY_OUTPUT=. \
      -DNATIVE_INCLUDE_OUTPUT=. \
      -DMNN_ARM82=ON \
      -DMNN_VULKAN=$USE_VULKAN \
      -DMNN_OPENCL=$USE_OPENCL \
      -DMNN_OPENGL=$USE_OPENGL \
      -DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
  $MAKE -j $build_threads || exit 1;
  cd $BUILD_ROOT; true;
}

function build_arm_linux_32 {
  cd $BUILD_ROOT; true;
}

function build_arm_linux_64 {
  cd $BUILD_ROOT; true;
}

function build_x86_linux {
  make_or_clean_dir build_x86_linux && cd build_x86_linux
  $CMAKE ../.. \
      -DCMAKE_BUILD_TYPE=Release \
      -DMNN_BUILD_TRAIN=ON \
      -DMNN_SEP_BUILD=OFF \
      -DMNN_BUILD_DEMO=ON \
      -DMNN_BUILD_QUANTOOLS=ON \
      -DMNN_EVALUATION=ON \
      -DMNN_BUILD_CONVERTER=ON \
      -DMNN_SUPPORT_TFLITE_QUAN=ON \
      -DMNN_BUILD_TEST=ON \
      -DMNN_OPENCL=$USE_OPENCL \
      -DMNN_VULKAN=$USE_VULKAN \
      -DMNN_OPENMP=$USE_OPENMP \
      -DMNN_USE_THREAD_POOL=OFF \
      -DMNN_BUILD_BENCHMARK=ON || exit 1;
  $MAKE -j $build_threads || exit 1;
  cd $BUILD_ROOT; true;
}

function build_all {
  build_arm_android_32 || exit 1;
  build_arm_android_64 || exit 1;
  build_arm_linux_32 || exit 1;
  build_arm_linux_64 || exit 1;
  build_x86_linux || exit 1;
  true;
}

function clean {
  rm -rf build_arm_android_32
  rm -rf build_arm_android_64
  rm -rf build_arm_linux_32
  rm -rf build_arm_linux_64
  rm -rf build_x86_linux
}

function build {
  case $platform in
    "arm_linux_32")
      build_arm_linux_32 || exit 1;
      ;;
    "arm_linux_64")
      build_arm_linux_64 || exit 1;
      ;;
    "x86_linux")
      build_x86_linux || exit 1;
      ;;
    "arm_android_32")
      build_arm_android_32 || exit 1;
      ;;
    "arm_android_64")
      build_arm_android_64 || exit 1;
      ;;
    "all")
      build_all || exit 1;
      ;;
    *) echo "Invalid platform: $platform" && exit 1;
  esac
}

if [ $clean == 1 ]; then
  clean
else
  build $@
fi
true;
@@ -0,0 +1,113 @@
#!/usr/bin/env bash

# Valid platform:
# - arm_android_32
# - arm_android_64
# - arm_linux_32
# - arm_linux_64
# - x86_linux
platform="all"

# Option to build with opencl.
use_opencl=0

# Option to build with opengl.
use_opengl=0

# Option to build with vulkan.
use_vulkan=0

# Option to build with openmp multithreads library.
use_openmp=0

build_threads=1

# Option to clear the build history.
clean=0

USE_OPENCL=OFF
USE_VULKAN=OFF
USE_OPENGL=OFF
USE_OPENMP=OFF
USE_THREAD_POOL=ON

function print_usage {
  echo -e "Usage: ./build.sh"
  echo -e "  --platform=x: Specify build platform x. "
  echo -e "                All valid platforms are \"arm_android_32\", \"arm_android_64\",
                \"arm_linux_32\", \"arm_linux_64\", \"x86_linux\", \"all\"."
  echo -e "                The default is \"all\"."
  echo -e "  --use_openmp=true|false: Build with openmp or not."
  echo -e "                The default is false."
  echo -e "  --use_opencl=true|false: Build with opencl or not."
  echo -e "                The default is false."
  echo -e "  --use_opengl=true|false: Build with opengl or not."
  echo -e "                The default is false."
  echo -e "  --use_vulkan=true|false: Build with vulkan or not."
  echo -e "                The default is false."
  echo -e "  --job=n: Build with n threads. Default is 1."
}

function parse_platform {
  platform=`echo "$1" | awk -F '=' '{print $2}'`
}

function parse_nthreads {
  build_threads=`echo "$1" | awk -F '=' '{print $2}'`
}

function parse_bool {
  val=`echo "$1" | awk -F '=' '{print $2}'`
  if [ $val == "true" ] || [ $val == "1" ]; then
    return 1;
  else
    return 0;
  fi
}

[ -z "${1:-}" ] && print_usage && exit 1;

while true; do
  [ -z "${1:-}" ] && break;
  case "$1" in
    --platform=*) parse_platform "$1"; shift 1;
      ;;
    --use_openmp=*) parse_bool "$1"; use_openmp=$?; shift 1;
      ;;
    --use_openmp) use_openmp=1; shift 1;
      ;;
    --use_opencl=*) parse_bool "$1"; use_opencl=$?; shift 1;
      ;;
    --use_opencl) use_opencl=1; shift 1;
      ;;
    --use_opengl=*) parse_bool "$1"; use_opengl=$?; shift 1;
      ;;
    --use_opengl) use_opengl=1; shift 1;
      ;;
    --use_vulkan=*) parse_bool "$1"; use_vulkan=$?; shift 1;
      ;;
    --use_vulkan) use_vulkan=1; shift 1;
      ;;
    --job=*) parse_nthreads "$1"; shift 1;
      ;;
    clean) clean=1; shift 1;
      ;;
    *) break;
  esac
done

if [ $use_opencl == 1 ]; then
  USE_OPENCL=ON
fi
if [ $use_opengl == 1 ]; then
  USE_OPENGL=ON
fi
if [ $use_vulkan == 1 ]; then
  USE_VULKAN=ON
fi
if [ $use_openmp == 1 ]; then
  USE_OPENMP=ON
  USE_THREAD_POOL=OFF
fi

true;
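With the options above, a typical invocation sourced through build.sh might look like ./build.sh --platform=arm_android_64 --use_vulkan=true --job=8 (illustrative; the flags are exactly those parsed above). Note that parse_bool reports its result through the shell return status, which the callers capture with $?.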
@@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars64.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja
@@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars32.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja
@@ -12,3 +12,9 @@ target_link_libraries(segment.out ${MNN_DEPS})

add_executable(expressDemo.out ${CMAKE_CURRENT_LIST_DIR}/expressDemo.cpp)
target_link_libraries(expressDemo.out ${MNN_DEPS})

add_executable(transformerDemo.out ${CMAKE_CURRENT_LIST_DIR}/transformerDemo.cpp)
target_link_libraries(transformerDemo.out ${MNN_DEPS})

add_executable(rasterDemo.out ${CMAKE_CURRENT_LIST_DIR}/rasterDemo.cpp)
target_link_libraries(rasterDemo.out ${MNN_DEPS})
@@ -53,7 +53,6 @@ int main(int argc, const char* argv[]) {
        MNN_ERROR("Output Not valid\n");
        return 0;
    }
    auto size = outputInfo->size;
    // Test speed
    if (testTime > 0) {
        // Let the frequency ramp up
@@ -82,6 +81,7 @@ int main(int argc, const char* argv[]) {
    }

    {
        auto size = outputInfo->size;
        auto outputPtr = output->readMap<float>();
        if (nullptr == outputPtr) {
            MNN_ERROR("Output Not valid read error\n");
@@ -0,0 +1,251 @@
//
//  rasterDemo.cpp
//  MNN
//
//  Created by MNN on 2020/10/14.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <fstream>
#include <sstream>
#include <iostream>
#include <chrono>
#include <MNN/MNNDefine.h>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/Execution.hpp"
#include "core/Backend.hpp"
#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
using namespace MNN;
/*
 1. Raster does the index mapping below:

    for (region : regions)
        src = region.src, dst = region.dst;
        for (i = 0 -> size[0])
            for (j = 0 -> size[1])
                for (k = 0 -> size[2])
                    output[dst.offset + i * dst.stride[0] + j * dst.stride[1] + k * dst.stride[2]] =
                        region.origin[src.offset + i * src.stride[0] + j * src.stride[1] + k * src.stride[2]];

 2. A Raster op has one input and one output, but the input is not a real input tensor; it is a
    middle tensor with VIRTUAL type whose regions point to the input tensors, like below.

    input_0 --> region_0 --\
                            \
    input_1 --> region_1 ---- middle ----> output
                            /
    input_2 --> region_2 --/

 3. This example reads a json file, constructs some Rasters and computes them.
    The input json file format is as below:
    {
        "inputs" : [
            {
                "id" : int,
                "type" : "type_name", // float or int
                "dims" : [int],
                "data" : [int/float] // if null, fill with random numbers
            }
        ],
        "outputs" : [
            // same as inputs
        ],
        "regions" : [
            {
                "id" : int, // points to outputs
                "size" : [int],
                "src" : {
                    "offset" : int,
                    "stride" : [int]
                },
                "dst" : { // same as src },
                "origin" : int // points to inputs
            }
        ]
    }
 */

static std::string runRaster(std::string jsonString, int runNum) {
    srand(0);
    rapidjson::Document document;
    document.Parse(jsonString.c_str());
    if (document.HasParseError()) {
        MNN_ERROR("Invalid Json Format!\n");
        return "";
    }

    // prepare CPU backend
    ScheduleConfig config;
    config.type = MNN_FORWARD_CPU;
    BackendConfig backendConfig;
    backendConfig.precision = BackendConfig::Precision_High;
    config.backendConfig = &backendConfig;
    Backend::Info compute;
    compute.type = config.type;
    compute.numThread = config.numThread;
    compute.user = config.backendConfig;
    const RuntimeCreator* runtimeCreator(MNNGetExtraRuntimeCreator(compute.type));
    std::unique_ptr<Runtime> runtime(runtimeCreator->onCreate(compute));
    std::unique_ptr<Backend> backend(runtime->onCreate());

    // build Op
    std::unique_ptr<OpT> opt(new OpT);
    opt->type = OpType_Raster;
    flatbuffers::FlatBufferBuilder builder(1024);
    builder.ForceDefaults(true);
    auto len = Op::Pack(builder, opt.get());
    builder.Finish(len);
    auto buffer = builder.GetBufferPointer();
    const Op* op = flatbuffers::GetMutableRoot<Op>(buffer);
    // build tensors (NCHW) from json
    std::vector<std::unique_ptr<Tensor>> inputs;
    std::vector<std::unique_ptr<Tensor>> outputs;
    auto readTensors = [&document, &backend](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
        if (document.HasMember(type)) {
            auto info = document[type].GetArray();
            tensors.resize(info.Size());
            for (auto iter = info.begin(); iter != info.end(); iter++) {
                auto obj = iter->GetObject();
                int id = obj["id"].GetInt();
                tensors[id].reset(new Tensor(4));
                auto tensor = tensors[id].get();
                auto dataType = obj["type"].GetString();
                bool isFloat = !strcmp(dataType, "float");
                tensor->setType(isFloat ? DataType_DT_FLOAT : DataType_DT_INT32);
                auto dims = obj["dims"].GetArray();
                for (auto d = dims.begin(); d != dims.end(); d++) {
                    tensor->setLength(d - dims.begin(), d->GetInt());
                }
                TensorUtils::setLinearLayout(tensor);
                backend->onAcquireBuffer(tensor, Backend::STATIC);
                TensorUtils::getDescribe(tensor)->backend = backend.get();
                auto data = obj["data"].GetArray();
                if (!strcmp(type, "inputs")) {
                    bool hasData = data.Size() == tensor->elementSize();
                    auto dataIter = data.begin();
                    for (int i = 0; i < tensor->elementSize(); i++, dataIter++) {
                        if (isFloat) {
                            tensor->host<float>()[i] = hasData ? dataIter->GetFloat() : rand() % 10 / 10.0;
                        } else {
                            tensor->host<int>()[i] = hasData ? dataIter->GetInt() : rand() % 10;
                        }
                    }
                }
            }
        }
    };
    readTensors(inputs, "inputs");
    readTensors(outputs, "outputs");

    // build middle tensors' region info from json
    std::vector<std::unique_ptr<Tensor>> middles;
    middles.resize(outputs.size());
    if (document.HasMember("regions")) {
        auto info = document["regions"].GetArray();
        for (auto iter = info.begin(); iter != info.end(); iter++) {
            auto obj = iter->GetObject();
            int id = obj["id"].GetInt();
            if (middles[id] == nullptr) {
                middles[id].reset(new Tensor(4));
            }
            auto des = TensorUtils::getDescribe(middles[id].get());
            des->memoryType = MNN::Tensor::InsideDescribe::MEMORY_VIRTUAL;
            Tensor::InsideDescribe::Region region;
            int origin = obj["origin"].GetInt();
            region.origin = inputs[origin].get();
            auto size = obj["size"].GetArray();
            auto src = obj["src"].GetObject();
            auto dst = obj["dst"].GetObject();
            auto srcStride = src["stride"].GetArray();
            auto dstStride = dst["stride"].GetArray();
            for (int i = 0; i < 3; i++) {
                region.size[i] = size[i].GetInt();
                region.src.stride[i] = srcStride[i].GetInt();
                region.dst.stride[i] = dstStride[i].GetInt();
            }
            region.src.offset = src["offset"].GetInt();
            region.dst.offset = dst["offset"].GetInt();
            des->regions.push_back(region);
        }
    }

    // build execution of Raster and run them
    for (int i = 0; i < outputs.size(); i++) {
        std::vector<Tensor*> ins = {middles[i].get()}, outs = {outputs[i].get()};
        std::unique_ptr<Execution> exe(backend->onCreate(ins, outs, op));
        exe->onResize(ins, outs);
        auto t1 = std::chrono::high_resolution_clock::now();
        for (int j = 0; j < runNum; j++) {
            exe->onExecute(ins, outs);
        }
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
        double time = time_span.count() * 1000.0 / runNum;
        printf("For output_id = %d, run %d times, the average time is %f ms.\n", i, runNum, time);
    }

    auto writeTensors = [&document](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
        auto info = document[type].GetArray();
        for (auto iter = info.begin(); iter != info.end(); iter++) {
            auto obj = iter->GetObject();
            int id = obj["id"].GetInt();
            auto data = obj["data"].GetArray();
            if (data.Size() == tensors[id]->elementSize()) {
                // has data, don't write
                return;
            }
            bool isFloat = !strcmp(obj["type"].GetString(), "float");
            data.Reserve(tensors[id]->elementSize(), document.GetAllocator());
            for (int i = 0; i < tensors[id]->elementSize(); i++) {
                if (isFloat) {
                    data.PushBack(tensors[id]->host<float>()[i], document.GetAllocator());
                } else {
                    data.PushBack(tensors[id]->host<int>()[i], document.GetAllocator());
                }
            }
        }
    };
    writeTensors(inputs, "inputs");
    writeTensors(outputs, "outputs");
    rapidjson::StringBuffer stringBuffer;
    rapidjson::Writer<rapidjson::StringBuffer> writer(stringBuffer);
    document.Accept(writer);
    return stringBuffer.GetString();
}

int main(int argc, const char* argv[]) {
    if (argc < 2) {
        printf("Usage: ./rasterDemo.out input.json [output.json] [runNum]\ndefault output is input, and default runNum is 100.\n");
        return 0;
    }
    const char* inputFile = argv[1];
    const char* outputFile = argv[1];
    int runNum = 100;
    if (argc >= 3) {
        outputFile = argv[2];
    }
    if (argc >= 4) {
        runNum = ::atoi(argv[3]);
    }
    std::ifstream in(inputFile);
    if (in.fail()) {
        printf("Invalid input Json File!\n");
        return 0;
    }
    std::ofstream out(outputFile);
    if (out.fail()) {
        printf("Invalid output Json File!\n");
        return 0;
    }
    std::stringstream ss;
    ss << in.rdbuf();
    out << runRaster(ss.str(), runNum);
    out.close();
    printf("Run Raster Done!\n");
    return 0;
}
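To make the header comment in rasterDemo.cpp concrete, here is an illustrative, standalone version of the mapping a single region performs (not code from this commit; the struct names are hypothetical):

    // Standalone sketch of the index mapping documented above. One region
    // copies a 3-D strided "view" of its origin tensor into the output.
    struct View { int offset; int stride[3]; };
    struct RegionLite { View src, dst; int size[3]; const float* origin; };

    void applyRegion(const RegionLite& r, float* output) {
        for (int i = 0; i < r.size[0]; ++i)
            for (int j = 0; j < r.size[1]; ++j)
                for (int k = 0; k < r.size[2]; ++k)
                    output[r.dst.offset + i * r.dst.stride[0] + j * r.dst.stride[1] + k * r.dst.stride[2]] =
                        r.origin[r.src.offset + i * r.src.stride[0] + j * r.src.stride[1] + k * r.src.stride[2]];
    }

    // Example: a 2x3 -> 3x2 transpose is a single region with
    // size = {1, 2, 3}, src.stride = {6, 3, 1}, dst.stride = {6, 1, 2}.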
@@ -0,0 +1,60 @@
#include <MNN/expr/Module.hpp>
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Executor.hpp>
#include <fstream>
#include <sstream>
#include <stdio.h>
#include <string.h>
using namespace MNN::Express;
using namespace MNN;
using namespace std;

int main(int argc, const char* argv[]) {
    if (argc < 2) {
        MNN_ERROR("Missing model name\n");
        return 0;
    }
    BackendConfig config;
    //Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 4);
    auto modelName = argv[1];
    std::shared_ptr<Module> model;
    model.reset(Module::load({"NmtModel/Placeholder", "NmtModel/Placeholder_1"}, {"NmtModel/transpose_2"}, modelName));
    std::vector<int> input0 = {32,16,234,3215,61,135,29,10,24317,4661,4,0};
    std::vector<int> input1 = {1,1,1,1,1,1,1,1,1,1,1,1};
    auto first = _Input({1, (int)input0.size()}, NHWC, halide_type_of<int>());
    ::memcpy(first->writeMap<int>(), input0.data(), input0.size() * sizeof(int));
    auto second = _Input({1, (int)input1.size()}, NHWC, halide_type_of<int>());
    ::memcpy(second->writeMap<int>(), input1.data(), input1.size() * sizeof(int));
    std::vector<VARP> outputs;
    for (int i = 0; i < 2; ++i) {
        {
            AUTOTIME;
            Executor::getGlobalExecutor()->resetProfile();
            outputs = model->onForward({first, second});
            Executor::getGlobalExecutor()->dumpProfile();
        }
        std::ostringstream fileNameOs;
        std::ostringstream dimInfo;
        fileNameOs << i << "_output.txt";
        auto info = outputs[0]->getInfo();
        for (int d = 0; d < info->dim.size(); ++d) {
            dimInfo << info->dim[d] << "_";
        }
        auto fileName = fileNameOs.str();
        MNN_PRINT("Output Name: %s, Dim: %s\n", fileName.c_str(), dimInfo.str().c_str());
        auto ptr = outputs[0]->readMap<int>();
        std::ofstream outputOs(fileName.c_str());
        for (int i = 0; i < info->size; ++i) {
            outputOs << ptr[i] << "\n";
        }
    }
    for (int i = 0; i < 10; ++i) {
        AUTOTIME;
        outputs = model->onForward({first, second});
    }

    return 0;
}
@@ -53,27 +53,23 @@ static int CompareElements(const LabeledElement *a, const LabeledElement *b) {
    if (!_net || !_session) {
        return nil;
    }
    MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
    MNN::Tensor copy(output);
    auto input = _net->getSessionInput(_session, nullptr);
    MNN::Tensor tensorCache(input);
    input->copyToHostTensor(&tensorCache);

    // run
    NSTimeInterval begin = NSDate.timeIntervalSinceReferenceDate;
    // you should set input data for each inference
    if (cycles == 1) {
        _net->runSession(_session);
    } else {
        auto input = _net->getSessionInput(_session, nullptr);
        MNN::Tensor tensorCache(input);
        input->copyToHostTensor(&tensorCache);
        for (int i = 0; i < cycles; i++) {
            input->copyFromHostTensor(&tensorCache);
            _net->runSession(_session);
        }
        output->copyToHostTensor(&copy);
    }
    NSTimeInterval cost = NSDate.timeIntervalSinceReferenceDate - begin;

    // result
    MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
    MNN::Tensor copy(output);
    output->copyToHostTensor(&copy);
    float *data = copy.host<float>();
    LabeledElement objects[1000];
    for (int i = 0; i < 1000; i++) {
@@ -1,14 +1,21 @@
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.*")
option(MNN_EXPR_ENABLE_PROFILER "Support profile Expr's op cost" OFF)
option(MNN_EXPR_SHAPE_EAGER "Force compute Expr's shape directly cost" OFF)
IF (MNN_EXPR_ENABLE_PROFILER)
    add_definitions(-DMNN_EXPR_ENABLE_PROFILER)
ENDIF()
IF (MNN_EXPR_SHAPE_EAGER)
    add_definitions(-DMNN_EXPR_SHAPE_EAGER)
ENDIF()
IF(MNN_SEP_BUILD)
    if (MNN_BUILD_FOR_ANDROID_COMMAND)
        set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../")
    endif()
    add_library(MNN_Express SHARED ${MNN_EXPR_SRCS})
    target_link_libraries(MNN_Express MNN)
    if (MNN_BUILD_MINI)
        target_link_libraries(MNN_Express $<TARGET_OBJECTS:MNNTransform>)
    endif()
ELSE()
    add_library(MNNExpress OBJECT ${MNN_EXPR_SRCS})
ENDIF()
@@ -0,0 +1,30 @@
//
//  Distributions.cpp
//  MNN
//
//  Created by MNN on 2019/11/28.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "Distributions.hpp"
#include <cmath>

namespace MNN {
namespace Express {

void Distributions::uniform(const int count, const float min, const float max, float *r, std::mt19937 gen) {
    std::uniform_real_distribution<float> dis(min, std::nextafter(max, std::numeric_limits<float>::max()));
    for (int i = 0; i < count; i++) {
        r[i] = dis(gen);
    }
}

void Distributions::gaussian(const int count, const float mu, const float sigma, float *r, std::mt19937 gen) {
    std::normal_distribution<float> dis(mu, sigma);
    for (int i = 0; i < count; i++) {
        r[i] = dis(gen);
    }
}

} // namespace Express
} // namespace MNN
@@ -0,0 +1,27 @@
//
//  Distributions.hpp
//  MNN
//
//  Created by MNN on 2019/11/28.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef Distributions_hpp
#define Distributions_hpp

#include <MNN/MNNDefine.h>
#include <random>

namespace MNN {
namespace Express {

class Distributions {
public:
    static void uniform(const int count, const float min, const float max, float* r, std::mt19937 gen);
    static void gaussian(const int count, const float mu, const float sigma, float* r, std::mt19937 gen);
};

} // namespace Express
} // namespace MNN

#endif // Distributions_hpp
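A quick usage sketch for the helpers above (illustrative only; it seeds std::mt19937 directly rather than going through the RandomGenerator wrapper this commit uses elsewhere):

    #include <cstdio>
    #include <random>
    #include <vector>
    // #include "Distributions.hpp"  // as declared above

    int main() {
        std::mt19937 gen(42);          // fixed seed for reproducibility
        std::vector<float> buf(8);
        // Fill with U(-1, 1). Note the by-value std::mt19937 parameter means a
        // second call with the same generator object repeats the same sequence.
        MNN::Express::Distributions::uniform(buf.size(), -1.f, 1.f, buf.data(), gen);
        for (float v : buf) printf("%f\n", v);
        return 0;
    }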
File diff suppressed because it is too large
@@ -0,0 +1,45 @@
//
//  ExecutorScope.cpp
//  MNN
//
//  Created by MNN on 2020/10/26.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <thread>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/Scope.hpp>
#include <MNN/expr/ExecutorScope.hpp>

namespace MNN {
namespace Express {

typedef std::shared_ptr<Express::Executor> ExecutorRef;
#if !defined(__APPLE__)
thread_local static Scope<ExecutorRef> g_executor_scope;
#else
static Scope<ExecutorRef> g_executor_scope;
#endif

ExecutorScope::ExecutorScope(const std::shared_ptr<Executor>& current) {
    g_executor_scope.EnterScope(current);
}

ExecutorScope::ExecutorScope(const std::string& scope_name,
                             const std::shared_ptr<Executor>& current) {
    g_executor_scope.EnterScope(scope_name, current);
}

ExecutorScope::~ExecutorScope() {
    g_executor_scope.ExitScope();
}

const std::shared_ptr<Executor> ExecutorScope::Current() {
    if (g_executor_scope.ScopedLevel() > 0) {
        return g_executor_scope.Current().content;
    }
    return Executor::getGlobalExecutor();
}

} // namespace Express
} // namespace MNN
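ExecutorScope is an RAII guard: while one is alive on the current thread, ExecutorScope::Current() returns the scoped executor instead of the global one, which is why the Expr.cpp changes below swap Executor::getGlobalExecutor() for ExecutorScope::Current(). A hedged usage sketch; how myExecutor is created is left out, since the factory API is not shown in this diff:

    #include <MNN/expr/Executor.hpp>
    #include <MNN/expr/ExecutorScope.hpp>

    void runWithPrivateExecutor(std::shared_ptr<MNN::Express::Executor> myExecutor) {
        // Outside any scope, Current() falls back to the global executor.
        auto before = MNN::Express::ExecutorScope::Current();
        {
            MNN::Express::ExecutorScope scope(myExecutor);  // EnterScope
            // Expressions evaluated on this thread inside this block resolve
            // ExecutorScope::Current() to myExecutor.
        }                                                   // ~ExecutorScope -> ExitScope
        // Back to the previous executor here.
    }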
261
express/Expr.cpp
261
express/Expr.cpp
@@ -8,23 +8,33 @@

#define FLATBUFFERS_PREFER_PRINTF
#include <MNN/expr/Expr.hpp>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <map>
#include "core/MNNMemoryUtils.h"
#include "Utils.hpp"
#include <map>
#include "core/FileLoader.hpp"
#include <MNN/expr/Executor.hpp>
#include "core/TensorUtils.hpp"
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include "MNN/AutoTime.hpp"
#include "MNN/expr/ExecutorScope.hpp"

//#define MNN_EXPRESS_ERROR_REPORT
static inline std::string numberToString(int index) {
    char s[10];
    snprintf(s, 10, "%d", index);
    return std::string(s);
}

static bool HasUnknownDim(const std::vector<int>& dims) {
    for (const int& dim : dims) {
        if (dim < 0) {
            return true;
        }
    }
    return false;
}

namespace MNN {
namespace Express {
void Variable::Info::syncSize() {

@@ -87,8 +97,7 @@ bool VARP::fix(VARP::InputType type) const {
}

Expr::Expr(int outputSize) {
    mInside.reset(new Inside);
    mInside->mOutputInfos.resize(outputSize);
    mInside.reset(new Inside(outputSize));
    mOutputNames.resize(outputSize);
}

@@ -117,27 +126,46 @@ void Expr::_addLinkForInputs(EXPRP expr) {
        }
    }
}
EXPRP Expr::create(Variable::Info&& info) {
EXPRP Expr::create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy) {
    EXPRP expr(new Expr(1));
    expr->mOp = nullptr;
    auto originPtr = info.ptr;
    auto originPtr = ptr;
    expr->mInside->mOutputInfos[0] = std::move(info);
    auto& dstInfo = expr->mInside->mOutputInfos[0];
    dstInfo.syncSize();
    if (dstInfo.size > 0) {
        expr->mExtraBuffer.reset(new char[dstInfo.size * dstInfo.type.bytes()], std::default_delete<char[]>());
        expr->mInside->mOutputInfos[0].ptr = expr->mExtraBuffer.get();
        expr->mInside->mInfoDirty = false;
    dstInfo.syncSize();
    Utils::copyInfoToTensor(expr->mInside->mOutputTensors[0], expr->mInside->mOutputInfos.data());
    expr->mType = type;
    if (type == VARP::CONSTANT) {
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::CONSTANT;
    } else if (type == VARP::INPUT) {
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::INPUT;
    } else {
        expr->mInside->mOutputInfos[0].ptr = nullptr;
        expr->mInside->mInfoDirty = true;
        // VARP::TRAINABLE
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::TRAINABLE;
    }
    if (dstInfo.size > 0 && copy) {
        auto res = Utils::allocMemoryForHostTensor(expr->mInside->mOutputTensors[0]);
        if (!res) {
            MNN_ASSERT(false);
            return nullptr;
        }
    } else {
        expr->mInside->mOutputTensors[0]->buffer().host = nullptr;
    }
    if (nullptr == originPtr) {
        expr->mType = VARP::INPUT;
        if (type == VARP::INPUT && dstInfo.size > 0) {
            expr->mInside->mContentDirty = true;
        }
        return expr;
    }
    expr->mType = VARP::CONSTANT;
    ::memcpy(expr->mInside->mOutputInfos[0].ptr, originPtr, dstInfo.size * dstInfo.type.bytes());
    expr->mInside->mContentDirty = false;
    if (copy) {
        ::memcpy(expr->mInside->mOutputTensors[0]->buffer().host, originPtr, dstInfo.size * dstInfo.type.bytes());
    } else {
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->memoryType = Tensor::InsideDescribe::MEMORY_OUTSIDE;
        expr->mInside->mOutputTensors[0]->buffer().host = (uint8_t*)originPtr;
    }
    return expr;
}
EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize) {

@@ -147,8 +175,7 @@ EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP
    expr->mOp = flatbuffers::GetMutableRoot<Op>(extra.first.get());
    expr->mOpBufferSize = extra.second;
    expr->mInputs = std::move(inputs);
    expr->mInside->mInputInfos.resize(expr->mInputs.size());
    expr->mInside->mReq = Executor::getGlobalExecutor()->getRequirement(expr.get());
    expr->mInside->mReq = ExecutorScope::Current()->getRequirement(expr.get());
    _addLinkForInputs(expr);
    return expr;
}

@@ -161,34 +188,34 @@ EXPRP Expr::create(const OpT* op, std::vector<VARP> inputs, int outputSize) {
            info.dim[0] = 1;
        }
        info.order = Utils::revertFormat(op->main.AsInput()->dformat);
        info.ptr = nullptr;
        info.type = Utils::revertDataType(op->main.AsInput()->dtype);
        return create(std::move(info));
        return create(std::move(info), nullptr, VARP::INPUT);
    }
    if (OpType_Const == op->type || OpType_TrainableParam == op->type) {
        Variable::Info info;
        info.dim = op->main.AsBlob()->dims;
        info.order = Utils::revertFormat(op->main.AsBlob()->dataFormat);
        info.ptr = nullptr;
        void* ptr = nullptr;
        info.type = Utils::revertDataType(op->main.AsBlob()->dataType);
        switch (op->main.AsBlob()->dataType) {
            case DataType_DT_INT8:
                info.ptr = (void*)op->main.AsBlob()->int8s.data();
                ptr = (void*)op->main.AsBlob()->int8s.data();
                break;
            case DataType_DT_INT32:
                info.ptr = (void*)op->main.AsBlob()->int32s.data();
                ptr = (void*)op->main.AsBlob()->int32s.data();
                break;
            case DataType_DT_UINT8:
                info.ptr = (void*)op->main.AsBlob()->uint8s.data();
                ptr = (void*)op->main.AsBlob()->uint8s.data();
                break;
            case DataType_DT_FLOAT:
                info.ptr = (void*)op->main.AsBlob()->float32s.data();
                ptr = (void*)op->main.AsBlob()->float32s.data();
                break;
            default:
                break;
        }
        auto expr = create(std::move(info));
        if (OpType_TrainableParam == op->type) {
        //MNN_ASSERT(nullptr != ptr);
        auto expr = create(std::move(info), ptr, VARP::CONSTANT);
        if (OpType_TrainableParam == op->type && nullptr != ptr) {
            expr->mType = VARP::TRAINABLE;
        }
        return expr;

@@ -213,7 +240,7 @@ bool Expr::requireInfo() {
        return false;
    }
    if (nullptr == mOp) {
        return mInside->mOutputInfos[0].size > 0;
        return !HasUnknownDim(mInside->mOutputInfos[0].dim);
    }
    bool ready = true;
    for (int i = 0; i < mInputs.size(); ++i) {

@@ -221,8 +248,8 @@ bool Expr::requireInfo() {
            // The Variable is set nullptr by api
            return false;
        }
        mInside->mInputInfos[i] = mInputs[i]->getInfo();
        if (nullptr == mInside->mInputInfos[i] && (!mInside->mReq.supportError[i])) {
        auto inputInfo = mInputs[i]->getInfo();
        if (nullptr == inputInfo) {
#ifdef MNN_EXPRESS_ERROR_REPORT
            MNN_ERROR("%s, %d input not ready\n", mName.c_str(), i);
#endif

@@ -233,15 +260,19 @@ bool Expr::requireInfo() {
    for (int i = 0; i < mInputs.size(); ++i) {
        auto& v = mInputs[i];
        if (mInside->mReq.shapeNeedContent[i]) {
            // `readInternal` maybe return nullptr if element count is 0.
            v->readInternal(true);
            // For shape need content, the content must not be nullptr
            auto ptr = v->readInternal(true);
            if (nullptr == ptr) {
                ready = false;
                break;
            }
        }
    }
    if (!ready) {
        return false;
    }
    //MNN_PRINT("Info %s, %p Start\n", mName.c_str(), this);
    auto res = Executor::getGlobalExecutor()->computeInfo(this);
    auto res = ExecutorScope::Current()->computeInfo(this);
    //MNN_PRINT("Info Compute %s\n", mName.c_str());

    if (NO_ERROR == res) {

@@ -261,6 +292,14 @@ const std::vector<WeakEXPRP>& Variable::toExprs() const {

VARP Variable::create(EXPRP expr, int index) {
    VARP res(new Variable(expr, index));
#ifdef MNN_EXPR_SHAPE_EAGER
    auto info = expr->requireInfo();
    if (!info) {
#ifdef MNN_EXPRESS_ERROR_REPORT
        MNN_ERROR("Can't compute shape\n");
#endif
    }
#endif
    return res;
}
void Expr::replace(EXPRP old, EXPRP from) {

@@ -307,16 +346,22 @@ void Expr::replace(EXPRP old, EXPRP from) {
    old->mValid = from->mValid;
    old->mInside = from->mInside;
    old->mInputs = from->mInputs;
    std::vector<Expr*> visited;
    old->visitOutputs([&](EXPRP expr, int index) {
        if (expr->mInside->mInfoDirty && expr->mValid && !expr->mInside->mLinkCache) {
        if (expr->visited()) {
            return false;
        }
        visited.emplace_back(expr.get());
        expr->setVisited(true);
        expr->mInside->mCache.reset();
        expr->mInside->mCacheOffset = 0;
        expr->mValid = true;
        expr->mInside->mInfoDirty = true;
        return true;
    });
    for (auto e : visited) {
        e->setVisited(false);
    }
}

void Variable::setName(const std::string& name) {

@@ -351,7 +396,7 @@ bool Variable::input(VARP src) {
        info = tempInfo.get();
    }
    auto dstInfo = getInfo();
    bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size();
    bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size() || info->type != dstInfo->type;
    if (!needChange) {
        for (int i = 0; i < info->dim.size(); ++i) {
            if (dstInfo->dim[i] != info->dim[i]) {

@@ -362,22 +407,19 @@ bool Variable::input(VARP src) {
    }

    if (!mFrom->mInside->mCache) {
        Executor::getGlobalExecutor()->makeCache({mFrom}, false);
        ExecutorScope::Current()->makeCache({mFrom}, false);
    }
    if (needChange) {
        bool needAlloc = info->size * info->type.bytes() > mFrom->mInside->mOutputInfos[0].size * mFrom->mInside->mOutputInfos[0].type.bytes();
        mFrom->mInside->mOutputInfos[0] = *info;
        if (needAlloc) {
            mFrom->mExtraBuffer.reset(new char[info->size * info->type.bytes()], std::default_delete<char[]>());
        }
        mFrom->mInside->mOutputInfos[0].ptr = mFrom->mExtraBuffer.get();
        mFrom->mInside->mCache->setShapeDirty(0, mFrom->outputInfo(0));
        Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
        Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
        Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
    }
    if (info->size) {
        auto dstPtr = writeInternal(false);
        auto srcPtr = src->readMap<void>();
        if (nullptr == dstPtr || nullptr == srcPtr) {
            MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
            //MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
            return false;
        }
        ::memcpy(dstPtr, srcPtr, info->size * info->type.bytes());

@@ -387,7 +429,7 @@ bool Variable::input(VARP src) {
    } else {
        informDirty();
    }
    mFrom->mInside->mCache->setContentReady();
    mFrom->mInside->mContentDirty = false;
    return true;
}

@@ -396,23 +438,44 @@ void Variable::replace(VARP dst, VARP src) {
        dst->setExpr(nullptr, 0);
        return;
    }
    if (nullptr == dst) {
        dst.mContent = src.mContent;
        return;
    }
    if (src->mFrom.get() == dst->mFrom.get()) {
        dst->mFromIndex = src->mFromIndex;
        return;
    }
    if (src->mFrom->outputSize() != dst->mFrom->outputSize()) {
        // Can't replace Expr, Just replace VARP
        dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
            src->mFrom->mTo.emplace_back(expr);
        std::vector<Expr*> visited;
        dst->mFrom->visitOutputs([src, dst, &visited](EXPRP expr, int index) {
            if (expr->visited()) {
                return false;
            });
            dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
            }
            expr->setVisited(true);
            visited.emplace_back(expr.get());
            expr->mInside->mCache.reset();
            expr->mInside->mCacheOffset = 0;
            expr->mValid = true;
            expr->mInside->mInfoDirty = true;
            expr->mInside->mContentDirty = true;
            return true;
        });
        for (auto v : visited) {
            v->setVisited(false);
        }
        dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
            for (int i = 0; i < expr->inputs().size(); ++i) {
                auto input = expr->inputs()[i];
                if (input == dst) {
                    expr->mInputs[i] = src;
                }
            }
            src->mFrom->mTo.emplace_back(expr);
            return false;
        });

        dst->mFrom = src->mFrom;
        dst->mFromIndex = src->mFromIndex;
        return;

@@ -452,15 +515,19 @@ bool Variable::resize(INTS dims) {
    }
    info.dim = dims;
    info.syncSize();
    mFrom->mExtraBuffer.reset(new char[info.size * info.type.bytes()], std::default_delete<char[]>());
    info.ptr = mFrom->mExtraBuffer.get();
    Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
    Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
    if (0 >= info.size) {
        return false;
    }
    bool res = Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
    if (!res) {
        return false;
    }

    mFrom->mValid = true;
    mFrom->mInside->mInputInfos.clear();
    auto cache = mFrom->mInside->mCache;
    if (nullptr != cache) {
        cache->setShapeDirty(0, mFrom->outputInfo(0));
    }
    mFrom->inside()->mInfoDirty = false;
    mFrom->inside()->mContentDirty = true;
    mFrom->visitOutputs([](EXPRP expr, int index) { return expr->setInfoDirty(); });
    return true;
}

@@ -478,11 +545,12 @@ void Expr::visit(EXPRP expr, const std::function<bool(EXPRP)>& before, const std
void* Variable::readInternal(bool forShape) {
    if (nullptr == mFrom->get()) {
        if (VARP::INPUT == mFrom->mType) {
            if (nullptr == mFrom->mInside->mCache) {
            if (mFrom->mInside->mContentDirty) {
                return nullptr;
            }
        }
        return mFrom->outputInfo(mFromIndex)->ptr;
        //MNN_ASSERT(nullptr != mFrom->inside()->mOutputTensors[0]->buffer().host);
        return mFrom->inside()->mOutputTensors[0]->buffer().host;
    }
    auto res = mFrom->requireInfo();
    if (false == res) {

@@ -490,21 +558,26 @@ void* Variable::readInternal(bool forShape) {
    }
    auto cache = mFrom->inside()->mCache;
    if (nullptr == cache) {
        Executor::getGlobalExecutor()->makeCache({mFrom}, forShape);
        ExecutorScope::Current()->makeCache({mFrom}, forShape);
        cache = mFrom->inside()->mCache;
    }
    if (nullptr == cache) {
        return nullptr;
    }
    if (NO_ERROR != Executor::getGlobalExecutor()->runCache(cache)) {
    if (NO_ERROR != ExecutorScope::Current()->runCache(cache)) {
        return nullptr;
    }
    cache->syncOutput(mFrom->mInside->mCacheOffset + mFromIndex, mFrom->outputInfo(mFromIndex));
    return mFrom->outputInfo(mFromIndex)->ptr;
    return Executor::mapOutput(cache.get(), mFrom->mInside->mCacheOffset + mFromIndex, mFrom->mInside->mOutputTensors[mFromIndex]);
}

void Variable::informDirty() {
    mFrom->visitOutputs([](EXPRP expr, int index) {
    std::vector<Expr*> visited;
    mFrom->visitOutputs([&visited](EXPRP expr, int index) {
        if (expr->visited()) {
            return false;
        }
        visited.emplace_back(expr.get());
        expr->setVisited(true);
        if (expr->inside()->mReq.shapeNeedContent.empty()) {
            // Not init
            return false;

@@ -514,28 +587,32 @@ void Variable::informDirty() {
            expr->visitOutputs([](EXPRP e, int index) { return e->setInfoDirty(); });
            return false;
        }
        if (expr->inside()->mContentDirty) {
            return false;
        }
        expr->inside()->mContentDirty = true;
        if (expr->inside()->mReq.contentNeedContent[index]) {
            if (expr->inside()->mCache != nullptr) {
                expr->inside()->mCache->setContentDirty();
                Executor::setContentDirty(expr->inside()->mCache.get());
            }
            return true;
        }
        return false;
    });
    for (auto e : visited) {
        e->setVisited(false);
    }
}
void Variable::prepareCompute(const std::vector<VARP>& vars, bool forceCpu) {
    std::vector<EXPRP> exprs;
    for (auto v : vars) {
        if (v->expr().first->inside()->mCache == nullptr) {
        if (!v->expr().first->visited()) {
            v->expr().first->inside()->mCache = nullptr;
            v->expr().first->requireInfo();
            v->expr().first->setVisited(true);
            exprs.emplace_back(v->expr().first);
        }
    }
    Executor::getGlobalExecutor()->makeCache(std::move(exprs), forceCpu);
    for (auto v : vars) {
        v->expr().first->setVisited(false);
    }
    ExecutorScope::Current()->makeCache(std::move(exprs), forceCpu);
}

void* Variable::writeInternal(bool inform) {

@@ -545,16 +622,8 @@ void* Variable::writeInternal(bool inform) {
    if (inform) {
        informDirty();
    }
    auto cache = mFrom->mInside->mCache;
    if (nullptr == cache) {
        Executor::getGlobalExecutor()->makeCache({mFrom});
        cache = mFrom->mInside->mCache;
    }
    if (nullptr == cache) {
        return nullptr;
    }
    mFrom->mInside->mCache->setContentReady();
    return mFrom->mInside->mOutputInfos[0].ptr;
    mFrom->mInside->mContentDirty = false;
    return mFrom->inside()->mOutputTensors[0]->host<void>();
}

void Variable::unMap() {

@@ -591,12 +660,17 @@ bool Expr::setInfoDirty() {
    mInside->mContentDirty = true;
    mValid = true;
    if (mInside->mCache != nullptr) {
        mInside->mCache->setShapeDirty(0, nullptr);
        Executor::setShapeDirty(mInside->mCache.get());
    }
    for (auto o : mInside->mOutputTensors) {
        Utils::releaseMemoryForHostTensor(o);
    }
    return true;
}

std::vector<VARP> Variable::load(const char* fileName) {
    AutoStorage<uint8_t> buffer;
    {
        FileLoader loader(fileName);
        if (!loader.valid()) {
            MNN_ERROR("Error for open %s\n", fileName);

@@ -606,11 +680,11 @@ std::vector<VARP> Variable::load(const char* fileName) {
        if (!loader.valid()) {
            return {};
        }
        AutoStorage<uint8_t> buffer;
        loader.merge(buffer);
        if (buffer.get() == nullptr) {
            return {};
        }
    }
    return load(buffer.get(), buffer.size());
}
std::vector<VARP> Variable::load(const uint8_t* buffer, size_t length) {

@@ -722,6 +796,7 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
    } else {
        MNN_ASSERT(1 == expr->outputSize());
        auto& info = expr->mInside->mOutputInfos[0];
        auto ptr = expr->mInside->mOutputTensors[0]->host<void>();
        op.reset(new OpT);
        if (expr->mType != VARP::INPUT) {
            auto blob = new BlobT;

@@ -730,16 +805,20 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
            if (info.type.code == halide_type_float) {
                blob->dataType = DataType_DT_FLOAT;
                blob->float32s.resize(info.size);
                ::memcpy(blob->float32s.data(), info.ptr, info.size * sizeof(float));
            } else if (info.type.code == halide_type_int) {
                ::memcpy(blob->float32s.data(), ptr, info.size * sizeof(float));
            } else if (info.type.code == halide_type_int && info.type.bits == 32) {
                blob->dataType = DataType_DT_INT32;
                blob->int32s.resize(info.size);
                ::memcpy(blob->int32s.data(), info.ptr, info.size * sizeof(int));
            }
            else if (info.type.code == halide_type_uint && info.type.bits == 8) {
                ::memcpy(blob->int32s.data(), ptr, info.size * sizeof(int));
            } else if (info.type.code == halide_type_int && info.type.bits == 8) {
                blob->dataType = DataType_DT_INT8;
                blob->int8s.resize(info.size);
                auto pptr = (int8_t *)ptr;
                ::memcpy(blob->int8s.data(), ptr, info.size * sizeof(int8_t));
            } else if (info.type.code == halide_type_uint && info.type.bits == 8) {
                blob->dataType = DataType_DT_UINT8;
                blob->uint8s.resize(info.size);
                ::memcpy(blob->uint8s.data(), info.ptr, info.size * sizeof(uint8_t));
                ::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(uint8_t));
            }
            op->type = OpType_Const;
            if (expr->mType == VARP::TRAINABLE) {

@@ -781,12 +860,12 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
        auto op = dest->oplists[index].get();
        auto tensorIndexOffset = varIndexInfo[expr];
        for (int v = 0; v < expr->outputSize(); ++v) {
            auto const tensorIndex = tensorIndexOffset + v;
            if (dest->tensorName[tensorIndex].empty()) {
            auto subindex = tensorIndexOffset + v;
            if (dest->tensorName[subindex].empty()) {
                if (v == 0) {
                    dest->tensorName[tensorIndex] = op->name;
                    dest->tensorName[subindex] = op->name;
                } else {
                    dest->tensorName[tensorIndex] = op->name + numberToString(v);
                    dest->tensorName[subindex] = op->name + numberToString(v);
                }
            }
        }
@@ -0,0 +1,210 @@
//
//  Initializer.cpp
//  MNN
//
//  Created by MNN on 2019/11/28.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "Initializer.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <cmath>
#include <vector>
#include "Distributions.hpp"
#include "RandomGenerator.hpp"

namespace MNN {
namespace Express {

Express::VARP Initializer::createConstVar(Express::INTS dim, Express::Dimensionformat format) {
    auto res = Express::_Input(dim, format, halide_type_of<float>());
    this->onExecute(res);
    res.fix(Express::VARP::CONSTANT);
    return res;
}

class ConstantInitializer : public Initializer {
public:
    ConstantInitializer(float value) : mConstant(value) {
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        auto ptr = p->writeMap<float>();
        for (int i = 0; i < count; i++) {
            ptr[i] = mConstant;
        }
    }

private:
    float mConstant;
};
Initializer* Initializer::constValue(float value) {
    return new ConstantInitializer(value);
}

class UniformInitializer : public Initializer {
public:
    UniformInitializer(float min = 0, float max = 1) {
        mMin = min;
        mMax = max;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        Distributions::uniform(count, mMin, mMax, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    float mMin;
    float mMax;
};

Initializer* Initializer::uniform(float minValue, float maxValue) {
    return new UniformInitializer(minValue, maxValue);
}

class XavierInitializer : public Initializer {
public:
    XavierInitializer(VarianceNorm norm = FANIN) {
        mNorm = norm;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        // referenced from Caffe
        // https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
        int fanIn = count / dims[0];
        int fanOut = dims.size() > 1 ? count / dims[1] : count;
        float n = fanIn; // default: FANIN
        if (mNorm == VarianceNorm::AVERAGE) {
            n = (fanIn + fanOut) / 2.0f;
        } else if (mNorm == VarianceNorm::FANOUT) {
            n = fanOut;
        }
        float scale = sqrtf(3.0f / n);

        Distributions::uniform(count, -scale, scale, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    VarianceNorm mNorm;
};
Initializer* Initializer::xavier(VarianceNorm norm) {
    return new XavierInitializer(norm);
}

class GaussianInitializer : public Initializer {
public:
    GaussianInitializer(float mean = 0, float std = 1) {
        mMean = mean;
        mStd = std;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        Distributions::gaussian(count, mMean, mStd, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    float mMean;
    float mStd;
};
Initializer* Initializer::gauss(float mean, float std) {
    return new GaussianInitializer(mean, std);
}

class MSRAInitializer : public Initializer {
public:
    MSRAInitializer(VarianceNorm norm = FANIN) {
        mNorm = norm;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        // referenced from Caffe
        // https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
        int fanIn = count / dims[0];
        int fanOut = dims.size() > 1 ? count / dims[1] : count;
        float n = fanIn; // default: FANIN
        if (mNorm == VarianceNorm::AVERAGE) {
            n = (fanIn + fanOut) / 2.0f;
        } else if (mNorm == VarianceNorm::FANOUT) {
            n = fanOut;
        }
        float std = sqrtf(2.0f / n);

        Distributions::gaussian(count, 0.0f, std, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    VarianceNorm mNorm;
};
Initializer* Initializer::MSRA(VarianceNorm norm) {
    return new MSRAInitializer(norm);
}

class BilinearInitializer : public Initializer {
public:
    BilinearInitializer() = default;

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        MNN_ASSERT(dims.size() == 4);
        MNN_ASSERT(dims[2] == dims[3]); // NCHW, H == W
        // referenced from Caffe
        // https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
        int f = ceilf(dims[3] / 2.0f);
        float c = (dims[3] - 1) / (2.0f * f);
        auto ptr = p->writeMap<float>();

        for (int i = 0; i < count; i++) {
            float x = i % dims[3];
            float y = (i / dims[3]) % dims[2];
            ptr[i] = (1 - std::fabs(x / f - c)) * (1 - std::fabs(y / f - c));
        }
    }
};
Initializer* Initializer::bilinear() {
    return new BilinearInitializer();
}

class PositiveUnitball : public Initializer {
public:
    PositiveUnitball() = default;

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        auto ptr = p->writeMap<float>();

        Distributions::uniform(count, 0, 1, ptr, RandomGenerator::generator());

        int dim = count / dims[0];
        for (int i = 0; i < dims[0]; i++) {
            float sum = 0;
            for (int j = 0; j < dim; j++) {
                sum += ptr[i * dim + j];
            }
            for (int j = 0; j < dim; j++) {
                ptr[i * dim + j] = ptr[i * dim + j] / sum;
            }
        }
    }
};
Initializer* Initializer::positiveUnitball() {
    return new PositiveUnitball();
}

} // namespace Express
} // namespace MNN
@@ -0,0 +1,43 @@
//
//  Initializer.hpp
//  MNN
//
//  Created by MNN on 2019/11/28.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef Initializer_hpp
#define Initializer_hpp

#include <MNN/expr/Expr.hpp>

namespace MNN {
namespace Express {
class RandomGenerator;
class MNN_PUBLIC Initializer {
public:
    Initializer() = default;
    virtual ~Initializer() = default;
    Express::VARP createConstVar(Express::INTS dim, Express::Dimensionformat format = Express::NCHW);
    virtual void onExecute(Express::VARP p) = 0;

    static Initializer* constValue(float value);
    static Initializer* uniform(float minValue = 0.0f, float maxValue = 1.0f);

    enum VarianceNorm {
        FANIN,
        FANOUT,
        AVERAGE,
    };

    static Initializer* xavier(VarianceNorm norm = FANIN);
    static Initializer* gauss(float mean = 0.0f, float std = 1.0f);
    static Initializer* MSRA(VarianceNorm norm = FANIN);
    static Initializer* bilinear();
    static Initializer* positiveUnitball();
};

} // namespace Express
} // namespace MNN

#endif // Initializer_hpp
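The factory methods above hand back raw Initializer pointers that the caller owns. A minimal usage sketch, assuming the training headers are on the include path (the shape below is hypothetical):

    #include <memory>
    #include "Initializer.hpp"

    using namespace MNN::Express;

    // Build a 64x3x3x3 conv weight filled with Xavier fan-in values;
    // createConstVar fixes the result to a CONSTANT variable.
    std::unique_ptr<Initializer> init(Initializer::xavier(Initializer::FANIN));
    VARP weight = init->createConstVar({64, 3, 3, 3}, NCHW);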
@@ -30,7 +30,18 @@ static DataType _convertDataType(halide_type_t type) {
    }
    return DataType_DT_INVALID;
}
static VARP _checkNC4HW4(VARP x) {
#ifdef MNN_EXPR_SHAPE_EAGER
    auto info = x->getInfo();
    if (nullptr != info && info->order == NC4HW4) {
        return _Convert(x, NCHW);
    }
#endif
    return x;
}
static VARP _Binary(VARP x, VARP y, BinaryOpOperation operation) {
    x = _checkNC4HW4(x);
    y = _checkNC4HW4(y);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_BinaryOp;
    op->type = OpType_BinaryOp;
@@ -49,6 +60,7 @@ static VARP _Unary(VARP x, UnaryOpOperation operation) {
    return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
    x = _checkNC4HW4(x);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ReductionParam;
    op->type = OpType_Reduction;
@@ -60,6 +72,7 @@ static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
    return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _ReduceMutable(VARP x, VARP dim, ReductionType type, bool keepDim) {
    x = _checkNC4HW4(x);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ReductionParam;
    op->type = OpType_Reduction;
@@ -955,6 +968,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMax(VARP input, int axis) {
    input = _checkNC4HW4(input);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ArgMax;
    op->type = OpType_ArgMax;
@@ -976,6 +990,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMin(VARP input, int axis) {
    input = _checkNC4HW4(input);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ArgMax;
    op->type = OpType_ArgMin;
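Every wrapper above now routes its inputs through _checkNC4HW4, so packed NC4HW4 operands are converted back to NCHW before the op is built whenever MNN_EXPR_SHAPE_EAGER is defined. A hedged sketch of the caller-visible effect, assuming the public _Add wrapper (the input vars are hypothetical):

    #include <MNN/expr/ExprCreator.hpp>
    using namespace MNN::Express;

    VARP a = _Input({1, 8, 4, 4}, NC4HW4);  // packed layout
    VARP b = _Input({1, 8, 4, 4}, NC4HW4);
    // Under MNN_EXPR_SHAPE_EAGER this behaves like
    // _Add(_Convert(a, NCHW), _Convert(b, NCHW)), so the result is NCHW.
    VARP c = _Add(a, b);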
@@ -5,6 +5,7 @@
//  Created by MNN on 2019/08/20.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef MergeOptimizer_hpp
#define MergeOptimizer_hpp

@@ -54,16 +54,14 @@ VARP _Input(INTS shape, Dimensionformat data_format, halide_type_t dtype) {
    info.dim = std::move(shape);
    info.order = data_format;
    info.type = dtype;
    info.ptr = nullptr;
    return (Variable::create(Expr::create(std::move(info))));
    return (Variable::create(Expr::create(std::move(info), nullptr, VARP::INPUT)));
}
VARP _Scalar(const void* ptr, halide_type_t type) {
    Variable::Info info;
    info.dim = {};
    info.order = NHWC;
    info.type = type;
    info.ptr = (void*)ptr;
    return (Variable::create(Expr::create(std::move(info))));
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
/*create a constant variable.
Args:
@@ -79,8 +77,7 @@ VARP _Const(const void* ptr, INTS shape, Dimensionformat format, halide_type_t t
    info.dim = std::move(shape);
    info.order = format;
    info.type = type;
    info.ptr = (void*)ptr;
    return (Variable::create(Expr::create(std::move(info))));
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}

VARP _Const(float value, INTS shape, Dimensionformat format) {
@@ -93,8 +90,8 @@ VARP _Const(float value, INTS shape, Dimensionformat format) {
    for (int i = 0; i < info.size; ++i) {
        values[i] = value;
    }
    info.ptr = (void*)values.data();
    return (Variable::create(Expr::create(std::move(info))));
    auto ptr = (void*)values.data();
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}

VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format, halide_type_t type) {
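The net effect of the three hunks above is that every literal-backed variable now carries an explicit input type at creation instead of the old default. A small sketch, assuming Expr::inputType() as used elsewhere in this commit:

    #include <MNN/expr/ExprCreator.hpp>
    using namespace MNN::Express;

    VARP ones = _Const(1.0f, {2, 2}, NCHW);
    // After this commit the backing expression is tagged VARP::CONSTANT:
    bool isConst = ones->expr().first->inputType() == VARP::CONSTANT;  // true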
@@ -107,6 +104,23 @@ VARP _TrainableParam(float value, INTS dims, Dimensionformat format) {
    v.fix(VARP::TRAINABLE);
    return v;
}
VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape) {
    std::unique_ptr<OpT> ipOp(new OpT);
    ipOp->type = OpType_InnerProduct;
    ipOp->main.type = OpParameter_InnerProduct;
    ipOp->main.value = new InnerProductT;
    auto ipParam = ipOp->main.AsInnerProduct();

    ipParam->outputCount = outputShape[1];
    if (!bias.empty()) {
        ipParam->biasTerm = 1;
    }
    ipParam->weightSize = weight.size();

    ipParam->weight = std::move(weight);
    ipParam->bias = std::move(bias);
    return (Variable::create(Expr::create(ipOp.get(), {x})));
}

VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads) {
    std::unique_ptr<OpT> convOp(new OpT);
@@ -183,7 +197,7 @@ VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS
    return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6, int nbits) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_Convolution;
    if (channel[0] == channel[1] && channel[0] == group) {
@@ -285,6 +299,42 @@ VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS
    return (Variable::create(Expr::create(std::move(convOp), {x, weight})));
}

VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
             PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_Deconvolution;
    if (channel[0] == channel[1] && channel[0] == group) {
        convOp->type = OpType_DeconvolutionDepthwise;
    }
    convOp->main.type = OpParameter_Convolution2D;
    convOp->main.value = new Convolution2DT;
    auto conv2D = convOp->main.AsConvolution2D();
    conv2D->common.reset(new Convolution2DCommonT);
    conv2D->common->padMode = _convertPadMode(pad);
    if (pads.size() == 2) {
        conv2D->common->padX = pads[0];
        conv2D->common->padY = pads[1];
    } else {
        conv2D->common->pads = std::move(pads);
    }
    conv2D->common->strideX = stride[0];
    conv2D->common->strideY = stride[1];
    conv2D->common->group = group;
    conv2D->common->outputCount = channel[1];
    conv2D->common->inputCount = channel[0];
    conv2D->common->dilateX = dilate[0];
    conv2D->common->dilateY = dilate[1];
    conv2D->common->kernelX = kernelSize[0];
    conv2D->common->kernelY = kernelSize[1];
    conv2D->common->relu6 = relu6;
    conv2D->common->relu = relu;
    MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]);
    conv2D->weight = std::move(weight);
    MNN_ASSERT(bias.size() == channel[1]);
    conv2D->bias = std::move(bias);
    return (Variable::create(Expr::create(convOp.get(), {x})));
}

static VARP _Pool(VARP x, INTS kernel, INTS stride, PoolType type, PaddingMode pad, INTS pads) {
    std::unique_ptr<OpT> pool(new OpT);
    pool->type = OpType_Pooling;
@@ -381,9 +431,13 @@ x: A variable.
Returns:
output: A variable with the same type as `x`.
*/
VARP _Relu6(VARP x) {
VARP _Relu6(VARP x, float minValue, float maxValue) {
    std::unique_ptr<OpT> relu(new OpT);
    relu->type = OpType_ReLU6;
    relu->main.value = new Relu6T;
    relu->main.type = OpParameter_Relu6;
    relu->main.AsRelu6()->maxValue = maxValue;
    relu->main.AsRelu6()->minValue = minValue;
    return (Variable::create(Expr::create(relu.get(), {x})));
}
/*Given an input value x, it computes the output as x if x > 0 and slopes * x if x <= 0.
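With the extra bounds, _Relu6 doubles as a general clamp. A hedged sketch, assuming the header declares the classic 0/6 defaults (x is hypothetical):

    VARP y = _Relu6(x);               // classic ReLU6: clamp(x, 0, 6)
    VARP z = _Relu6(x, -1.0f, 1.0f);  // acts as clamp(x, -1, 1)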
@@ -746,9 +800,12 @@ input: A variable.
Returns:
A variable of Halide_Type_Int.
*/
VARP _Shape(VARP input) {
VARP _Shape(VARP input, bool nchw) {
    std::unique_ptr<OpT> shape(new OpT);
    shape->type = OpType_Shape;
    if (nchw) {
        shape->defaultDimentionFormat = MNN_DATA_FORMAT_NCHW;
    }
    return (Variable::create(Expr::create(std::move(shape), {input})));
}
/*Stacks a list of rank-R variables into one rank-(R+1) variable.
@@ -906,6 +963,21 @@ VARP _Elu(VARP features, float alpha) {
    op->main.value = eluParam;
    return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Given an input value x, it computes the output as 1.0 if x > threshold and 0.0 if x <= threshold.
features: A variable of type Halide_Type_Float
threshold: threshold value
Returns:
A variable. Has the same type as features.
*/
VARP _Threshold(VARP features, float threshold) {
    std::unique_ptr<OpT> op(new OpT);
    op->type = OpType_Threshold;
    auto eluParam = new ELUT;
    op->main.type = OpParameter_ELU;
    eluParam->alpha = threshold;
    op->main.value = eluParam;
    return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Computes the size of the variable
Args:
input: A variable of type Halide_Type_Float or Halide_Type_Int
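Note that _Threshold reuses the ELU parameter table to carry its threshold; only eluParam->alpha is meaningful here. A one-line usage sketch following the docstring above (x is hypothetical):

    VARP mask = _Threshold(x, 0.5f);  // 1.0 where x > 0.5, else 0.0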
@@ -1049,7 +1121,6 @@ std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims) {
    op->main.type = OpParameter_MomentsParam;
    momentsParam->dim = axis;
    momentsParam->keepDims = keepDims;
    momentsParam->dType = (MNN::DataType)Utils::convertDataType(x->getInfo()->type);
    op->main.value = momentsParam;
    EXPRP expr = Expr::create(std::move(op), {x}, 2);
    std::vector<VARP> res;
@@ -1405,7 +1476,7 @@ VARP _ZeroGrad(VARP x) {
}

VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu) {
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_ConvInt8;
    if (channel[0] == channel[1] && channel[0] == group) {
@@ -1433,9 +1504,16 @@ VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<fl
    conv2D->symmetricQuan->bias = std::move(bias);
    conv2D->symmetricQuan->scale = std::move(scale);
    conv2D->symmetricQuan->weight = std::move(weight);
    conv2D->symmetricQuan->nbits = nbits;
    return (Variable::create(Expr::create(convOp.get(), {x})));
}

VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim) {
    std::unique_ptr<MNN::OpT> cosineSimilarityOp(new MNN::OpT);
    cosineSimilarityOp->type = MNN::OpType_CosineSimilarity;
    return (Variable::create(Expr::create(std::move(cosineSimilarityOp), {input0, input1, inputDim})));
}

VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue/*For future*/) {
    auto xInfo = x->getInfo();
    auto scaleInfo = scale->getInfo();
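_FloatToInt8 appears to take the reciprocal of the dequantize scale, which is how the fused conv module later in this diff wires it up; _Int8ToFloat takes the scale itself. A hedged round-trip sketch (scaleData and channels are hypothetical):

    auto scaleVar = _Const(scaleData.data(), {channels}, NCHW, halide_type_of<float>());
    VARP q  = _FloatToInt8(x, _Reciprocal(scaleVar), -127, 127);  // quantize
    VARP dq = _Int8ToFloat(q, scaleVar);                          // dequantize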
@@ -22,28 +22,7 @@ Optimizer::Parameters::~Parameters() {
    }
}
std::shared_ptr<Optimizer> Optimizer::create(Config config) {
    const int numThread = config.numThread;
    auto forwardType = config.forwardType;
    if (forwardType != MNN_FORWARD_ALL) {
        if (MNNGetExtraBackendCreator(forwardType) == nullptr) {
            return nullptr;
        }
        return std::shared_ptr<Optimizer>(new MergeOptimizer(config.forwardType, numThread, nullptr));
    }

    auto device = config.device;
    if (CPU == device) {
        return std::shared_ptr<Optimizer>(new MergeOptimizer(MNN_FORWARD_CPU, numThread, nullptr));
    }
    if (GPU == device) {
        std::vector<MNNForwardType> types {MNN_FORWARD_METAL, MNN_FORWARD_OPENCL, MNN_FORWARD_VULKAN, MNN_FORWARD_OPENGL};
        for (auto type : types) {
            auto creator = MNNGetExtraBackendCreator(type);
            if (nullptr != creator) {
                return std::shared_ptr<Optimizer>(new MergeOptimizer(type, numThread, nullptr));
            }
        }
    }
    // Do nothing
    return nullptr;
}

@@ -0,0 +1,45 @@
//
//  RandomGenerator.hpp
//  MNN
//
//  Created by MNN on 2019/11/28.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef RandomGenerator_hpp
#define RandomGenerator_hpp

#include <MNN/MNNDefine.h>
#include <random>

namespace MNN {
namespace Express {

class MNN_PUBLIC RandomGenerator {
private:
    RandomGenerator(int seed = std::random_device()()) {
        mSeed = seed;
        mGenerator.seed(mSeed);
    }

    ~RandomGenerator() = default;

    RandomGenerator(RandomGenerator &);

    RandomGenerator &operator=(const RandomGenerator &);

private:
    int mSeed;
    std::mt19937 mGenerator;

public:
    static std::mt19937 &generator(int seed = std::random_device()()) {
        static RandomGenerator rng(seed);
        return rng.mGenerator;
    }
};

} // namespace Express
} // namespace MNN

#endif // RandomGenerator_hpp
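Because generator() hands out a function-local static, only the seed passed on the very first call takes effect; seed arguments on later calls are silently ignored. A sketch:

    #include <random>
    #include "RandomGenerator.hpp"  // path assumed
    using namespace MNN::Express;

    auto& g = RandomGenerator::generator(42);  // seeds the singleton once
    RandomGenerator::generator(7);             // same engine; the 7 is ignored
    std::uniform_real_distribution<float> dist(0.0f, 1.0f);
    float sample = dist(g);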
@@ -10,8 +10,24 @@
#include <map>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/MNNMemoryUtils.h"
namespace MNN {
namespace Express {
Expr::Inside::Inside(int outputSize) {
    mOutputInfos.resize(outputSize);
    mOutputTensors.resize(outputSize);
    for (int i=0; i<outputSize; ++i) {
        mOutputTensors[i] = new Tensor;
        TensorUtils::getDescribe(mOutputTensors[i])->memoryType = Tensor::InsideDescribe::MEMORY_HOST;
    }
}
Expr::Inside::~Inside() {
    for (auto t : mOutputTensors) {
        delete t;
    }
}

#define CONVERT(src, dst, f)\
if (f == src) return dst;

@@ -61,7 +77,6 @@ void Utils::copyInfoToTensor(Tensor* dest, const Variable::Info* source) {
    }
    dest->buffer().dimensions = (int)source->dim.size();
    dest->buffer().type = source->type;
    dest->buffer().host = (uint8_t*)source->ptr;
    TensorUtils::getDescribe(dest)->dimensionFormat = (MNN_DATA_FORMAT)Utils::convertFormat(source->order);
    TensorUtils::setLinearLayout(dest);
}
@@ -70,7 +85,31 @@ void Utils::copyTensorToInfo(Variable::Info* shape, const Tensor* tensor) {
    shape->dim = tensor->shape();
    shape->size = tensor->elementSize();
    shape->order = Utils::revertFormat(TensorUtils::getDescribe(tensor)->dimensionFormat);
    shape->ptr = tensor->host<float>();
}
bool Utils::allocMemoryForHostTensor(Tensor* dest) {
    if (nullptr != dest->buffer().host) {
        return true;
    }
    if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
        return false;
    }
    auto size = dest->size();
    if (0 >= size) {
        return false;
    }
    dest->buffer().host = (uint8_t*)MNNMemoryAllocAlign(size, MNN_MEMORY_ALIGN_DEFAULT);
    return dest->buffer().host != nullptr;
}
bool Utils::releaseMemoryForHostTensor(Tensor* dest) {
    if (nullptr == dest->buffer().host) {
        return true;
    }
    if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
        return false;
    }
    MNNMemoryFreeAlign(dest->buffer().host);
    dest->buffer().host = nullptr;
    return true;
}

} // namespace Express
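The two helpers only act on tensors whose memoryType is MEMORY_HOST, which is exactly how Inside's output tensors are constructed above. A hedged pairing sketch (buffer setup elided to the essentials):

    Tensor* t = new Tensor;
    TensorUtils::getDescribe(t)->memoryType = Tensor::InsideDescribe::MEMORY_HOST;
    // ...set t->buffer() dims and type so that t->size() is positive, then:
    if (Utils::allocMemoryForHostTensor(t)) {
        // use t->host<float>() here ...
        Utils::releaseMemoryForHostTensor(t);
    }
    delete t;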
@@ -15,15 +15,16 @@
namespace MNN {
namespace Express {
struct Expr::Inside {
    std::vector<const Variable::Info*> mInputInfos;
    Inside(int outputSize);
    ~Inside();
    std::vector<Variable::Info> mOutputInfos;
    std::vector<Tensor*> mOutputTensors;
    Executor::Requirement mReq;
    std::shared_ptr<Executor::ComputeCache::Unit> mUnit;
    std::shared_ptr<Executor::Unit> mUnit;
    std::shared_ptr<Executor::ComputeCache> mCache;
    int mCacheOffset = 0;
    bool mInfoDirty = true;
    bool mContentDirty = true;
    bool mLinkCache = false;
};
class Utils {
public:
@@ -33,6 +34,8 @@ public:
    static int convertFormat(Dimensionformat format);
    static Express::Dimensionformat revertFormat(int format);
    static halide_type_t revertDataType(DataType dataType);
    static bool allocMemoryForHostTensor(Tensor* dest);
    static bool releaseMemoryForHostTensor(Tensor* dest);
};
} // namespace Express
} // namespace MNN

@@ -10,7 +10,7 @@
#include <MNN/expr/ExprCreator.hpp>
using namespace MNN::Express;
namespace MNN {
namespace Train {
namespace Express {
FixModule::FixModule(std::vector<Express::VARP> output, std::vector<Express::VARP> parameters,
                     std::vector<std::pair<Express::VARP, Express::Dimensionformat>> inputs) {
    for (auto p : parameters) {
@@ -34,5 +34,19 @@ std::vector<Express::VARP> FixModule::onForward(const std::vector<Express::VARP>
    }
    return mOutput;
}
} // namespace Train

Module* FixModule::clone(CloneContext* ctx) const {
    FixModule* module(new FixModule);
    for (auto& it : mInputs) {
        VARP v = ctx->getOrClone(it.first);
        module->mInputs.push_back(std::make_pair(v, it.second));
    }
    for (auto& it : mOutput) {
        VARP v = ctx->getOrClone(it);
        module->mOutput.push_back(v);
    }
    return this->cloneBaseTo(ctx, module);
}

} // namespace Express
} // namespace MNN
@@ -8,9 +8,9 @@

#ifndef FixModule_hpp
#define FixModule_hpp
#include "Module.hpp"
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Train {
namespace Express {

class FixModule : public Module {
public:
@@ -20,10 +20,14 @@ public:
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
    virtual void onClearCache() override;
private:
    FixModule() = default;

    Module* clone(CloneContext* ctx) const override;

    std::vector<std::pair<Express::VARP, Express::Dimensionformat>> mInputs;
    std::vector<Express::VARP> mOutput;
};
} // namespace Train
} // namespace Express
} // namespace MNN

#endif
@@ -0,0 +1,112 @@
//
//  IfModule.cpp
//  MNN
//
//  Created by MNN on 2020/09/01.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "IfModule.hpp"
#include "MNN_generated.h"
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
    for (int i=0; i<names.size(); ++i) {
        if (names[i] == key) {
            return i;
        }
    }
    return -1;
}
std::vector<Express::VARP> IfModule::onForward(const std::vector<Express::VARP>& inputs) {
    std::vector<Express::VARP> outputs(mOutputFromElse.size());
    MNN_ASSERT(mOutputFromThen.size() == mOutputFromElse.size());
    if (inputs[0]->readMap<int>()[0] > 0) {
        std::vector<Express::VARP> subInputs(mInputForThen.size());
        for (auto& p : mInputForThen) {
            subInputs[p.first] = inputs[p.second];
        }
        auto subOutputs = mThen->onForward(subInputs);
        for (int i=0; i<mOutputFromThen.size(); ++i) {
            outputs[i] = subOutputs[mOutputFromThen[i]];
        }
    } else {
        std::vector<Express::VARP> subInputs(mInputForElse.size());
        for (auto& p : mInputForElse) {
            subInputs[p.first] = inputs[p.second];
        }
        auto subOutputs = mElse->onForward(subInputs);
        for (int i=0; i<mOutputFromElse.size(); ++i) {
            outputs[i] = subOutputs[mOutputFromElse[i]];
        }
    }
    return outputs;
}
IfModule* IfModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
    auto module = new IfModule;
    auto ifParam = op->main_as_IfParam();
    auto& thenG = subGraph.find(ifParam->then_graph()->str())->second;
    auto& elseG = subGraph.find(ifParam->else_graph()->str())->second;
    module->mElse = elseG.m;
    module->mThen = thenG.m;
    if (nullptr != op->name()) {
        module->setName(op->name()->str());
    }
    /** Compute map index
     std::vector<std::pair<int, int>> mInputForThen;

     // First: mElse's index, Second: inputs' index
     std::vector<std::pair<int, int>> mInputForElse;

     std::vector<int> mOutputFromThen;
     std::vector<int> mOutputFromElse;
     */
    // Map Inputs
    for (int i=0; i<ifParam->aliases_inputs()->size(); ++i) {
        auto index = i;
        auto data = ifParam->aliases_inputs()->GetAs<StringVec>(i);
        if (nullptr == data->data()) {
            continue;
        }
        for (int s=0; s<data->data()->size(); ++s) {
            auto name = data->data()->GetAsString(s)->str();
            auto thenPos = _findPos(thenG.inputs, name);
            if (thenPos >= 0) {
                module->mInputForThen.emplace_back(std::make_pair(thenPos, i));
            }
            auto elsePos = _findPos(elseG.inputs, name);
            if (elsePos >= 0) {
                module->mInputForElse.emplace_back(std::make_pair(elsePos, i));
            }
        }
    }
    // Map outputs
    auto output = ifParam->aliases_outputs();
    module->mOutputFromThen.resize(output->size());
    module->mOutputFromElse.resize(output->size());
    for (int i=0; i<output->size(); ++i) {
        auto data = output->GetAs<StringVec>(i);
        MNN_ASSERT(data->data()->size() == 2);

        auto thenPos = _findPos(thenG.outputs, data->data()->GetAsString(0)->str());
        MNN_ASSERT(thenPos >= 0);
        auto elsePos = _findPos(elseG.outputs, data->data()->GetAsString(1)->str());
        module->mOutputFromThen[i] = thenPos;
        module->mOutputFromElse[i] = elsePos;
    }
    return module;
}

Module* IfModule::clone(CloneContext* ctx) const {
    IfModule* module(new IfModule);
    module->mInputForThen = mInputForThen;
    module->mInputForElse = mInputForElse;
    module->mOutputFromThen = mOutputFromThen;
    module->mOutputFromElse = mOutputFromElse;
    module->mThen.reset(mThen->clone(ctx));
    module->mElse.reset(mElse->clone(ctx));
    return this->cloneBaseTo(ctx, module);
}

} // namespace Express
} // namespace MNN
@@ -0,0 +1,43 @@
//
//  IfModule.hpp
//  MNN
//
//  Created by MNN on 2020/09/01.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef IfModule_hpp
#define IfModule_hpp

#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class IfModule : public Module {
public:
    virtual ~IfModule() {
        // Do nothing
    }
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
    static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);

private:
    IfModule(){}

    Module* clone(CloneContext* ctx) const override;

    // First: mThen's index, Second: inputs' index
    std::vector<std::pair<int, int>> mInputForThen;

    // First: mElse's index, Second: inputs' index
    std::vector<std::pair<int, int>> mInputForElse;

    std::vector<int> mOutputFromThen;
    std::vector<int> mOutputFromElse;

    std::shared_ptr<Module> mThen;
    std::shared_ptr<Module> mElse;
};
}
}

#endif /* IfModule_hpp */
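At run time the first input acts as the condition: a positive integer routes the remaining inputs into the then-graph, anything else into the else-graph, with the index pairs above remapping positions. A usage sketch (module and x are hypothetical):

    VARP cond = _Scalar<int>(1);                  // > 0 selects the then branch
    auto outputs = module->onForward({cond, x});  // remapped per aliases_inputs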
@@ -0,0 +1,182 @@
//
//  Module.cpp
//  MNN
//
//  Created by MNN on 2019/11/25.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <MNN/expr/Module.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include "FixModule.hpp"
#include "PipelineModule.hpp"
#include "core/FileLoader.hpp"

namespace MNN {
namespace Express {

class EmptyModule : public Module {
public:
    EmptyModule(const std::vector<Express::VARP>& parameters) {
        for (auto p : parameters) {
            addParameter(p);
        }
    }
    virtual ~EmptyModule() {
        // Do nothing
    }
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
        return {};
    }

protected:
    EmptyModule() = default;

    Module* clone(Module::CloneContext* ctx) const override {
        EmptyModule* module(new EmptyModule);
        return this->cloneBaseTo(ctx, module);
    }
};

Module* Module::createEmpty(const std::vector<Express::VARP>& parameters) {
    return new EmptyModule(parameters);
}

Express::VARP Module::forward(Express::VARP input) {
    return this->onForward({input})[0];
}
std::vector<Express::VARP> Module::parameters() const {
    std::vector<Express::VARP> result;
    _collectParameters(result);
    return result;
}
bool Module::loadParameters(const std::vector<Express::VARP>& parameters) {
    std::vector<Express::VARP> result;
    _collectParameters(result);
    if (parameters.empty() || parameters.size() != result.size()) {
        MNN_ERROR("Error parameters, empty or parameter size not match \n");
        return false;
    }
    for (int i=0; i<parameters.size(); ++i) {
        if (nullptr != result[i].get()) {
            // Check Origin parameter's size
            auto dstInfo = result[i]->getInfo();
            auto srcInfo = parameters[i]->getInfo();
            if (dstInfo->dim.size() != srcInfo->dim.size() || dstInfo->order != srcInfo->order) {
                MNN_ERROR("Error parameters %d, dim size or order not match \n", i);
                return false;
            }
            if (dstInfo->size != srcInfo->size || dstInfo->type != srcInfo->type) {
                MNN_ERROR("Error parameters %d, size or type not match \n", i);
                return false;
            }
        }
        Variable::replace(result[i], parameters[i]);
    }
    return true;
}
void Module::setIsTraining(const bool isTraining) {
    mIsTraining = isTraining;
    for (auto c : mChildren) {
        c->setIsTraining(isTraining);
    }
}

bool Module::getIsTraining() {
    return mIsTraining;
}

void Module::registerModel(const std::vector<std::shared_ptr<Module>>& children) {
    mChildren.insert(mChildren.begin(), children.begin(), children.end());
}
int Module::addParameter(VARP parameter) {
    auto res = mParameters.size();
    mParameters.emplace_back(parameter);
    return (int)res;
}

void Module::setParameter(Express::VARP parameter, int index) {
    if (index < 0 || index >= mParameters.size()) {
        MNN_ERROR("Module error: index out of range: %d - %d:\n", index, (int)mParameters.size());
        return;
    }
    mParameters[index] = parameter;
}

void Module::_collectParameters(std::vector<Express::VARP>& result) const {
    for (auto p : mParameters) {
        result.push_back(p);
    }
    for (auto c : mChildren) {
        c->_collectParameters(result);
    }
}
void Module::clearCache() {
    for (auto c : mChildren) {
        c->clearCache();
    }
    this->onClearCache();
}

Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic) {
    AutoStorage<uint8_t> buffer;
    {
        FileLoader loader(fileName);
        if (!loader.valid()) {
            MNN_ERROR("Error for open %s\n", fileName);
            return {};
        }
        loader.read();
        if (!loader.valid()) {
            return {};
        }
        loader.merge(buffer);
        if (buffer.get() == nullptr) {
            return {};
        }
    }
    return load(inputs, outputs, buffer.get(), buffer.size(), dynamic);
}

Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
    return PipelineModule::load(inputs, outputs, buffer, length, dynamic);
}

EXPRP Module::CloneContext::getOrClone(EXPRP expr) {
    auto it = mExprMap.find(expr.get());
    if (it == mExprMap.end()) {
        // EXPRP replica = expr->clone(shareParams);
        // TODO(hjchen2): Clone expr.
        EXPRP replica = expr;
        it = mExprMap.emplace(expr.get(), replica).first;
    }
    return it->second;
}

VARP Module::CloneContext::getOrClone(VARP var) {
    auto it = mVarMap.find(var.get());
    if (it == mVarMap.end()) {
        // TODO(hjchen2): Clone variable.
        VARP replica = var;
        it = mVarMap.emplace(var.get(), replica).first;
    }
    return it->second;
}

Module* Module::clone(const Module* module, const bool shareParams) {
    CloneContext context(shareParams);
    return module->clone(&context);
}

Module* Module::cloneBaseTo(CloneContext* ctx, Module* module) const {
    for (const Express::VARP& var : mParameters) {
        module->mParameters.push_back(ctx->getOrClone(var));
    }
    module->mIsTraining = mIsTraining;
    module->mName = mName;
    module->mType = mType;
    return module;
}

} // namespace Express
} // namespace MNN
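Every subclass in this commit follows the same cloning contract: a private default constructor, a clone(CloneContext*) hook that copies module-specific state through ctx->getOrClone, and a final cloneBaseTo to carry over name, type, training flag, and parameters. A minimal hedged sketch of a hypothetical subclass (not part of this commit):

    class ScaleModule : public Module {  // hypothetical example
    public:
        explicit ScaleModule(VARP s) : mScale(s) { addParameter(mScale); }
        std::vector<VARP> onForward(const std::vector<VARP>& inputs) override {
            return { inputs[0] * mScale };
        }
    private:
        ScaleModule() = default;
        Module* clone(CloneContext* ctx) const override {
            auto* m = new ScaleModule;
            m->mScale = ctx->getOrClone(mScale);
            return this->cloneBaseTo(ctx, m);
        }
        VARP mScale;
    };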
@@ -6,9 +6,11 @@
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "NN.hpp"
#include <MNN/expr/NN.hpp>
#include "Distributions.hpp"
#include "FixModule.hpp"
#include "WhileModule.hpp"
#include "IfModule.hpp"
#include "Initializer.hpp"
#include "MNN_generated.h"
#include "RandomGenerator.hpp"
@@ -17,7 +19,7 @@

using namespace MNN::Express;
namespace MNN {
namespace Train {
namespace Express {
static VARP _activate(VARP x, NN::ActivationFunctionType type) {
    switch (type) {
        case NN::None:
@@ -58,6 +60,14 @@ public:
    }

private:
    DropoutModule() = default;

    Module* clone(CloneContext* ctx) const override {
        DropoutModule* module(new DropoutModule);
        module->mDropRatio = mDropRatio;
        return this->cloneBaseTo(ctx, module);
    }

    float mDropRatio;
};

@@ -80,8 +90,8 @@ public:
        mRunningVariance = _Const(bnPa->varData()->data(), {1, mChannels, 1, 1}, NCHW);
        addParameter(mScale);
        addParameter(mBias);
        addParameter(mRunningVariance);
        addParameter(mRunningMean);
        mRunningVariancePos = addParameter(mRunningVariance);
        mRunningMeanPos = addParameter(mRunningMean);
        mReductionDims = {0, 2, 3};
        setType("BatchNorm");
    }
@@ -110,8 +120,8 @@ public:

        addParameter(mScale);
        addParameter(mBias);
        addParameter(mRunningVariance);
        addParameter(mRunningMean);
        mRunningVariancePos = addParameter(mRunningVariance);
        mRunningMeanPos = addParameter(mRunningMean);
        setType("BatchNorm");
    }

@@ -156,9 +166,8 @@ public:
            mRunningVariance = _Const(mMomentum) * mRunningVariance + _Const(1 - mMomentum) * sampleVar;
            outputData->setName(name());
            outputData = _Convert(outputData, dimFormat);
            Variable::prepareCompute({inputs[0], outputData, mRunningMean, mRunningVariance});
            mRunningMean.fix(Express::VARP::CONSTANT);
            mRunningVariance.fix(Express::VARP::CONSTANT);
            setParameter(mRunningMean, mRunningMeanPos);
            setParameter(mRunningVariance, mRunningVariancePos);
            return {outputData};
        }
        auto rStd = _Const(1.0f) / _Sqrt(mRunningVariance + _Const(mEps));
@@ -180,12 +189,31 @@ public:
    }

private:
    BatchNormModule() = default;

    Module* clone(CloneContext* ctx) const override {
        BatchNormModule* module(new BatchNormModule);
        module->mMomentum = mMomentum;
        module->mEps = mEps;
        module->mScale = ctx->getOrClone(mScale);
        module->mBias = ctx->getOrClone(mBias);
        module->mRunningMean = ctx->getOrClone(mRunningMean);
        module->mRunningVariance = ctx->getOrClone(mRunningVariance);
        module->mRunningMeanPos = mRunningMeanPos;
        module->mRunningVariancePos = mRunningVariancePos;
        module->mChannels = mChannels;
        module->mReductionDims = mReductionDims;
        return this->cloneBaseTo(ctx, module);
    }

    float mMomentum = 0.99;
    float mEps = 1e-5;
    VARP mScale = nullptr;
    VARP mBias = nullptr;
    VARP mRunningMean = nullptr;
    VARP mRunningVariance = nullptr;
    int mRunningMeanPos = -1;
    int mRunningVariancePos = -1;
    int mChannels;
    std::vector<int> mReductionDims;
};

@@ -246,7 +274,18 @@ public:
        tempOutput->setName(name());
        return {tempOutput};
    }

private:
    ConvModule() = default;

    Module* clone(CloneContext* ctx) const override {
        ConvModule* module(new ConvModule);
        module->mParameter = mParameter;
        module->mParameter.weight = ctx->getOrClone(mParameter.weight);
        module->mParameter.bias = ctx->getOrClone(mParameter.bias);
        return this->cloneBaseTo(ctx, module);
    }

    NN::ConvParameters mParameter;
};
static std::tuple<VARP, VARP, int> _initParameters(const NN::ConvOption& option, bool hasBias,
@@ -533,7 +572,23 @@ public:
    }

private:
    const NN::ConvOption mOption;
    ConvOctaveModule() = default;

    Module* clone(CloneContext* ctx) const override {
        ConvOctaveModule* module(new ConvOctaveModule);
        module->mOption = mOption;
        module->mLLW = ctx->getOrClone(mLLW);
        module->mLHW = ctx->getOrClone(mLHW);
        module->mHLW = ctx->getOrClone(mHLW);
        module->mHHW = ctx->getOrClone(mHHW);
        module->mLBias = ctx->getOrClone(mLBias);
        module->mHBias = ctx->getOrClone(mHBias);
        module->mSplitInput = mSplitInput;
        module->mGroup = mGroup;
        return this->cloneBaseTo(ctx, module);
    }

    NN::ConvOption mOption;
    VARP mLLW;
    VARP mLHW;
    VARP mHLW;
@@ -555,7 +610,7 @@ Module* NN::ConvOctave(const ConvParameters& parameters,
    module->setName(parameters.name);
    return module;
}
Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs) {
    if (nullptr == expr->get()) {
        return nullptr;
    }
@@ -565,6 +620,12 @@ Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
    if (expr->get()->type() == OpType_Dropout) {
        return new DropoutModule(0.3f);
    }
    if (expr->get()->type() == OpType_While) {
        return WhileModule::create(expr->get(), subgraphs);
    }
    if (expr->get()->type() == OpType_If) {
        return IfModule::create(expr->get(), subgraphs);
    }
    return nullptr;
}

@@ -622,6 +683,9 @@ public:
        mLimitScale = _Scalar<float>(1.0f / limit);
        mClampValue = _Scalar<float>(limit);

        mInputScalePos = addParameter(mInputScale);
        mOutputScalePos = addParameter(mOutputScale);

        setType("ConvBNReluFused");
    }

@@ -632,31 +696,16 @@ public:
            tempX = _Convert(tempX, NCHW);
        }
        auto originX = tempX;
        VARP scale;
        if (mFeatureScaleStatMethod == NN::PerTensor) {
            scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
        } else {
            auto originSize = originX->getInfo()->size;
            auto batch = originX->getInfo()->dim[0];
            auto channel = originX->getInfo()->dim[1];
            if (originSize / batch / channel < 10) {
                // Too small data
                //MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
                std::vector<int> dims = {1, channel, 1, 1};
                auto dimVar = _Const(dims.data(), {4}, NCHW, halide_type_of<int32_t>());
                auto singleScale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
                scale = _Fill(dimVar, singleScale);
            } else {
                //MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
                scale = _Maximum(_ReduceMax(_Abs(tempX), {0, 2, 3}, true), _Scalar<float>(0.0001f)) * mLimitScale;
            }
        }
        scale.fix(VARP::CONSTANT);
        VARP scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
        if (useScale == nullptr) {
            tempX = _Round(tempX * _Reciprocal(scale)) * scale;
        } else {
            tempX = _Round(tempX * _Reciprocal(useScale)) * useScale;
        }
        // Break the grad by using cast
        tempX = _Cast<float>(tempX);

        // Move grad from tempX to originX
        tempX = _Convert(tempX + _ZeroGrad(originX), originFormat);
        return std::make_pair(tempX, scale);
    }
@@ -684,18 +733,16 @@ public:
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
        VARP res;
        if (getIsTraining()) {
            Variable::prepareCompute({inputs[0]});
            auto x = _Convert(inputs[0], NCHW);
            // simulate weight quant
            auto weightScale = _Maximum(_ReduceMax(_Abs(mWeight), {1, 2, 3}, true), _Scalar<float>(1E-6)) * mLimitScale;
            weightScale.fix(VARP::CONSTANT);
            auto weightTemp = _Round(mWeight * _Reciprocal(weightScale)) * weightScale;
            weightTemp = weightTemp + _ZeroGrad(mWeight);

            // simulate input quant to get original input scale
            auto inputPair = fakeQuantFeature(x);
            mInputScale = updateScale(mInputScale, inputPair.second);
            mInputScale.fix(VARP::CONSTANT);
            setParameter(mInputScale, mInputScalePos);

            // simulate output quant to get original output scale
            res = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@@ -709,10 +756,9 @@ public:

            res = _activate(res, mActivation);

            Variable::prepareCompute({conv, res});
            auto outputPair = fakeQuantFeature(res);
            mOutputScale = updateScale(mOutputScale, outputPair.second);
            mOutputScale.fix(VARP::CONSTANT);
            setParameter(mOutputScale, mOutputScalePos);
            res = outputPair.first;
        } else {
            if (nullptr == mInputScale) {
@@ -725,6 +771,7 @@ public:
                auto x = _Convert(inputs[0], NCHW);
                auto inputPair = fakeQuantFeature(x);
                mInputScale = inputPair.second;
                setParameter(mInputScale, mInputScalePos);
                inputPair.first.fix(VARP::CONSTANT);

                auto simuRes = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@@ -737,6 +784,7 @@ public:
                Variable::prepareCompute({simuRes});
                auto outputPair = fakeQuantFeature(simuRes);
                mOutputScale = outputPair.second;
                setParameter(mOutputScale, mOutputScalePos);
                outputPair.first.fix(VARP::CONSTANT);
            }

@@ -772,12 +820,7 @@ public:
            {
                std::vector<int> dims = {x->getInfo()->dim[1]};
                auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
                VARP channelScale;
                if (mFeatureScaleStatMethod == NN::PerTensor) {
                    channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
                } else {
                    channelScale = _Reciprocal(mInputScale);
                }
                VARP channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
                x = _FloatToInt8(x, channelScale, -127, 127); // TODO add clamp
            }

@@ -824,12 +867,7 @@ public:
            {
                std::vector<int> dims = {res->getInfo()->dim[1]};
                auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
                VARP channelScale;
                if (mFeatureScaleStatMethod == NN::PerTensor) {
                    channelScale = _Fill(dimVar, mOutputScale);
                } else {
                    channelScale = mOutputScale;
                }
                VARP channelScale = _Fill(dimVar, mOutputScale);
                res = _Int8ToFloat(res, channelScale);
            }
        }
@@ -838,6 +876,34 @@ public:
    }

private:
    ConvBNReluFusedModule() = default;

    Module* clone(CloneContext* ctx) const override {
        ConvBNReluFusedModule* module(new ConvBNReluFusedModule);
        module->mConvParameter = mConvParameter;
        module->mConvParameter.weight = ctx->getOrClone(mConvParameter.weight);
        module->mConvParameter.bias = ctx->getOrClone(mConvParameter.bias);
        module->mOption = mOption;
        module->mGroup = mGroup;
        module->mWeight = ctx->getOrClone(mWeight);
        module->mBias = ctx->getOrClone(mBias);
        module->mActivation = mActivation;
        module->mLimitScale = ctx->getOrClone(mLimitScale);
        module->mInputScalePos = mInputScalePos;
        module->mOutputScalePos = mOutputScalePos;
        module->mInputScale = ctx->getOrClone(mInputScale);
        module->mOutputScale = ctx->getOrClone(mOutputScale);
        module->mClampValue = ctx->getOrClone(mClampValue);
        module->mMomentum = mMomentum;
        module->mFeatureScaleStatMethod = mFeatureScaleStatMethod;
        module->mScaleUpdateMethod = mScaleUpdateMethod;
        if (mBatchNorm) {
            module->mBatchNorm.reset(mBatchNorm->clone(ctx));
            module->registerModel({module->mBatchNorm});
        }
        return this->cloneBaseTo(ctx, module);
    }

    NN::ConvParameters mConvParameter;
    NN::ConvOption mOption;
    int mGroup;
@@ -846,6 +912,8 @@ private:
    NN::ActivationFunctionType mActivation = NN::ActivationFunctionType::None;
    std::shared_ptr<Module> mBatchNorm = nullptr;
    VARP mLimitScale;
    int mInputScalePos = -1;
    int mOutputScalePos = -1;
    VARP mInputScale = nullptr;
    VARP mOutputScale = nullptr;
    VARP mClampValue;
@@ -870,5 +938,5 @@ Module* NN::ConvInt8(const ConvParameters& para, int bits, NN::FeatureScaleStatM
    return new ConvBNReluFusedModule({conv}, featureMethod, method, bits);
}

} // namespace Train
} // namespace Express
} // namespace MNN
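fakeQuantFeature above boils down to straight-through fake quantization: scale = max(|x|) / limit, then x' = round(x / scale) * scale, with _Cast plus _ZeroGrad steering the gradient back to the original activation. The scalar arithmetic in isolation (the values are hypothetical):

    #include <algorithm>
    #include <cmath>

    float limit  = 127.0f;                            // 8-bit symmetric range
    float maxAbs = std::max(3.17f, 0.0001f);          // hypothetical activation max
    float scale  = maxAbs / limit;
    float x      = 1.234f;
    float fakeQuant = std::roundf(x / scale) * scale; // quantize-dequantize in float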
@ -0,0 +1,761 @@
|
|||
//
|
||||
// PipelineModule.cpp
|
||||
// MNN
|
||||
//
|
||||
// Created by MNN on 2020/01/09.
|
||||
// Copyright © 2018, Alibaba Group Holding Limited
|
||||
//
|
||||
|
||||
#include "PipelineModule.hpp"
|
||||
#include "MNN_generated.h"
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include "StaticModule.hpp"
|
||||
#include "IfModule.hpp"
|
||||
#include "WhileModule.hpp"
|
||||
using namespace MNN::Express;
|
||||
namespace MNN {
|
||||
namespace Express {
|
||||
//#define DYNAMIC
|
||||
#define PIPELINE_MODULE "_pipeline_module__"
|
||||
class ExprModule : public Module {
|
||||
public:
|
||||
ExprModule(EXPRP expr) {
|
||||
mExpr = expr;
|
||||
setName(expr->name());
|
||||
mInputs = expr->inputs();
|
||||
auto op = mExpr->get();
|
||||
if (op) {
|
||||
auto typeName = EnumNameOpType(op->type());
|
||||
setType(typeName);
|
||||
}
|
||||
for (int i = 0; i < mInputs.size(); ++i) {
|
||||
auto inputExpr = mInputs[i]->expr().first;
|
||||
if (inputExpr->get() != nullptr) {
|
||||
mInputs[i] = nullptr;
|
||||
mInputIndexes.emplace_back(i);
|
||||
continue;
|
||||
}
|
||||
switch (inputExpr->inputType()) {
|
||||
case VARP::INPUT:
|
||||
mInputs[i] = nullptr;
|
||||
mInputIndexes.emplace_back(i);
|
||||
break;
|
||||
case VARP::CONSTANT:
|
||||
break;
|
||||
case VARP::TRAINABLE:
|
||||
addParameter(mInputs[i]);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
virtual std::vector<VARP> onForward(const std::vector<VARP>& inputs) override {
|
||||
MNN_ASSERT(mInputIndexes.size() == inputs.size());
|
||||
if (nullptr == mExpr->get()) {
|
||||
return {Variable::create(mExpr)};
|
||||
}
|
||||
std::vector<VARP> tempInputs = mInputs;
|
||||
for (int i = 0; i < inputs.size(); ++i) {
|
||||
tempInputs[mInputIndexes[i]] = inputs[i];
|
||||
}
|
||||
std::vector<VARP> outputVars;
|
||||
auto newExpr = Expr::create(mExpr->extra(), std::move(tempInputs), mExpr->outputSize());
|
||||
newExpr->setName(mExpr->name());
|
||||
for (int i = 0; i < mExpr->outputSize(); ++i) {
|
||||
outputVars.emplace_back(Variable::create(newExpr, i));
|
||||
}
|
||||
return outputVars;
|
||||
}
|
||||
const std::vector<int>& inputIndexes() const {
|
||||
return mInputIndexes;
|
||||
}
|
||||
|
||||
private:
|
||||
Module* clone(CloneContext* ctx) const override {
|
||||
ExprModule* module(new ExprModule(ctx->getOrClone(mExpr)));
|
||||
for (const VARP& var : mInputs) {
|
||||
module->mInputs.push_back(ctx->getOrClone(var));
|
||||
}
|
||||
module->mInputIndexes = mInputIndexes;
|
||||
return this->cloneBaseTo(ctx, module);
|
||||
}
|
||||
|
||||
EXPRP mExpr;
|
||||
std::vector<VARP> mInputs;
|
||||
std::vector<int> mInputIndexes;
|
||||
};
|
||||
|
||||
Module* PipelineModule::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
|
||||
std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(EXPRP)> transformFunction;
|
||||
if (fortrain) {
|
||||
transformFunction =
|
||||
[&subGraph](EXPRP source) {
|
||||
if (source->get() == nullptr) {
|
||||
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
|
||||
}
|
||||
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
|
||||
if (nullptr != m) {
|
||||
m->setName(source->name());
|
||||
return std::make_pair(std::vector<int>{}, m);
|
||||
}
|
||||
auto convExtracted = NN::Utils::ExtractConvolution(source);
|
||||
if (convExtracted.weight == nullptr) {
|
||||
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
|
||||
}
|
||||
std::shared_ptr<Module> module(NN::Conv(convExtracted));
|
||||
module->setName(source->name());
|
||||
return std::make_pair(std::vector<int>{0}, module);
|
||||
};
|
||||
} else {
|
||||
transformFunction = [&subGraph](EXPRP source) {
|
||||
if (source->get() == nullptr) {
|
||||
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
|
||||
}
|
||||
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
|
||||
if (nullptr != m) {
|
||||
m->setName(source->name());
|
||||
return std::make_pair(std::vector<int>{}, m);
|
||||
}
|
||||
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
|
||||
};
|
||||
}
|
||||
return new PipelineModule(inputs, outputs, transformFunction);
|
||||
}
|
||||
|
||||
PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outputs, const Transformer& transformFunction) {
    setType(PIPELINE_MODULE);
    std::vector<EXPRP> executeOrder;
    std::set<EXPRP> inputExpr;
    for (auto v : inputs) {
        inputExpr.insert(v->expr().first);
    }
    for (auto output : outputs) {
        Expr::visit(output->expr().first,
            [&executeOrder, &inputExpr](EXPRP expr) {
                if (expr->visited()) {
                    return false;
                }
                if (inputExpr.find(expr) != inputExpr.end()) {
                    expr->setVisited(true);
                    executeOrder.emplace_back(expr);
                    return false;
                }
                return true;
            },
            [&executeOrder](EXPRP expr) {
                //FUNC_PRINT_ALL(var->name().c_str(), s);
                if (!expr->visited()) {
                    executeOrder.emplace_back(expr);
                    expr->setVisited(true);
                }
                return true;
            });
    }
    for (auto expr : executeOrder) {
        expr->setVisited(false);
    }
    // Set indexes
    std::map<EXPRP, int> indexes;
    int currentIndexes = 0;
    for (auto expr : executeOrder) {
        indexes[expr] = currentIndexes;
        currentIndexes += expr->outputSize();
    }
    std::set<EXPRP> inputSets;
    mInputIndexes.clear();
    mStackSize = currentIndexes;
    for (auto v : inputs) {
        auto inputExpr = v->expr();
        mInputIndexes.emplace_back(indexes[inputExpr.first] + inputExpr.second);
        inputSets.insert(inputExpr.first);
    }

    // Create all submodules
    for (auto expr : executeOrder) {
        if (inputSets.find(expr) != inputSets.end()) {
            continue;
        }
        std::pair<std::vector<int>, std::shared_ptr<Module>> moduleResult;
        bool extracted = false;
        if (!transformFunction) {
            moduleResult = std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
        } else {
            moduleResult = transformFunction(expr);
        }
        if (moduleResult.second == nullptr) {
            std::shared_ptr<Module> module(new ExprModule(expr));
            moduleResult.first  = ((ExprModule*)module.get())->inputIndexes();
            moduleResult.second = module;
        } else {
            extracted = true;
        }
        auto subInputs = expr->inputs();
        auto& exprInputIndexes = moduleResult.first;
        std::vector<int> inputIndexes;
        if (exprInputIndexes.empty() && extracted) {
            inputIndexes.resize(subInputs.size());
            for (int i = 0; i < inputIndexes.size(); ++i) {
                auto inputExpr = subInputs[i]->expr();
                inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
            }
        } else {
            inputIndexes.resize(exprInputIndexes.size());
            for (int i = 0; i < inputIndexes.size(); ++i) {
                auto inputExpr = subInputs[exprInputIndexes[i]]->expr();
                inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
            }
        }
        std::vector<int> outputIndexes(expr->outputSize());
        for (int i = 0; i < outputIndexes.size(); ++i) {
            outputIndexes[i] = indexes[expr] + i;
        }
        mSubModules.emplace_back(std::make_tuple(moduleResult.second, inputIndexes, outputIndexes));
        registerModel({moduleResult.second});
    }
    mOutputIndexes.clear();
    for (auto output : outputs) {
        auto outputExpr = output->expr();
        mOutputIndexes.emplace_back(indexes[outputExpr.first] + outputExpr.second);
    }
}
bool PipelineModule::turnQuantize(Module* module, const int bit, NN::FeatureScaleStatMethod featureScaleStatMethod, NN::ScaleUpdateMethod scaleUpdateMethod) {
    if (nullptr == module || module->type() != PIPELINE_MODULE) {
        MNN_ERROR("Invalid module for quantization\n");
        return false;
    }
    ((PipelineModule*)module)->toTrainQuant(bit, featureScaleStatMethod, scaleUpdateMethod);
    return true;
}

std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputIndices) {
    MNN_ASSERT(outputIndices.size() > 0);
    std::vector<int> countResult(outputIndices.size(), 0);

    for (int i = 0; i < mSubModules.size(); i++) {
        auto& m = mSubModules[i];
        auto& theModule = std::get<0>(m);
        auto name = theModule->name();
        auto& inputIndices = std::get<1>(m);

        for (int j = 0; j < inputIndices.size(); j++) {
            int index = inputIndices[j];
            for (int k = 0; k < countResult.size(); k++) {
                if (index == outputIndices[k]) {
                    countResult[k]++;
                }
            }
        }
    }

    return countResult;
}

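// Editorial note (not part of this commit): countOutputReference() counts, for
// each stack slot in outputIndices, how many submodule inputs read that slot.
// Worked example: with outputIndices == {5}, if exactly one later submodule
// lists slot 5 among its inputs the result is {1}; toTrainQuant() below only
// fuses conv + bn / relu chains under that single-consumer condition.
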
void PipelineModule::toTrainQuant(const int bits, NN::FeatureScaleStatMethod featureScaleStatMethod,
                                  NN::ScaleUpdateMethod scaleUpdateMethod) {
    std::vector<int> needEraseIndices;

    for (int i = 0; i < mSubModules.size(); i++) {
        auto& m = mSubModules[i];
        auto& theModule = std::get<0>(m);
        auto moduleType = theModule->type();
        //auto& inputIndices = std::get<1>(m);
        auto& outputIndices = std::get<2>(m);

        if (moduleType == "Conv" && i < mSubModules.size() - 1) {
            auto& p1 = mSubModules[i+1];
            auto p1Module = std::get<0>(p1);
            auto& p1ModuleType = p1Module->type();
            auto& p1InputIndices = std::get<1>(p1);
            auto& p1OutputIndices = std::get<2>(p1);

            auto convOutputCount = countOutputReference(outputIndices);
            bool convSingleOutputReference = ((outputIndices.size() == 1) && (convOutputCount[0] == 1));

            // only conv
            if ((!convSingleOutputReference) || (p1ModuleType == "Conv") ||
                (p1ModuleType != "BatchNorm" && p1ModuleType != "ReLU" && p1ModuleType != "ReLU6")) {
                theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
                registerModel({theModule});
                continue;
            }
            // conv + bn + ?
            if (p1ModuleType == "BatchNorm") {
                bool convBnConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
                if (!convBnConnected) {
                    theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    continue;
                }

                // last conv + bn
                if (i == mSubModules.size() - 2) {
                    theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    outputIndices = p1OutputIndices;
                    needEraseIndices.emplace_back(i + 1);
                    continue;
                }
                // maybe there is a relu or relu6 after conv + bn
                auto& p2 = mSubModules[i+2];
                auto& p2Module = std::get<0>(p2);
                auto p2ModuleType = p2Module->type();
                auto& p2InputIndices = std::get<1>(p2);
                auto& p2OutputIndices = std::get<2>(p2);

                auto bnOutputCount = countOutputReference(p1OutputIndices);
                bool bnSingleOutputReference = ((p1OutputIndices.size() == 1) && (bnOutputCount[0] == 1));

                // only conv + bn
                if ((!bnSingleOutputReference) || (p2ModuleType != "ReLU" && p2ModuleType != "ReLU6")) {
                    theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    outputIndices = p1OutputIndices;
                    needEraseIndices.emplace_back(i + 1);
                    continue;
                } else { // conv + bn + relu or conv + bn + relu6
                    bool convBnReluConnected = ((bnSingleOutputReference) && (p2InputIndices.size() == 1) && (p2InputIndices[0] == p1OutputIndices[0]));
                    if (!convBnReluConnected) {
                        theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                        registerModel({theModule});
                        outputIndices = p1OutputIndices;
                        needEraseIndices.emplace_back(i + 1);
                        continue;
                    }

                    theModule.reset(NN::ConvBNReluFused({theModule, p1Module, p2Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    outputIndices = p2OutputIndices;
                    needEraseIndices.emplace_back(i + 1);
                    needEraseIndices.emplace_back(i + 2);
                    continue;
                }
            }
            // conv + relu or conv + relu6
            if (p1ModuleType == "ReLU" || p1ModuleType == "ReLU6") {
                bool convReluConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
                if (!convReluConnected) {
                    theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    continue;
                }

                theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                registerModel({theModule});
                outputIndices = p1OutputIndices;
                needEraseIndices.emplace_back(i + 1);
                continue;
            }
        }

        if (i == mSubModules.size() - 1 && moduleType == "Conv") {
            theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
            registerModel({theModule});
        }
    }

    // erase useless submodules
    const int eraseSize = needEraseIndices.size();
    int alreadyErasedCount = 0;
    for (int i = 0; i < eraseSize; i++) {
        auto position = needEraseIndices[i] - alreadyErasedCount;
        auto type = std::get<0>(mSubModules[position])->type();
        MNN_ASSERT(type == "BatchNorm" || type == "ReLU" || type == "ReLU6");
        mSubModules.erase(mSubModules.begin() + position);
        alreadyErasedCount++;
    }
}

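// --- Editorial usage sketch, not part of this commit: typical
// quantization-aware-training setup with the two entry points above. `net` is
// assumed to come from PipelineModule::extract with fortrain == true.
//   std::shared_ptr<Module> net(PipelineModule::extract({x}, {y}, true));
//   // Fuse conv (+ bn)(+ relu/relu6) chains and insert 8-bit fake-quant,
//   // using the default PerTensor scales updated by moving average:
//   PipelineModule::turnQuantize(net.get(), 8);
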
std::vector<VARP> PipelineModule::onForward(const std::vector<VARP>& inputs) {
    std::vector<VARP> mStack(mStackSize);
    for (int i = 0; i < mInputIndexes.size(); ++i) {
        mStack[mInputIndexes[i]] = inputs[i];
    }
    for (int index = 0; index < mSubModules.size(); ++index) {
        auto& m = mSubModules[index];
        std::vector<VARP> tempInputs(std::get<1>(m).size());
        for (int i = 0; i < tempInputs.size(); ++i) {
            tempInputs[i] = mStack[std::get<1>(m)[i]];
            MNN_ASSERT(nullptr != tempInputs[i]);
        }
        std::vector<VARP> tempOutputs = std::get<0>(m)->onForward(tempInputs);
        MNN_ASSERT(tempOutputs.size() == std::get<2>(m).size());
        for (int i = 0; i < tempOutputs.size(); ++i) {
            mStack[std::get<2>(m)[i]] = tempOutputs[i];
            MNN_ASSERT(nullptr != tempOutputs[i]);
        }
    }
    std::vector<VARP> outputs(mOutputIndexes.size());
    for (int i = 0; i < mOutputIndexes.size(); ++i) {
        outputs[i] = mStack[mOutputIndexes[i]];
    }
    return outputs;
}
void PipelineModule::onClearCache() {
    // Do nothing
}

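// Editorial worked example (not part of this commit) of the stack scheme
// shared by the constructor and onForward above. For executeOrder = {A, B, C}
// where A has two outputs and B, C one each:
//   indexes: A -> 0, B -> 2, C -> 3, so mStackSize == 4.
// onForward seeds mStack at mInputIndexes, then each submodule reads its
// inputs from mStack by slot, writes its outputs back, and the final result
// is gathered from mOutputIndexes.
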
static std::map<std::string, SubGraph> _createSubGraph(const MNN::Net* net, bool dynamic) {
    std::map<std::string, SubGraph> subGraphMap;
    auto subGraphs = net->subgraphs();
    if (nullptr == subGraphs) {
        return subGraphMap;
    }
    for (int i = 0; i < subGraphs->size(); ++i) {
        auto graph = subGraphs->GetAs<SubGraphProto>(i);
        std::vector<std::string> subInputs;
        std::vector<std::string> subOutputs;
        if (nullptr != graph->inputs()) {
            for (int v = 0; v < graph->inputs()->size(); ++v) {
                auto index = graph->inputs()->data()[v];
                subInputs.emplace_back(graph->tensors()->GetAsString(index)->str());
            }
        }
        for (int v = 0; v < graph->outputs()->size(); ++v) {
            auto index = graph->outputs()->data()[v];
            subOutputs.emplace_back(graph->tensors()->GetAsString(index)->str());
        }
        // Pack to Net for loading
        std::shared_ptr<Module> submodule;
        {
            std::unique_ptr<SubGraphProtoT> _tempInfo(graph->UnPack());
            std::unique_ptr<NetT> _tempNet(new NetT);
            _tempNet->oplists    = std::move(_tempInfo->nodes);
            _tempNet->tensorName = std::move(_tempInfo->tensors);
            flatbuffers::FlatBufferBuilder builder(1024);
            auto offset = Net::Pack(builder, _tempNet.get());
            builder.Finish(offset);
            if (dynamic) {
                submodule.reset(PipelineModule::load(subInputs, subOutputs, (const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), dynamic));
            } else {
                submodule.reset(new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), subInputs, subOutputs));
            }
            if (graph->name() != nullptr) {
                submodule->setName(graph->name()->str());
            }
        }
        auto key = graph->name()->str();
        SubGraph subgraph;
        subgraph.inputs  = std::move(subInputs);
        subgraph.outputs = std::move(subOutputs);
        subgraph.m = submodule;
        subGraphMap.insert(std::make_pair(key, subgraph));
    }
    return subGraphMap;
}

struct SubModuleInfo {
    std::vector<int> opList;
    std::vector<int> inputs;
    std::vector<int> outputs;
    std::vector<uint8_t> tensorMask;
};
static std::vector<SubModuleInfo> _createSubModuleInfo(const MNN::Net* net, const std::set<int>& inputIndexes, const std::set<int>& outputIndexes) {
    std::vector<SubModuleInfo> submodule;
    SubModuleInfo current;
    std::vector<int> inputOps;

    // Separate the graph into several submodules
    for (int i = 0; i < net->oplists()->size(); ++i) {
        auto op = net->oplists()->GetAs<Op>(i);
        // Collect inputs
        if (op->type() == OpType_Input) {
            inputOps.emplace_back(i);
            continue;
        }
        if (op->type() == OpType_If || op->type() == OpType_While) {
            if (current.opList.size() > 0) {
                // Not empty
                submodule.emplace_back(std::move(current));
            }
            SubModuleInfo controlOp;
            controlOp.opList = {i};
            submodule.emplace_back(std::move(controlOp));
            continue;
        }
        current.opList.emplace_back(i);
    }
    if (!current.opList.empty()) {
        submodule.emplace_back(std::move(current));
    }

    /** Compute every submodule's inputs and outputs */
    // 0: not used, 1: input, 2: output, 3: mid, 4: valid output
    for (int moduleIndex = 0; moduleIndex < submodule.size(); ++moduleIndex) {
        auto& m = submodule[moduleIndex];
        if (1 == m.opList.size()) {
            // Fast path for a single-op submodule
            auto op = net->oplists()->GetAs<Op>(m.opList[0]);
            if (nullptr != op->inputIndexes()) {
                m.inputs.resize(op->inputIndexes()->size());
                ::memcpy(m.inputs.data(), op->inputIndexes()->data(), m.inputs.size() * sizeof(int));
            }
            if (nullptr != op->outputIndexes()) {
                m.outputs.resize(op->outputIndexes()->size());
                ::memcpy(m.outputs.data(), op->outputIndexes()->data(), m.outputs.size() * sizeof(int));
            }
        } else {
            m.tensorMask = std::vector<uint8_t>(net->tensorName()->size(), 0);
            auto& tensorMask = m.tensorMask;
            for (auto opIndex : m.opList) {
                auto op = net->oplists()->GetAs<Op>(opIndex);
                if (nullptr != op->inputIndexes()) {
                    for (int v = 0; v < op->inputIndexes()->size(); ++v) {
                        auto index = op->inputIndexes()->data()[v];
                        tensorMask[index] = tensorMask[index] | 1;
                    }
                }
                if (nullptr != op->outputIndexes()) {
                    for (int v = 0; v < op->outputIndexes()->size(); ++v) {
                        auto index = op->outputIndexes()->data()[v];
                        tensorMask[index] = tensorMask[index] | 2;
                    }
                }
            }
            for (int i = 0; i < tensorMask.size(); ++i) {
                if (0 == tensorMask[i]) {
                    continue;
                }
                if (1 == tensorMask[i]) {
                    m.inputs.emplace_back(i);
                    continue;
                }
                if (2 == tensorMask[i]) {
                    m.outputs.emplace_back(i);
                    continue;
                }
                if (3 == tensorMask[i]) {
                    if (outputIndexes.find(i) != outputIndexes.end()) {
                        m.outputs.emplace_back(i);
                    }
                }
            }
        }
        // Check that every input of the module is produced somewhere
        for (int i = 0; i < m.inputs.size(); ++i) {
            auto index = m.inputs[i];
            if (inputIndexes.find(index) != inputIndexes.end()) {
                continue;
            }
            bool find = false;
            for (int sub = 0; sub < moduleIndex; ++sub) {
                for (auto out : submodule[sub].outputs) {
                    if (out == index) {
                        find = true;
                        break;
                    }
                }
                if (find) {
                    break;
                }
            }
            if (find) {
                continue;
            }
            // Search earlier modules' masks and promote the tensor if needed
            for (int sub = 0; sub < moduleIndex; ++sub) {
                if (submodule[sub].tensorMask.empty()) {
                    continue;
                }
                if (submodule[sub].tensorMask[index] == 2) {
                    find = true;
                    break;
                }
                if (submodule[sub].tensorMask[index] == 3) {
                    submodule[sub].outputs.emplace_back(index);
                    submodule[sub].tensorMask[index] = 2;
                    find = true;
                    break;
                }
            }
            MNN_ASSERT(find);
        }
    }
    for (auto& m : submodule) {
        m.tensorMask.clear();
    }
    return submodule;
}

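// Editorial note (not part of this commit) on the tensorMask convention above:
// 1 means the tensor is only read inside the submodule (an input), 2 means it
// is only written (an output), 3 means it is produced and consumed internally;
// a 3 is promoted to an output only when the net's outputs list it or a later
// submodule turns out to need it.
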
static Module* _createSubModule(const MNN::Net* net, const SubModuleInfo& info, const std::map<std::string, SubGraph>& subs) {
    if (1 == info.opList.size()) {
        auto op = net->oplists()->GetAs<Op>(info.opList[0]);
        if (OpType_If == op->type()) {
            return IfModule::create(op, subs);
        }
        if (OpType_While == op->type()) {
            return WhileModule::create(op, subs);
        }
        MNN_ASSERT(false);
    }
    std::unique_ptr<NetT> _tempNet(new NetT);
    // Copy tensor names
    _tempNet->tensorName.resize(net->tensorName()->size());
    for (int i = 0; i < net->tensorName()->size(); ++i) {
        _tempNet->tensorName[i] = net->tensorName()->GetAsString(i)->str();
    }
    // Create input nodes
    std::vector<std::string> inputNames;
    for (auto index : info.inputs) {
        std::unique_ptr<OpT> inputOp(new OpT);
        inputOp->outputIndexes = {index};
        inputOp->type = OpType_Input;
        inputOp->main.type  = OpParameter_Input;
        inputOp->main.value = new InputT;
        inputOp->main.AsInput()->dims = {0, 0, -1, -1};
        _tempNet->oplists.emplace_back(std::move(inputOp));
        inputNames.emplace_back(_tempNet->tensorName[index]);
    }
    // Create compute nodes
    for (auto opIndex : info.opList) {
        std::unique_ptr<OpT> op(net->oplists()->GetAs<Op>(opIndex)->UnPack());
        _tempNet->oplists.emplace_back(std::move(op));
    }
    // Get output names
    std::vector<std::string> outputNames;
    for (auto index : info.outputs) {
        outputNames.emplace_back(_tempNet->tensorName[index]);
    }
    // Create net buffer
    flatbuffers::FlatBufferBuilder builder(1024);
    auto offset = Net::Pack(builder, _tempNet.get());
    builder.Finish(offset);
    _tempNet.reset();
    return new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), inputNames, outputNames);
}

Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
    // Create subgraphs
    auto net = GetNet(buffer);
    auto subGraphs = net->subgraphs();
    if (nullptr == net->oplists() || nullptr == net->tensorName()) {
        MNN_ERROR("Invalid net: null oplists or tensorName\n");
        return nullptr;
    }
    if (!dynamic) {
        if (nullptr == subGraphs) {
            // No control flow: a single static module is enough
            return new StaticModule(buffer, length, inputs, outputs);
        }
    }
    auto subGraphMap = _createSubGraph(net, dynamic);
    if (dynamic) {
        // For dynamic mode
        auto varMaps = Variable::loadMap(buffer, length);
        std::vector<VARP> inputVars(inputs.size());
        for (int i = 0; i < inputs.size(); ++i) {
            inputVars[i] = varMaps[inputs[i]];
        }
        std::vector<VARP> outputVars(outputs.size());
        for (int i = 0; i < outputs.size(); ++i) {
            outputVars[i] = varMaps[outputs[i]];
        }
        return extract(inputVars, outputVars, false, subGraphMap);
    }
    std::set<int> inputIndexes;
    std::set<int> outputIndexes;
    std::map<std::string, int> inputsMap;
    std::map<std::string, int> outputsMap;
    for (int i = 0; i < net->tensorName()->size(); ++i) {
        auto tname = net->tensorName()->GetAsString(i)->str();
        for (auto& s : inputs) {
            if (tname == s) {
                inputIndexes.emplace(i);
                inputsMap.insert(std::make_pair(s, i));
                break;
            }
        }
        for (auto& s : outputs) {
            if (tname == s) {
                outputIndexes.emplace(i);
                outputsMap.insert(std::make_pair(s, i));
                break;
            }
        }
    }
    std::vector<int> inputIndexesVec(inputs.size());
    for (int i = 0; i < inputs.size(); ++i) {
        inputIndexesVec[i] = inputsMap[inputs[i]];
    }
    std::vector<int> outputIndexesVec(outputs.size());
    for (int i = 0; i < outputs.size(); ++i) {
        outputIndexesVec[i] = outputsMap[outputs[i]];
    }

    auto subModulesInfo = _createSubModuleInfo(net, inputIndexes, outputIndexes);
    std::vector<std::shared_ptr<Module>> subModules(subModulesInfo.size());
    for (int i = 0; i < subModulesInfo.size(); ++i) {
        subModules[i].reset(_createSubModule(net, subModulesInfo[i], subGraphMap));
    }
    auto result = new PipelineModule;
    /**
     Compute:
     std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
     std::vector<int> mInputIndexes;
     std::vector<int> mOutputIndexes;
     int mStackSize = 0;
     */
    // Build the stack map; first: origin index, second: new index
    std::map<int, int> stackMap;
    int stackIndex = 0;
    for (auto& m : subModulesInfo) {
        for (auto index : m.inputs) {
            if (stackMap.find(index) == stackMap.end()) {
                stackMap.insert(std::make_pair(index, stackIndex));
                stackIndex++;
            }
        }
        for (auto index : m.outputs) {
            if (stackMap.find(index) == stackMap.end()) {
                stackMap.insert(std::make_pair(index, stackIndex));
                stackIndex++;
            }
        }
    }
    result->mStackSize = stackMap.size();
    for (int i = 0; i < subModulesInfo.size(); ++i) {
        auto& info = subModulesInfo[i];
        // Remap tensor indexes to stack indexes
        std::vector<int> subInputs(info.inputs.size());
        for (int j = 0; j < info.inputs.size(); ++j) {
            subInputs[j] = stackMap[info.inputs[j]];
        }
        std::vector<int> subOutputs(info.outputs.size());
        for (int j = 0; j < info.outputs.size(); ++j) {
            subOutputs[j] = stackMap[info.outputs[j]];
        }
        result->mSubModules.emplace_back(std::make_tuple(subModules[i], subInputs, subOutputs));
    }
    for (int i = 0; i < inputIndexesVec.size(); ++i) {
        inputIndexesVec[i] = stackMap[inputIndexesVec[i]];
    }
    for (int i = 0; i < outputIndexesVec.size(); ++i) {
        outputIndexesVec[i] = stackMap[outputIndexesVec[i]];
    }
    result->mInputIndexes  = std::move(inputIndexesVec);
    result->mOutputIndexes = std::move(outputIndexesVec);

    return result;
}

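// --- Editorial usage sketch, not part of this commit. The tensor names
// "input0" and "prob" are hypothetical; buffer/length hold a serialized net.
static void exampleLoad(const uint8_t* buffer, size_t length) {
    // Without subgraphs and with dynamic == false, load() returns a plain
    // StaticModule; with If/While subgraphs it builds a PipelineModule whose
    // control-flow ops become IfModule / WhileModule submodules.
    std::unique_ptr<Module> m(PipelineModule::load({"input0"}, {"prob"}, buffer, length, false));
    if (nullptr == m) {
        MNN_ERROR("Load model failed\n");
    }
}
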
Module* PipelineModule::clone(CloneContext* ctx) const {
    PipelineModule* module(new PipelineModule);
    for (const auto& it : mSubModules) {
        const std::shared_ptr<Module>& submodule = std::get<0>(it);
        const std::vector<int>& input_indices  = std::get<1>(it);
        const std::vector<int>& output_indices = std::get<2>(it);
        std::shared_ptr<Module> replica_submodule(submodule->clone(ctx));
        module->mSubModules.push_back(
            std::make_tuple(replica_submodule, input_indices, output_indices));
        module->registerModel({replica_submodule});
    }
    module->mInputIndexes  = mInputIndexes;
    module->mOutputIndexes = mOutputIndexes;
    module->mStackSize = mStackSize;
    return this->cloneBaseTo(ctx, module);
}

} // namespace Express
} // namespace MNN

@@ -8,16 +8,20 @@

#ifndef PipelineModule_hpp
#define PipelineModule_hpp
#include "Module.hpp"
#include "NN.hpp"
#include <MNN/expr/Module.hpp>
#include <MNN/expr/NN.hpp>
#include <MNN/expr/ExprCreator.hpp>
namespace MNN {
namespace Train {
namespace Express {

class MNN_PUBLIC PipelineModule : public Module {
public:
    typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
    static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain);
    static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
    static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph = {});
    static Module* extractOrigin(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain) {
        return extract(inputs, outputs, fortrain);
    }
    static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
    void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
                      NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);

@@ -26,14 +30,18 @@ public:
    std::vector<int> countOutputReference(std::vector<int> outputIndices);

private:
    PipelineModule(){}
    PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
                   const Transformer& transformFunction = {});

    Module* clone(CloneContext* ctx) const override;

    std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
    std::vector<Express::VARP> mStack;
    std::vector<int> mInputIndexes;
    std::vector<int> mOutputIndexes;
    int mStackSize = 0;
};
} // namespace Train
} // namespace Express
} // namespace MNN

#endif

@@ -0,0 +1,186 @@
//
//  StaticModule.cpp
//  MNN
//
//  Created by MNN on 2020/09/10.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "StaticModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/AutoTime.hpp>
#include "core/TensorUtils.hpp"
#include "core/Session.hpp"
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExecutorScope.hpp>
namespace MNN {
namespace Express {
StaticModule::StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix) : mInputs(inputs), mOutputs(outputs) {
    mShapeFix = shapeFix;
    mOutputNumbers = (int)outputs.size();
    /** Compute:
     std::vector<int> mOutputFromTensor;
     std::vector<std::pair<int, int>> mOutputFromInput;
     */
    for (int i = 0; i < outputs.size(); ++i) {
        auto& t = outputs[i];
        bool fromInput = false;
        for (int j = 0; j < inputs.size(); ++j) {
            if (inputs[j] == t) {
                fromInput = true;
                mOutputFromInput.emplace_back(std::make_pair(i, j));
                break;
            }
        }
        if (fromInput) {
            continue;
        }
        mOutputFromTensor.emplace_back(i);
    }
    if (mOutputFromTensor.empty()) {
        return;
    }

    mNet.reset(Interpreter::createFromBuffer(buffer, length));
#ifdef MNN_EXPR_ENABLE_PROFILER
    mNet->setSessionMode(Interpreter::Session_Debug);
#else
    mNet->setSessionMode(Interpreter::Session_Release);
#endif
    if (mShapeFix) {
        mNet->setSessionMode(Interpreter::Session_Input_Inside);
    } else {
        mNet->setSessionMode(Interpreter::Session_Input_User);
    }
    auto rt = Express::ExecutorScope::Current()->getRuntime();
    // TODO: Add Config
    ScheduleConfig config;
    config.numThread = 1;
    config.type = rt.first.begin()->first;
    config.saveTensors = outputs;
    mSession = mNet->createSession(config, rt);
    mInputTensors.resize(inputs.size());
    for (int i = 0; i < inputs.size(); ++i) {
        mInputTensors[i] = mNet->getSessionInput(mSession, inputs[i].c_str());
    }
    mOutputTensors.resize(mOutputFromTensor.size());
    for (int i = 0; i < mOutputFromTensor.size(); ++i) {
        mOutputTensors[i] = mNet->getSessionOutput(mSession, outputs[mOutputFromTensor[i]].c_str());
    }
}
StaticModule::~StaticModule() {
    // Do nothing
}
std::vector<Express::VARP> StaticModule::onForward(const std::vector<Express::VARP>& inputs) {
    AUTOTIME;
    std::vector<Express::VARP> outputs(mOutputNumbers);
    for (auto& iter : mOutputFromInput) {
        outputs[iter.first] = inputs[iter.second];
    }
    if (mOutputFromTensor.empty()) {
        return outputs;
    }
    MNN_ASSERT(inputs.size() == mInputTensors.size());
    for (int i = 0; i < inputs.size(); ++i) {
        auto info = inputs[i]->getInfo();
        mInputTensors[i]->buffer().type = info->type;
        auto des = TensorUtils::getDescribe(mInputTensors[i]);
        if (info->order == Express::NCHW) {
            des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
        }
        if (info->order == Express::NHWC) {
            des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
        }
        if (info->order == Express::NC4HW4) {
            des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
        }
        mNet->resizeTensor(mInputTensors[i], info->dim);
    }
    if (!mShapeFix) {
        for (int i = 0; i < inputs.size(); ++i) {
            mInputTensors[i]->buffer().host = (uint8_t*)inputs[i]->readMap<void>();
        }
        // FIXME: Use Interpreter's API
        mSession->setNeedResize();
    }
    mNet->resizeSession(mSession);
    if (mShapeFix) {
        for (int i = 0; i < inputs.size(); ++i) {
            // For shape-only inputs no memory is allocated, so skip the copy
            if (nullptr != mInputTensors[i]->host<void>()) {
                ::memcpy(mInputTensors[i]->host<void>(), inputs[i]->readMap<void>(), mInputTensors[i]->size());
            }
        }
    }
#ifdef MNN_EXPR_ENABLE_PROFILER
    auto globalExecutor = ExecutorScope::Current();
    Timer cost;
    TensorCallBackWithInfo beforeCallBack = [&cost](const std::vector<Tensor*>&, const OperatorInfo* info) {
        cost.reset();
        return true;
    };
    TensorCallBackWithInfo afterCallBack = [&cost, globalExecutor](const std::vector<Tensor*>&, const OperatorInfo* info) {
        auto costTimes = (float)cost.durationInUs() / 1000.0f;
        globalExecutor->addOpCostTime(info->type(), costTimes);
        globalExecutor->addOpFlops(info->type(), info->flops());
        return true;
    };
    mNet->runSessionWithCallBackInfo(mSession, beforeCallBack, afterCallBack);
#else
    mNet->runSession(mSession);
#endif
    for (int i = 0; i < mOutputTensors.size(); ++i) {
        Express::Variable::Info info;
        info.dim  = mOutputTensors[i]->shape();
        info.type = mOutputTensors[i]->getType();
        auto format = TensorUtils::getDescribe(mOutputTensors[i])->dimensionFormat;
        info.order = Express::NHWC;
        if (format == MNN_DATA_FORMAT_NCHW) {
            info.order = Express::NCHW;
        } else if (format == MNN_DATA_FORMAT_NC4HW4) {
            info.order = Express::NC4HW4;
        }
        outputs[mOutputFromTensor[i]] = Express::Variable::create(Express::Expr::create(std::move(info), mOutputTensors[i]->host<void>(), Express::VARP::CONSTANT, true), 0);
        //::memcpy(outputs[i]->writeMap<void>(), mOutputTensors[i]->host<void>(), mOutputTensors[i]->size());
    }
    return outputs;
}

Module* StaticModule::clone(CloneContext* ctx) const {
    StaticModule* module(new StaticModule);
    module->mInputs  = mInputs;
    module->mOutputs = mOutputs;

    module->mShapeFix = mShapeFix;
    module->mOutputNumbers    = mOutputNumbers;
    module->mOutputFromInput  = mOutputFromInput;
    module->mOutputFromTensor = mOutputFromTensor;
    if (mOutputFromTensor.empty()) {
        return this->cloneBaseTo(ctx, module);
    }

    module->mNet = mNet;

    auto rt = Express::ExecutorScope::Current()->getRuntime();
    ScheduleConfig config;
    config.numThread = 1;
    config.type = rt.first.begin()->first;
    config.saveTensors = mOutputs;
    module->mSession = module->mNet->createSession(config, rt);

    module->mInputTensors.resize(mInputs.size());
    module->mOutputTensors.resize(mOutputFromTensor.size());
    for (int i = 0; i < mInputs.size(); ++i) {
        module->mInputTensors[i] =
            module->mNet->getSessionInput(module->mSession, mInputs[i].c_str());
    }
    for (int i = 0; i < mOutputFromTensor.size(); ++i) {
        module->mOutputTensors[i] = module->mNet->getSessionOutput(
            module->mSession, mOutputs[mOutputFromTensor[i]].c_str());
    }
    return this->cloneBaseTo(ctx, module);
}

} // namespace Express
} // namespace MNN

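// Editorial note (not part of this commit): onForward wraps each session
// output back into a VARP via Expr::create(..., VARP::CONSTANT, true); per the
// Expr.hpp hunk below, the trailing bool is a `copy` flag, so the output data
// appears to be copied out of the session tensor rather than aliased, keeping
// the returned variables valid across the next runSession call.
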
@@ -0,0 +1,44 @@
//
//  StaticModule.hpp
//  MNN
//
//  Created by MNN on 2020/09/10.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef StaticModule_hpp
#define StaticModule_hpp

#include <MNN/expr/Module.hpp>
#include <MNN/Interpreter.hpp>
namespace MNN {
namespace Express {
class StaticModule : public Module {
public:
    StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix = false);
    virtual ~StaticModule();
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;

private:
    StaticModule() = default;

    Module* clone(CloneContext* ctx) const override;

    std::vector<std::string> mInputs;
    std::vector<std::string> mOutputs;

    std::shared_ptr<Interpreter> mNet;
    Session* mSession;
    std::vector<Tensor*> mInputTensors;
    std::vector<Tensor*> mOutputTensors;
    bool mShapeFix;
    int mOutputNumbers;

    // Indices of outputs that come from session tensors
    std::vector<int> mOutputFromTensor;
    // First: output index, Second: input var index
    std::vector<std::pair<int, int>> mOutputFromInput;
};
}
}
#endif

@@ -0,0 +1,186 @@
//
//  WhileModule.cpp
//  MNN
//
//  Created by MNN on 2020/09/10.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "WhileModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
    for (int i = 0; i < names.size(); ++i) {
        if (names[i] == key) {
            return i;
        }
    }
    return -1;
}
WhileModule* WhileModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
    auto module = new WhileModule;
    auto whileParam = op->main_as_WhileParam();
    auto& body = subGraph.find(whileParam->body_graph()->str())->second;
    auto& cond = subGraph.find(whileParam->cond_graph()->str())->second;
    module->mBody = body.m;
    module->mCond = cond.m;
    /** Compute map index
     int mCondInputNumber;
     int mBodyInputNumber;

     // First: mCondInputs' index, Second: inputs' index
     std::vector<std::pair<int, int>> mInputForCond;

     // First: mBodyInputs' index, Second: inputs' index
     std::vector<std::pair<int, int>> mInputForBody;
     std::vector<int> mOutputFromBody;
     std::vector<std::pair<int, int>> mUpdateForCond;
     std::vector<std::pair<int, int>> mUpdateForBody;
     std::vector<std::pair<int, int>> mCondUpdateForCond;
     std::vector<std::pair<int, int>> mCondUpdateForBody;
     */
    // Map inputs
    module->mBodyInputNumber = body.inputs.size();
    module->mCondInputNumber = cond.inputs.size();
    for (int i = 0; i < whileParam->aliases_inputs()->size(); ++i) {
        auto index = i;
        auto data = whileParam->aliases_inputs()->GetAs<StringVec>(i);
        for (int s = 0; s < data->data()->size(); ++s) {
            auto name = data->data()->GetAsString(s)->str();
            auto bodyInputPos = _findPos(body.inputs, name);
            if (bodyInputPos >= 0) {
                module->mInputForBody.emplace_back(std::make_pair(bodyInputPos, i));
            }
            auto condInputPos = _findPos(cond.inputs, name);
            if (condInputPos >= 0) {
                module->mInputForCond.emplace_back(std::make_pair(condInputPos, i));
            }
        }
    }
    // Map updates
    auto update = whileParam->aliases_updates();
    std::map<int, int> replaceOutputs;
    for (int i = 0; i < update->size(); ++i) {
        auto data = update->GetAs<StringVec>(i);
        int bodyInputPos  = -1;
        int condInputPos  = -1;
        int bodyOutputPos = -1;
        int condOutputPos = -1;
        MNN_ASSERT(2 == data->data()->size());
        auto outputName = data->data()->GetAsString(0)->str();
        auto inputName  = data->data()->GetAsString(1)->str();
        bodyInputPos  = _findPos(body.inputs, inputName);
        condInputPos  = _findPos(cond.inputs, inputName);
        bodyOutputPos = _findPos(body.outputs, outputName);
        condOutputPos = _findPos(cond.outputs, outputName);

        auto updateBodyOutputPos = _findPos(body.outputs, inputName);

        MNN_ASSERT(bodyOutputPos == -1 || condOutputPos == -1);
        if (condOutputPos >= 0) {
            if (bodyInputPos >= 0) {
                module->mCondUpdateForBody.emplace_back(std::make_pair(bodyInputPos, condOutputPos));
            }
            if (condInputPos >= 0) {
                module->mCondUpdateForCond.emplace_back(std::make_pair(condInputPos, condOutputPos));
            }
        }
        if (bodyOutputPos >= 0) {
            if (bodyInputPos >= 0) {
                module->mUpdateForBody.emplace_back(std::make_pair(bodyInputPos, bodyOutputPos));
            }
            if (condInputPos >= 0) {
                module->mUpdateForCond.emplace_back(std::make_pair(condInputPos, bodyOutputPos));
            }
            if (updateBodyOutputPos >= 0) {
                replaceOutputs.insert(std::make_pair(updateBodyOutputPos, bodyOutputPos));
            }
        }
    }
    // Map outputs
    auto output = whileParam->aliases_outputs();
    for (int i = 0; i < output->size(); ++i) {
        auto data = output->GetAsString(i);
        auto pos = _findPos(body.outputs, data->str());
        MNN_ASSERT(pos >= 0);
        if (replaceOutputs.find(pos) != replaceOutputs.end()) {
            pos = replaceOutputs[pos];
        }
        module->mOutputFromBody.emplace_back(pos);
    }
    return module;
}

std::vector<Express::VARP> WhileModule::onForward(const std::vector<Express::VARP>& inputsI) {
    std::vector<Express::VARP> condInputs(mCondInputNumber);
    std::vector<Express::VARP> bodyInputs(mBodyInputNumber);
    auto& inputs = inputsI;
    for (auto& p : mInputForCond) {
        condInputs[p.first] = inputs[p.second];
    }
    for (auto& p : mInputForBody) {
        bodyInputs[p.first] = inputs[p.second];
    }

    std::vector<Express::VARP> outputs(mOutputFromBody.size());
    while (true) {
        auto res = mCond->onForward(condInputs)[0];
        auto resPtr = res->readMap<int>();
        if (resPtr[0] <= 0) {
            break;
        }
        auto bodyOutputs = mBody->onForward(bodyInputs);
        Express::Variable::prepareCompute(bodyOutputs);
        for (int i = 0; i < bodyOutputs.size(); ++i) {
            auto p = bodyOutputs[i];
            if (p->expr().first->get() != nullptr) {
                auto ptr  = p->readMap<void>();
                auto info = p->getInfo();
                auto newV = Express::_Input(info->dim, info->order, info->type);
                if (nullptr != ptr) {
                    ::memcpy(newV->writeMap<void>(), ptr, info->type.bytes() * info->size);
                }
                bodyOutputs[i] = newV;
            }
        }
        for (int i = 0; i < mOutputFromBody.size(); ++i) {
            outputs[i] = bodyOutputs[mOutputFromBody[i]];
        }
        for (auto& p : mUpdateForCond) {
            condInputs[p.first] = bodyOutputs[p.second];
        }
        for (auto& p : mUpdateForBody) {
            bodyInputs[p.first] = bodyOutputs[p.second];
        }
        for (auto& p : mCondUpdateForCond) {
            condInputs[p.first] = res;
        }
        for (auto& p : mCondUpdateForBody) {
            bodyInputs[p.first] = res;
        }
    }
    return outputs;
}

Module* WhileModule::clone(CloneContext* ctx) const {
    WhileModule* module(new WhileModule);
    module->mCondInputNumber = mCondInputNumber;
    module->mBodyInputNumber = mBodyInputNumber;
    module->mInputForCond    = mInputForCond;
    module->mInputForBody    = mInputForBody;
    module->mOutputFromBody  = mOutputFromBody;
    module->mUpdateForCond   = mUpdateForCond;
    module->mUpdateForBody   = mUpdateForBody;
    module->mCondUpdateForCond = mCondUpdateForCond;
    module->mCondUpdateForBody = mCondUpdateForBody;
    module->mCond.reset(mCond->clone(ctx));
    module->mBody.reset(mBody->clone(ctx));
    return this->cloneBaseTo(ctx, module);
}

} // namespace Express
} // namespace MNN

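// Editorial sketch (not part of this commit) of the control flow implemented
// by WhileModule::onForward, in pseudo-C++:
//   while (cond(condInputs)[0] > 0) {
//       bodyOutputs = body(bodyInputs);
//       condInputs[p.first] = bodyOutputs[p.second];  // for p in mUpdateForCond
//       bodyInputs[p.first] = bodyOutputs[p.second];  // for p in mUpdateForBody
//       // mCondUpdateForCond / mCondUpdateForBody feed the condition result back
//   }
//   outputs[i] = bodyOutputs[mOutputFromBody[i]];
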
@@ -0,0 +1,46 @@
//
//  WhileModule.hpp
//  MNN
//
//  Created by MNN on 2020/09/10.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef WhileModule_hpp
#define WhileModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class WhileModule : public Module {
public:
    virtual ~WhileModule() {
        // Do nothing
    }
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
    static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);

private:
    WhileModule(){}

    Module* clone(CloneContext* ctx) const override;

    int mCondInputNumber;
    int mBodyInputNumber;

    // First: mCondInputs' index, Second: inputs' index
    std::vector<std::pair<int, int>> mInputForCond;

    // First: mBodyInputs' index, Second: inputs' index
    std::vector<std::pair<int, int>> mInputForBody;
    std::vector<int> mOutputFromBody;
    std::vector<std::pair<int, int>> mUpdateForCond;
    std::vector<std::pair<int, int>> mUpdateForBody;

    std::vector<std::pair<int, int>> mCondUpdateForCond;
    std::vector<std::pair<int, int>> mCondUpdateForBody;

    std::shared_ptr<Module> mCond;
    std::shared_ptr<Module> mBody;
};
}
}
#endif

@@ -11,6 +11,7 @@

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <MNN/ErrorCode.hpp>
#include <MNN/MNNForwardType.h>

@@ -67,6 +68,7 @@ class Session;
struct Content;
class Tensor;
class Backend;
class Runtime;

class MNN_PUBLIC OperatorInfo {
    struct Info;

@@ -89,6 +91,7 @@ protected:

typedef std::function<bool(const std::vector<Tensor*>&, const std::string& /*opName*/)> TensorCallBack;
typedef std::function<bool(const std::vector<Tensor*>&, const OperatorInfo*)> TensorCallBackWithInfo;
typedef std::pair<std::map<MNNForwardType, std::shared_ptr<Runtime>>, std::shared_ptr<Runtime>> RuntimeInfo;

/** net data holder. multiple sessions could share same net. */
class MNN_PUBLIC Interpreter {

@@ -108,7 +111,43 @@ public:
    static Interpreter* createFromBuffer(const void* buffer, size_t size);
    ~Interpreter();

    enum SessionMode {
        /** About callbacks. Default: Session_Debug */
        /** runSessionWithCallBack is allowed and can get internal op info */
        Session_Debug = 0,
        /** runSessionWithCallBack is not valid and can't get any op info from the session */
        Session_Release = 1,

        /** About input tensors. Default: Session_Input_Inside */
        /** The input tensor is allocated by the session; set input data after the session is resized */
        Session_Input_Inside = 2,
        /** The input tensor is allocated by the user; set input data before the session is resized */
        Session_Input_User = 3,
    };
    /**
     * @brief This API should be called before creating a session.
     * @param mode session mode
     * @return void
     */
    void setSessionMode(SessionMode mode);

    /**
     * @brief This API should be called before creating a session.
     * If the cache file exists, try to load the cache from it.
     * After createSession, try to save the cache to the file.
     * @param cacheFile cache file name
     * @param keySize the first `keySize` bytes are used as the key to check whether the `cacheFile` matches.
     * @return void
     */
    void setCacheFile(const char* cacheFile, size_t keySize = 128);

public:
    /**
     * @brief create runtimeInfo separately with schedule config.
     * @param config session schedule configs.
     */
    static RuntimeInfo createRuntime(const std::vector<ScheduleConfig>& configs);

    /**
     * @brief create session with schedule config. created session will be managed in net.
     * @param config session schedule config.

@@ -116,6 +155,13 @@ public:
     */
    Session* createSession(const ScheduleConfig& config);

    /**
     * @brief create session with schedule config and user-specified runtime.
     * @param config session schedule config, runtime runtimeInfo used by the created session.
     * @return created session if success, NULL otherwise.
     */
    Session* createSession(const ScheduleConfig& config, const RuntimeInfo& runtime);

    /**
     * @brief create multi-path session with schedule configs. created session will be managed in net.
     * @param configs session schedule configs.

@@ -123,6 +169,14 @@ public:
     */
    Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs);

    /**
     * @brief create multi-path session with schedule configs and user-specified runtime.
       created session will be managed in net.
     * @param configs session schedule configs.
     * @return created session if success, NULL otherwise.
     */
    Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime);

    /**
     * @brief release session.
     * @param session given session.

@@ -204,17 +258,39 @@ public:
     */
    Tensor* getSessionOutput(const Session* session, const char* name);

    enum SessionInfoCode {
        /** memory used by the session, in MB; pass a float* */
        MEMORY = 0,

        /** floating-point operations needed by the session, in M; pass a float* */
        FLOPS = 1,

        /** backends used in the session; pass an int* with length >= the number of configs at session creation */
        BACKENDS = 2,

        ALL
    };

    /**
     * @brief get session info
     * @param session given session.
     * @param code given info code.
     * @param ptr given info pointer, see SessionInfoCode for details.
     * @return true if the code is supported, false otherwise.
     */
    bool getSesionInfo(const Session* session, SessionInfoCode code, void* ptr);

    /**
     * @brief get all output tensors.
     * @param session given session.
     * @return all output tensors mapped with name.
     */
    const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
    /**
     * @brief get all input tensors.
     * @param session given session.
     * @return all input tensors mapped with name.
     */
    const std::map<std::string, Tensor*>& getSessionInputAll(const Session* session) const;

public:

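// --- Editorial usage sketch, not part of this commit, combining the APIs
// added in the hunk above. "model.mnn" and the cache path are hypothetical;
// createFromFile is the pre-existing loader, and getSesionInfo is spelled as
// declared above.
static void exampleSession() {
    std::shared_ptr<MNN::Interpreter> net(MNN::Interpreter::createFromFile("model.mnn"));
    net->setSessionMode(MNN::Interpreter::Session_Release); // no per-op callbacks
    net->setCacheFile("model.cache");                       // load/save kernel cache
    MNN::ScheduleConfig config;
    auto runtime = MNN::Interpreter::createRuntime({config});
    auto session = net->createSession(config, runtime);
    float memoryMB = 0.0f;
    net->getSesionInfo(session, MNN::Interpreter::MEMORY, &memoryMB);
}
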
@@ -38,13 +38,7 @@
    } \
    }
#else
#define MNN_ASSERT(x) \
    { \
        int res = (x); \
        if (!res) { \
            MNN_ERROR("Error for %d\n", __LINE__); \
        } \
    }
#define MNN_ASSERT(x)
#endif

#define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__);

@@ -23,8 +23,8 @@ typedef enum {
    /*Hand-written metal*/
    MNN_FORWARD_METAL = 1,

    /*Use iOS's MPS instead of hand-written metal, not supported yet*/
    MNN_FORWARD_MPS = 2,
    /*NVIDIA GPU API*/
    MNN_FORWARD_CUDA = 2,

    /*Android / Common Device GPU API*/
    MNN_FORWARD_OPENCL = 3,

@@ -12,6 +12,7 @@
#include <vector>
#include <MNN/HalideRuntime.h>
#include <MNN/MNNDefine.h>
#define MNN_MAX_TENSOR_DIM 6

namespace MNN {

@@ -10,6 +10,7 @@
#include <MNN/ErrorCode.hpp>
#include <MNN/expr/Expr.hpp>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include <vector>
#include <mutex>
#include <set>

@@ -17,41 +18,19 @@
namespace MNN {
class Backend;
class Execution;
class Runtime;
struct Op;
namespace Express {
class MNN_PUBLIC Executor {
public:
    class ComputeCache {
    public:
        void setShapeDirty(int offset, Variable::Info* info);
        void setContentDirty();
        void setContentReady();
        void syncInput(int offset, const Variable::Info* info);
        void syncOutput(int offset, Variable::Info* info);

        struct TensorContent {
            std::shared_ptr<Tensor> tensor;
            int refCount = 0;
            void reset();
            bool aliveOutside = false;
        };
    class ComputeCache;
    struct Unit;
        virtual ~ComputeCache() {}
        ComputeCache() {}
        virtual ErrorCode compute() = 0;
        virtual ErrorCode resize() = 0;
    protected:
        // Get the tensor at `index`, honoring the requested backend.
        // If the tensor doesn't belong to that backend, allocate it there and return it.
        virtual Tensor* getTensor(int index, bool host) = 0;
        void _setShapeDirty();
        friend class Executor;
        bool mContentDirty = true;
        bool mShapeDirty = true;
    };
    static void setShapeDirty(ComputeCache* cache);
    static void setContentDirty(ComputeCache* cache);
    static void* mapOutput(ComputeCache* cache, int offset, Tensor* dest);
    struct Requirement {
        std::vector<bool> contentNeedContent;
        std::vector<bool> shapeNeedContent;
        std::vector<bool> supportError;
    };
    ~Executor();
    Requirement getRequirement(Expr* expr) const;

@@ -65,25 +44,27 @@ public:
    };
    void gc(GCFlag flag = FULL);
    static std::shared_ptr<Executor> getGlobalExecutor();

    static std::shared_ptr<Executor> newExecutor(MNNForwardType type,
                                                 const BackendConfig& config,
                                                 int numberThread);
    void resetProfile();
    void dumpProfile();
    void addOpCostTime(int op, float costTime);
    void addOpCostTime(const std::string& type, float costTime);
    void addOpFlops(const std::string& type, float flops);
    class Profiler;
    static RuntimeInfo getRuntime();
private:
    void _createSingle(EXPRP expr);
    void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::vector<ComputeCache::TensorContent>&& tensors, bool forceCPU);
    void _makeCache(const std::vector<EXPRP>& outputs, bool forceCPU);
    void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::set<std::shared_ptr<Expr::Inside>>&& inputNode, bool forceCPU);

    void _addToCache(const std::vector<std::shared_ptr<ComputeCache>>& caches);
    void _resetCache();
    void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::vector<ComputeCache::TensorContent>& tensors);
    void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::set<std::shared_ptr<Expr::Inside>>& inputNode);

    Executor(std::shared_ptr<Backend> backend);
    std::shared_ptr<Backend> mBackend;
    std::shared_ptr<Backend> mBackupBackend;
    Executor(std::shared_ptr<Runtime> backend, MNNForwardType type);
    std::pair<std::shared_ptr<Runtime>, MNNForwardType> mRuntime;
    std::pair<std::shared_ptr<Runtime>, MNNForwardType> mBackupRuntime;
    std::mutex mMutex;
    std::vector<std::shared_ptr<Tensor>> mStack;
    std::vector<Tensor*> mStackInputs;
    std::vector<Tensor*> mStackOutputs;
    std::shared_ptr<Profiler> mProfiler;
};
} // namespace Express

@@ -0,0 +1,33 @@
//
//  ExecutorScope.hpp
//  MNN
//
//  Created by MNN on 2020/10/26.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef MNN_EXPR_EXECUTOR_SCOPE_HPP_
#define MNN_EXPR_EXECUTOR_SCOPE_HPP_

#include <MNN/expr/Executor.hpp>

namespace MNN {
namespace Express {

struct ExecutorScope final {
public:
    ExecutorScope() = delete;
    explicit ExecutorScope(const ExecutorScope&) = delete;
    explicit ExecutorScope(const std::shared_ptr<Executor>& current);

    explicit ExecutorScope(const std::string& scope_name,
                           const std::shared_ptr<Executor>& current);

    virtual ~ExecutorScope();

    static const std::shared_ptr<Executor> Current();
};

} // namespace Express
} // namespace MNN
#endif  // MNN_EXPR_EXECUTOR_SCOPE_HPP_

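// --- Editorial usage sketch, not part of this commit; newExecutor comes from
// the Executor.hpp hunk above.
static void exampleScope() {
    MNN::BackendConfig config;
    auto executor = MNN::Express::Executor::newExecutor(MNN_FORWARD_CPU, config, 1);
    MNN::Express::ExecutorScope scope(executor);
    // Work done while `scope` is alive resolves ExecutorScope::Current() to
    // this executor (e.g. StaticModule picks its runtime from it) instead of
    // the global one.
}
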
@@ -87,6 +87,7 @@ public:
    };
    bool fix(InputType type) const;
private:
    friend class Variable;
    std::shared_ptr<Variable> mContent;
};
inline bool operator==(Variable* src, VARP dst) {

@@ -107,7 +108,6 @@ public:
        INTS dim;
        halide_type_t type;
        int size;
        void* ptr = nullptr;
        void syncSize();
    };
    const std::string& name() const;

@@ -173,7 +173,7 @@ private:
class MNN_PUBLIC Expr {
public:
    struct Inside;
    static EXPRP create(Variable::Info&& info);
    static EXPRP create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy = true);
    static EXPRP create(const OpT* op, std::vector<VARP> inputs, int outputSize = 1);
    static EXPRP create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize = 1);
    static EXPRP create(std::unique_ptr<OpT>&& op, std::vector<VARP> inputs, int outputSize = 1) {

@@ -188,7 +188,7 @@ public:
        return mInputs;
    }
    int outputSize() const {
        return mOutputNames.size();
        return (int)mOutputNames.size();
    }
    static void replace(EXPRP oldExpr, EXPRP newExpr);
    bool requireInfo();

@ -8,9 +8,14 @@
|
|||
|
||||
#ifndef MNN_Train_Module_hpp
|
||||
#define MNN_Train_Module_hpp
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <MNN/expr/Expr.hpp>
|
||||
|
||||
namespace MNN {
|
||||
namespace Train {
|
||||
namespace Express {
|
||||
class MNN_PUBLIC Module {
|
||||
public:
|
||||
Module() = default;
|
||||
|
@ -21,9 +26,6 @@ public:
|
|||
bool loadParameters(const std::vector<Express::VARP>& parameters);
|
||||
void setIsTraining(const bool isTraining);
|
||||
bool getIsTraining();
|
||||
static std::shared_ptr<Module> transform(const std::vector<Express::VARP>& inputs,
|
||||
const std::vector<Express::VARP>& outputs);
|
||||
|
||||
void clearCache();
|
||||
|
||||
const std::string& name() const {
|
||||
|
@ -38,12 +40,45 @@ public:
void setType(std::string type) {
    mType = std::move(type);
}
// Return the parameter index
int addParameter(Express::VARP parameter);

void setParameter(Express::VARP parameter, int index);
static Module* createEmpty(const std::vector<Express::VARP>& parameters);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic = false);

static Module* clone(const Module* module, const bool shareParams = false);

class CloneContext {
public:
    CloneContext() = default;
    explicit CloneContext(const bool shareParams)
        : mShareParams(shareParams) {}
    virtual ~CloneContext() = default;

    const bool shareParams() const { return mShareParams; }

    EXPRP getOrClone(const EXPRP expr);
    VARP getOrClone(const VARP var);

private:
    bool mShareParams = false;
    std::unordered_map<const Expr*, EXPRP> mExprMap;
    std::unordered_map<const Variable*, VARP> mVarMap;
};

virtual Module* clone(CloneContext* ctx) const {
    return nullptr;
}

protected:
void registerModel(const std::vector<std::shared_ptr<Module>>& children);
void addParameter(Express::VARP parameter);
virtual void onClearCache() {
}

Module* cloneBaseTo(CloneContext* ctx, Module* module) const;

private:
void _collectParameters(std::vector<Express::VARP>& result) const;
std::vector<std::shared_ptr<Module>> mChildren;
@ -52,6 +87,13 @@ private:
std::string mName;
std::string mType;
};

struct SubGraph {
    std::vector<std::string> inputs;
    std::vector<std::string> outputs;
    std::shared_ptr<Module> m;
};

} // namespace Train
} // namespace MNN
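A hedged sketch of the Module::load / Module::clone pair declared above; the model path and tensor names are placeholders, the enclosing namespace follows this header, and error handling is omitted:

#include <memory>
#include <string>
#include <vector>
#include "Module.hpp"

using namespace MNN::Train;

std::unique_ptr<Module> loadTwin() {
    std::vector<std::string> ins  = {"data"};   // hypothetical input tensor name
    std::vector<std::string> outs = {"prob"};   // hypothetical output tensor name
    std::unique_ptr<Module> net(Module::load(ins, outs, "model.mnn"));
    // shareParams = true reuses the original weights instead of deep-copying them.
    std::unique_ptr<Module> twin(Module::clone(net.get(), /*shareParams=*/true));
    return twin;
}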
@ -9,11 +9,10 @@
#ifndef MNN_Train_NN_hpp
#define MNN_Train_NN_hpp
#include <MNN/expr/ExprCreator.hpp>
#include "Distributions.hpp"
#include "Module.hpp"
#include <MNN/expr/Module.hpp>
#include <vector>
namespace MNN {
namespace Train {
namespace Express {
class Initializer;

class MNN_PUBLIC NN {
@ -29,7 +28,7 @@ public:
};
enum FeatureScaleStatMethod {
    PerTensor = 0,
    PerChannel = 1
    PerChannel = 1 // Deprecated
};
/* Unlike enum in class, class in class needs to be dllimport or dllexport explicitly.
   Compiling on other systems will not be affected.
@ -86,7 +85,7 @@ public:
static ConvParameters ExtractConvolution(Express::EXPRP expr);

// Extract BatchNorm and Dropout
static Module* ExtractNotRunableOp(Express::EXPRP expr);
static Module* ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs);
};
};
@ -31,25 +31,30 @@ MNN_PUBLIC VARP _Const(const void* ptr, INTS shape = {}, Dimensionformat format
MNN_PUBLIC VARP _TrainableParam(float value, INTS dims, Dimensionformat format);
MNN_PUBLIC VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format,
                                halide_type_t type = halide_type_of<float>());
MNN_PUBLIC VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape);
MNN_PUBLIC VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
                      INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});

MNN_PUBLIC VARP _Conv(float weight, float bias, VARP x, INTS channel, INTS kernelSize, PaddingMode pad = VALID,
                      INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1);
MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
                      PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
                      PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false, int nbits = 8);
MNN_PUBLIC VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
                      PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
MNN_PUBLIC VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
                        INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});

MNN_PUBLIC VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
                        PaddingMode pad, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);

MNN_PUBLIC VARP _MaxPool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads = {0, 0});
MNN_PUBLIC VARP _AvePool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads = {0, 0});
MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NHWC);
MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NCHW);
MNN_PUBLIC VARP _Reshape(VARP x, VARP shape);
MNN_PUBLIC VARP _Scale(VARP x, int channels, std::vector<float>&& scales, std::vector<float>&& bias);

MNN_PUBLIC VARP _Relu(VARP x, float slope = 0.0f);
MNN_PUBLIC VARP _Relu6(VARP x);
MNN_PUBLIC VARP _Relu6(VARP x, float minValue = 0.0f, float maxValue = 6.0f);
MNN_PUBLIC VARP _PRelu(VARP x, std::vector<float> &&slopes);
MNN_PUBLIC VARP _Softmax(VARP logits, int axis = -1);
MNN_PUBLIC VARP _Softplus(VARP features);
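Since _Relu6 now exposes its clamp bounds, the overload effectively doubles as a general clamp; a one-line sketch (bounds illustrative, not from the commit):

#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

VARP clamp01(VARP x) {
    // relu6(x) = min(max(x, minValue), maxValue); custom bounds give a plain clamp.
    return _Relu6(x, 0.0f, 1.0f);
}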
@ -76,7 +81,7 @@ MNN_PUBLIC VARP _Pad(VARP x, VARP paddings, PadValueMode mode = CONSTANT);
MNN_PUBLIC VARP _ExpandDims(VARP input, int axis);
MNN_PUBLIC VARP _ExpandDims(VARP input, VARP axis);

MNN_PUBLIC VARP _Shape(VARP input);
MNN_PUBLIC VARP _Shape(VARP input, bool nchw = false);
MNN_PUBLIC VARP _Stack(VARPS values, int axis = 0);
enum InterpolationMethod {BILINEAR, NEAREST};
MNN_PUBLIC VARP _CropAndResize(VARP image, VARP boxes, VARP box_ind, VARP crop_size,
@ -92,6 +97,7 @@ MNN_PUBLIC VARP _GatherND(VARP params, VARP indices);
MNN_PUBLIC VARP _Selu(VARP features, float scale, float alpha);
MNN_PUBLIC VARP _Size(VARP input);
MNN_PUBLIC VARP _Elu(VARP features, float alpha = 1.0);
MNN_PUBLIC VARP _Threshold(VARP features, float alpha = 1.0);
MNN_PUBLIC VARP _MatrixBandPart(VARP input, VARP num_lower, VARP num_upper);
MNN_PUBLIC std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims);
MNN_PUBLIC VARP _SetDiff1D(VARP x, VARP y);
@ -123,7 +129,8 @@ MNN_PUBLIC VARP _ZeroGrad(VARP x);

// Int8 Inference
MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
                      PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu);
                      PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits = 8);
MNN_PUBLIC VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim);
MNN_PUBLIC VARP _FloatToInt8(VARP x, VARP scale, char minValue, char maxValue);
MNN_PUBLIC VARP _Int8ToFloat(VARP x, VARP scale);
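To make the widened overload concrete, here is a minimal sketch of the int8 _Conv with the new nbits argument; weights, biases and scales are dummy values, and nbits = 8 reproduces the previous behaviour since that is the default:

#include <vector>
#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

VARP quantizedConv(VARP x) {
    const int ic = 4, oc = 16, k = 3;
    std::vector<int8_t> weight(oc * ic * k * k, 1); // placeholder quantized weights
    std::vector<int>    bias(oc, 0);
    std::vector<float>  scale(oc, 0.05f);           // per-channel dequantization scales
    return _Conv(std::move(weight), std::move(bias), std::move(scale), x,
                 {ic, oc}, {k, k}, SAME, {1, 1}, {1, 1},
                 /*group=*/1, {0, 0}, /*relu=*/false, /*nbits=*/8);
}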
@ -0,0 +1,102 @@
//
//  RuntimeScope.hpp
//  MNN
//
//  Created by MNN on 2020/10/26.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef MNN_EXPR_SCOPE_HPP_
#define MNN_EXPR_SCOPE_HPP_

#include <cstdio>
#include <vector>
#include <string>
#include <mutex>

#include <MNN/Interpreter.hpp>

namespace MNN {
namespace Express {

template <typename T>
class Scope {
public:
    Scope();
    virtual ~Scope() = default;

    struct ScopedContent {
        std::string scope_name;
        T content;
    };
    void EnterScope(const ScopedContent& current);
    void EnterScope(const T& current);
    void EnterScope(const std::string& scope_name, const T& current);

    void ExitScope();

    const ScopedContent& Current() const;

    int ScopedLevel() const { return scoped_level_; }

private:
    std::string MakeScopeName(const std::string& prefix, int level) const;

    mutable std::mutex mutex_;
    int scoped_level_ = 0;
    std::vector<ScopedContent> scoped_contents_;
};

template <typename T>
Scope<T>::Scope() : scoped_level_(0) {
}

template <typename T>
void Scope<T>::EnterScope(const ScopedContent& current) {
    std::lock_guard<std::mutex> lock(mutex_);
    ++scoped_level_;
    scoped_contents_.push_back(current);
}

template <typename T>
void Scope<T>::EnterScope(const T& current) {
    EnterScope("scope", current);
}

template <typename T>
void Scope<T>::EnterScope(const std::string& scope_name,
                          const T& current) {
    std::lock_guard<std::mutex> lock(mutex_);
    int scoped_level = ScopedLevel();
    std::string name = MakeScopeName(scope_name, scoped_level++);
    ScopedContent content{name, current};
    ++scoped_level_;
    scoped_contents_.push_back(content);
}

template <typename T>
void Scope<T>::ExitScope() {
    std::lock_guard<std::mutex> lock(mutex_);
    --scoped_level_;
    scoped_contents_.resize(scoped_level_);
}

template <typename T>
const typename Scope<T>::ScopedContent& Scope<T>::Current() const {
    std::lock_guard<std::mutex> lock(mutex_);
    MNN_CHECK(scoped_contents_.size() > 0, "Scope level should not be 0.");
    return scoped_contents_.back();
}

template <typename T>
std::string Scope<T>::MakeScopeName(const std::string& prefix,
                                    int level) const {
    char s[16];
    snprintf(s, 16, "%d", level);
    return prefix + "/" + std::string(s);
}

} // namespace Express
} // namespace MNN

#endif // MNN_EXPR_SCOPE_HPP_
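A small sketch of how the Scope<T> template above composes, with an int payload and illustrative names (the include path is assumed from this commit's layout):

#include <cassert>
#include <MNN/expr/Scope.hpp>

void scopeDemo() {
    MNN::Express::Scope<int> scope;
    scope.EnterScope("session", 42);       // pushes {"session/0", 42}
    assert(scope.ScopedLevel() == 1);
    int payload = scope.Current().content; // 42 while the scope is active
    (void)payload;
    scope.ExitScope();                     // back to level 0, stack emptied
}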
@ -1,12 +1,14 @@
# MNN_Windows
# |------- MNN_Windows_lib
# |---------- Dynamic_Library
# |---------- Static_Library
# |------- MNN_Windows_tools
# MNN
# |-- Debug
# |    |--- MD
# |    |--- MT
# |-- Release
#      |--- MD
#      |--- MT

$erroractionpreference = "stop"

Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN_Windows"
Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN"

# clear and create package directory
powershell ./schema/generate.ps1
@ -14,32 +16,50 @@ Set-Variable -Name WINDOWS_PACKAGE_PATH -Value "$(pwd)\$WINDOWS_PACKAGE_NAME"
Remove-Item $WINDOWS_PACKAGE_PATH -Recurse -ErrorAction Ignore
mkdir $WINDOWS_PACKAGE_PATH\
cd $WINDOWS_PACKAGE_PATH
mkdir -p MNN_Windows_lib\Dynamic_Library
mkdir -p MNN_Windows_lib\Static_Library
mkdir MNN_Windows_tools
mkdir -p Debug\MD
mkdir -p Debug\MT
mkdir -p Release\MD
mkdir -p Release\MT
cd ..

Remove-Item build -Recurse -ErrorAction Ignore
mkdir build
cd build
pushd build
# tools without dependency, static library without sep_build
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
#cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
#ninja
#pushd $WINDOWS_PACKAGE_PATH
#cp ..\build\*.exe MNN_Windows_tools
#cp ..\build\*.pdb MNN_Windows_tools
#cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
#popd

Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
ninja
pushd $WINDOWS_PACKAGE_PATH
cp ..\build\*.exe MNN_Windows_tools
cp ..\build\*.pdb MNN_Windows_tools
cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MT
cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MT
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MT

Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MD
cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MD
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MD

Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MT
cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MT
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MT

Remove-Item CMakeCache.txt -ErrorAction Ignore
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
ninja
cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MD
cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MD
cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MD

popd

# dynamic library without sep_build
rm .\CMakeCache.txt
cmake -G "Ninja" -DMNN_SEP_BUILD=OFF ..
ninja
cd $WINDOWS_PACKAGE_PATH
cp ..\build\MNN.lib MNN_Windows_lib\Dynamic_Library
cp ..\build\MNN.dll MNN_Windows_lib\Dynamic_Library
cp ..\build\MNN.pdb MNN_Windows_lib\Dynamic_Library

# Compress MNN_Windows_lib and MNN_Windows_tools
Compress-Archive -Path MNN_Windows_lib -DestinationPath MNN_Windows_lib.zip -Update -CompressionLevel Optimal
Compress-Archive -Path MNN_Windows_tools -DestinationPath MNN_Windows_tools.zip -Update -CompressionLevel Optimal
@ -8,15 +8,14 @@ set_target_properties(
${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN.so
)

add_library( MNN_Arm82 SHARED IMPORTED GLOBAL )
set_target_properties(
    MNN_Arm82
    PROPERTIES IMPORTED_LOCATION
    ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Arm82.so
)

add_library( MNN_CL SHARED IMPORTED GLOBAL )
set_target_properties( MNN_CL
    PROPERTIES IMPORTED_LOCATION
    ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_CL.so
)

add_library( MNN_Express SHARED IMPORTED GLOBAL )
set_target_properties( MNN_Express
    PROPERTIES IMPORTED_LOCATION
    ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Express.so
)
@ -5,7 +5,6 @@ adb push ./libMNN_CL.so /data/local/tmp/MNN/libMNN_CL.so
adb push ./libMNN_Vulkan.so /data/local/tmp/MNN/libMNN_Vulkan.so
adb push ./libMNN_GL.so /data/local/tmp/MNN/libMNN_GL.so
adb push ./libMNN_Express.so /data/local/tmp/MNN/libMNN_Express.so
adb push ./libMNN_Arm82.so /data/local/tmp/MNN/libMNN_Arm82.so
adb push ./MNNV2Basic.out /data/local/tmp/MNN/MNNV2Basic.out
adb shell "cd /data/local/tmp/MNN && rm -r output"
adb shell "cd /data/local/tmp/MNN && mkdir output"

@ -18,3 +17,4 @@ adb push ./timeProfile.out /data/local/tmp/MNN/timeProfile.out
adb push ./train.out /data/local/tmp/MNN/train.out
adb push ./benchmark.out /data/local/tmp/MNN/benchmark.out
adb push ./benchmarkExprModels.out /data/local/tmp/MNN/benchmarkExprModels.out
adb push ./run_test.out /data/local/tmp/MNN/run_test.out
File diff suppressed because it is too large
@ -4,6 +4,8 @@
<dict>
    <key>CFBundleDevelopmentRegion</key>
    <string>$(DEVELOPMENT_LANGUAGE)</string>
    <key>CFBundleIdentifier</key>
    <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
    <key>CFBundleInfoDictionaryVersion</key>
    <string>6.0</string>
    <key>CFBundleName</key>
@ -1,57 +0,0 @@
#!bin/sh

echo "Register Op Begin"

function read_dir(){
    str1=`grep -e $2 $1/*.$4|sed s/[[:space:]]//g`
    array=(${str1//\;/ })
    for var in ${array[@]}; do
        `echo $var|awk -F $3 '{
            a="___";
            b="__();";
            c="extern void ";
            print(c""a""$3"__"$4""b) >> "extern";
            print (a""$3"__"$4""b) >> "call"
        }'`
    done
}

start=$(date +%s)

SEP='[:(,)]'
FILE_EXTERN_CPP='cpp'
FILE_EXTERN_MM='mm'

SHELL_FOLDER=$(dirname $0)'/../../..'
# handle CPU
CPUFILE=$SHELL_FOLDER/source/backend/cpu/CPUOPRegister.cpp
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $CPUFILE
echo "Start Register CPU"
CPU=$SHELL_FOLDER/source/backend/cpu
CPU_KEY='REGISTER_CPU_OP_CREATOR'
read_dir $CPU $CPU_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $CPUFILE
rm extern
echo '\nvoid registerCPUOps() {' >> $CPUFILE
cat call >> $CPUFILE
echo '}\n#endif\n}' >> $CPUFILE
rm call

# handle Shape
echo "Start Register Shape"
SHAPEFILE=$SHELL_FOLDER/source/shape/ShapeRegister.cpp
SHAPE=$SHELL_FOLDER/source/shape
SHAPE_KEY="REGISTER_SHAPE"
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $SHAPEFILE
read_dir $SHAPE $SHAPE_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $SHAPEFILE
rm extern
echo '\nvoid registerShapeOps() {' >> $SHAPEFILE
cat call >> $SHAPEFILE
echo '}\n#endif\n}' >> $SHAPEFILE
rm call

echo "Register Op End"

dur=$(echo "$(date +%s) - $start" | bc)
printf "Execution time: %.6f seconds" $dur
@ -8,10 +8,14 @@

#import "AppDelegate.h"
#import "MNNTestSuite.h"
#import <MNN/expr/Executor.hpp>

@implementation AppDelegate

- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    MNN::BackendConfig config;
    // To test Metal, change MNN_FORWARD_CPU to MNN_FORWARD_METAL
    MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1);
    MNNTestSuite::runAll();
    return YES;
}
@ -8,6 +8,9 @@ import cv2
def inference():
    """ inference mobilenet_v1 using a specific picture """
    interpreter = MNN.Interpreter("mobilenet_v1.mnn")
    interpreter.setCacheFile('.tempcache')
    config = {}
    config['precision'] = 'low'
    session = interpreter.createSession(config)
    input_tensor = interpreter.getSessionInput(session)
    image = cv2.imread('ILSVRC2012_val_00049999.JPEG')
@ -96,8 +96,7 @@ def demo():
    train_dataloader = MNN.data.DataLoader(train_dataset, batch_size = 64, shuffle = True)
    test_dataloader = MNN.data.DataLoader(test_dataset, batch_size = 100, shuffle = False)

    opt = MNN.optim.SGD(0.01, 0.9, 0.0005)
    opt.append(model.parameters)
    opt = MNN.optim.SGD(model, 0.01, 0.9, 0.0005)

    F.set_thread_number(4)
@ -125,8 +125,7 @@ def demo():

    net = Net(feature_extractor, num_classes)

    opt = MNN.optim.SGD(1e-3, 0.9, 0.00004)
    opt.append(net.parameters)
    opt = MNN.optim.SGD(net, 1e-3, 0.9, 0.00004)

    for epoch in range(10):
        train_func(net, train_dataloader, opt, num_classes)
@ -0,0 +1,15 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr

v0 = F.const([0.3, 0.1, -0.3, 0.4], [4])
v2 = F.const([0.3, 0.1, -0.3, 0.4], [4])
v1 = v0 * v0

outputDiff = F.const([0.05, 0.03, 0.02, 0.01], [4])

v0Grad = nn.grad(v1, [v0, v2], [outputDiff], "")
print(v0Grad)
print(v0Grad[0].read())
F.save(v0Grad, "temp.grad")
@ -0,0 +1,36 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr

class Net(nn.Module):
    """construct a lenet 5 model"""
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.conv(1, 20, [5, 5])
        self.conv2 = nn.conv(20, 50, [5, 5])
        self.fc1 = nn.linear(800, 500)
        self.fc2 = nn.linear(500, 10)
        self.step = F.const([10], [], F.NCHW, F.int)
        self.lr = F.const([0.0004], [], F.NCHW, F.float)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool(x, [2, 2], [2, 2])
        x = F.relu(self.conv2(x))
        x = F.max_pool(x, [2, 2], [2, 2])
        x = F.reshape(x, [0, -1])
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x, 1)
        return x


model = Net()
F.save(model.parameters, 'mnist.snapshot')


model2 = Net()
model2.load_parameters(F.load_as_list('mnist.snapshot'))

print(model2.lr.read())
print(model2.step.read())
Some files were not shown because too many files have changed in this diff