GitHub release 1.1.0

Hui Shu 2020-11-05 16:41:56 +08:00
parent 939a80dba8
commit d6795ad031
1296 changed files with 98954 additions and 55065 deletions

.gitignore vendored
View File

@ -330,7 +330,6 @@ project/android/.idea/caches/build_file_checksums.ser
# FIXME(haijing): Xcode pre-build stage breaks compilation of flatbuffers by setting envs that do cmake cross-compilation for iOS # FIXME(haijing): Xcode pre-build stage breaks compilation of flatbuffers by setting envs that do cmake cross-compilation for iOS
# schema/current # schema/current
schema/private schema/private
schema/current
tools/converter/source/IR tools/converter/source/IR
benchmark/benchmark.txt benchmark/benchmark.txt
@ -345,18 +344,13 @@ pymnn/android/.idea/modules.xml
pymnn/android/.idea/runConfigurations.xml pymnn/android/.idea/runConfigurations.xml
pymnn/android/.idea/vcs.xml pymnn/android/.idea/vcs.xml
pymnn/android/.idea/caches/build_file_checksums.ser pymnn/android/.idea/caches/build_file_checksums.ser
pymnn/src/pybind_private/
buildios buildios
build*/ build*/
include/MNN/VCS.h include/MNN/VCS.h
source/backend/opencl/execution/cl/codegen/opencl_program.cc
source/backend/opencl/execution/cl/opencl_program.cc
# FIXME(haijing): MTL issues.....
# source/backend/metal/MetalOPRegister.mm
source/backend/opengl/AllShader.cpp source/backend/opengl/AllShader.cpp
include/MNN/backend/opengl/shaders/AllShader.h include/MNN/backend/opengl/shaders/AllShader.h
source/backend/vulkan/compiler/AllShader.cpp
include/MNN/backend/vulkan/shaders/AllShader.h
.idea .idea
project/ios/ios_64 project/ios/ios_64
project/ios/ios_32 project/ios/ios_32

View File

@ -49,6 +49,7 @@ include(FindPythonInterp REQUIRED)
option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen" OFF) option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen" OFF)
option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF) option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF)
option(MNN_BUILD_SHARED_LIBS "MNN build shared or static lib" ON) option(MNN_BUILD_SHARED_LIBS "MNN build shared or static lib" ON)
option(MNN_WIN_RUNTIME_MT "MNN use /MT on Windows dll" OFF)
option(MNN_FORBID_MULTI_THREAD "Disable Multi Thread" OFF) option(MNN_FORBID_MULTI_THREAD "Disable Multi Thread" OFF)
option(MNN_OPENMP "Use OpenMP's thread pool implementation. Does not work on iOS or Mac OS" OFF) option(MNN_OPENMP "Use OpenMP's thread pool implementation. Does not work on iOS or Mac OS" OFF)
option(MNN_USE_THREAD_POOL "Use MNN's own thread pool implementation" ON) option(MNN_USE_THREAD_POOL "Use MNN's own thread pool implementation" ON)
@ -62,14 +63,14 @@ option(MNN_SUPPORT_TFLITE_QUAN "Enable MNN's tflite quantized op" ON)
option(MNN_DEBUG_MEMORY "MNN Debug Memory Access" OFF) option(MNN_DEBUG_MEMORY "MNN Debug Memory Access" OFF)
option(MNN_DEBUG_TENSOR_SIZE "Enable Tensor Size" OFF) option(MNN_DEBUG_TENSOR_SIZE "Enable Tensor Size" OFF)
option(MNN_GPU_TRACE "Enable MNN Gpu Debug" OFF) option(MNN_GPU_TRACE "Enable MNN Gpu Debug" OFF)
option(MNN_OPENCL_LWS_TUNE "Enable MNN OpenCL Lws Tuning" ON)
option(MNN_PORTABLE_BUILD "Link the static version of third party libraries where possible to improve the portability of built executables" OFF) option(MNN_PORTABLE_BUILD "Link the static version of third party libraries where possible to improve the portability of built executables" OFF)
option(MNN_SEP_BUILD "Build MNN Backends and expression separately. Only works with MNN_BUILD_SHARED_LIBS=ON" ON) option(MNN_SEP_BUILD "Build MNN Backends and expression separately. Only works with MNN_BUILD_SHARED_LIBS=ON" ON)
option(NATIVE_LIBRARY_OUTPUT "Native Library Path" OFF) option(NATIVE_LIBRARY_OUTPUT "Native Library Path" OFF)
option(NATIVE_INCLUDE_OUTPUT "Native Include Path" OFF) option(NATIVE_INCLUDE_OUTPUT "Native Include Path" OFF)
option(MNN_AAPL_FMWK "Build MNN.framework instead of traditional .a/.dylib" OFF) option(MNN_AAPL_FMWK "Build MNN.framework instead of traditional .a/.dylib" OFF)
option(MNN_FMA_ENABLE "x86 routine use fma extension" OFF)
option(MNN_WITH_PLUGIN "Build with plugin op support." OFF) option(MNN_WITH_PLUGIN "Build with plugin op support." OFF)
option(MNN_BUILD_MINI "Build MNN-MINI that just supports fixed shape models." OFF)
option(MNN_USE_SSE "Use SSE optimization for x86 if possible" ON)
IF(NOT MNN_BUILD_SHARED_LIBS) IF(NOT MNN_BUILD_SHARED_LIBS)
message(WARNING "Close MNN_SEP_BUILD for static library") message(WARNING "Close MNN_SEP_BUILD for static library")
@ -79,27 +80,29 @@ IF(APPLE AND MNN_AAPL_FMWK AND MNN_SEP_BUILD)
message(WARNING "MNN_SEP_BUILD AND MNN_AAPL_FMWK can't coexist. Turning off MNN_SEP_BUILD") message(WARNING "MNN_SEP_BUILD AND MNN_AAPL_FMWK can't coexist. Turning off MNN_SEP_BUILD")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE) SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF() ENDIF()
IF(MSVC OR WIN32) IF(WIN32)
IF(MNN_SEP_BUILD) IF(MNN_SEP_BUILD)
message(WARNING "MNN_SEP_BUILD IS TROUBLESOME ON Windows. Forcing OFF...") message(WARNING "MNN_SEP_BUILD IS TROUBLESOME ON Windows. Forcing OFF...")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE) SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF() ENDIF()
SET(MNN_USE_SYSTEM_LIB ON CACHE BOOL "<docstring>" FORCE) add_definitions(-D_CRT_SECURE_NO_WARNINGS)
# generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946 IF(MSVC)
SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") # generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi") SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
ENDIF()
ENDIF() ENDIF()
include(${CMAKE_CURRENT_LIST_DIR}/cmake/macros.cmake) include(${CMAKE_CURRENT_LIST_DIR}/cmake/macros.cmake)
IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT MNN_BUILD_SHARED_LIBS AND NOT (MSVC OR WIN32)) IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT MNN_BUILD_SHARED_LIBS AND NOT (MSVC OR WIN32))
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE) SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
IF(MNN_BUILD_CONVERTER) IF(MNN_BUILD_CONVERTER)
SET(MNN_PORTABLE_BUILD ON CACHE BOOL "<docstring>" FORCE) SET(MNN_PORTABLE_BUILD ON CACHE BOOL "<docstring>" FORCE)
@ -117,6 +120,9 @@ endif()
if(MNN_SUPPORT_TFLITE_QUAN) if(MNN_SUPPORT_TFLITE_QUAN)
add_definitions(-DMNN_SUPPORT_TFLITE_QUAN) add_definitions(-DMNN_SUPPORT_TFLITE_QUAN)
endif() endif()
if(MNN_BUILD_MINI)
add_definitions(-DMNN_BUILD_MINI)
endif()
# debug options # debug options
if(MNN_DEBUG_MEMORY) if(MNN_DEBUG_MEMORY)
@ -128,9 +134,6 @@ endif()
if(MNN_GPU_TRACE) if(MNN_GPU_TRACE)
add_definitions(-DMNN_GPU_FORCE_FINISH) add_definitions(-DMNN_GPU_FORCE_FINISH)
endif() endif()
if(MNN_OPENCL_LWS_TUNE)
add_definitions(-DMNN_OPENCL_LWS_TUNE)
endif()
# backend options # backend options
option(MNN_METAL "Enable Metal" OFF) option(MNN_METAL "Enable Metal" OFF)
@ -138,11 +141,8 @@ option(MNN_OPENCL "Enable OpenCL" OFF)
option(MNN_OPENGL "Enable OpenGL" OFF) option(MNN_OPENGL "Enable OpenGL" OFF)
option(MNN_VULKAN "Enable Vulkan" OFF) option(MNN_VULKAN "Enable Vulkan" OFF)
option(MNN_ARM82 "Enable ARM82" OFF) option(MNN_ARM82 "Enable ARM82" OFF)
option(MNN_CUDA "Enable CUDA" OFF)
# codegen register ops option(MNN_TENSORRT "Enable TensorRT" OFF)
if (MNN_METAL)
add_definitions(-DMNN_CODEGEN_REGISTER)
endif()
# target options # target options
option(MNN_BUILD_BENCHMARK "Build benchmark or not" OFF) option(MNN_BUILD_BENCHMARK "Build benchmark or not" OFF)
@ -165,11 +165,13 @@ message(STATUS "\tOpenCL: ${MNN_OPENCL}")
message(STATUS "\tOpenGL: ${MNN_OPENGL}") message(STATUS "\tOpenGL: ${MNN_OPENGL}")
message(STATUS "\tVulkan: ${MNN_VULKAN}") message(STATUS "\tVulkan: ${MNN_VULKAN}")
message(STATUS "\tARM82: ${MNN_ARM82}") message(STATUS "\tARM82: ${MNN_ARM82}")
message(STATUS "\tTensorRT: ${MNN_TENSORRT}")
message(STATUS "\tCUDA: ${MNN_CUDA}")
message(STATUS "\tOpenMP: ${MNN_OPENMP}") message(STATUS "\tOpenMP: ${MNN_OPENMP}")
message(STATUS "\tHidden: ${MNN_HIDDEN}") message(STATUS "\tHidden: ${MNN_HIDDEN}")
message(STATUS "\tBuild Path: ${CMAKE_CURRENT_BINARY_DIR}") message(STATUS "\tBuild Path: ${CMAKE_CURRENT_BINARY_DIR}")
if(WIN32) if(MSVC)
if(${CMAKE_VERSION} VERSION_LESS "3.14.0") if(${CMAKE_VERSION} VERSION_LESS "3.14.0")
message(FATAL_ERROR "MNN requires CMake 3.14+ to build on Windows!") message(FATAL_ERROR "MNN requires CMake 3.14+ to build on Windows!")
endif() endif()
@ -178,14 +180,14 @@ if(WIN32)
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if (MNN_BUILD_SHARED_LIBS) if (MNN_WIN_RUNTIME_MT)
if(${flag_var} MATCHES "/MT")
string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
endif()
else ()
if(${flag_var} MATCHES "/MD") if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif() endif()
else ()
if(${flag_var} MATCHES "/MT")
string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
endif()
endif () endif ()
endforeach() endforeach()
elseif(CMAKE_SYSTEM_NAME MATCHES "^Android" OR CMAKE_SYSTEM_NAME MATCHES "^Linux") elseif(CMAKE_SYSTEM_NAME MATCHES "^Android" OR CMAKE_SYSTEM_NAME MATCHES "^Linux")
@ -270,6 +272,8 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "^Linux")
endif() endif()
include_directories(${CMAKE_CURRENT_LIST_DIR}/include/ include_directories(${CMAKE_CURRENT_LIST_DIR}/include/
${CMAKE_CURRENT_LIST_DIR}/source/ ${CMAKE_CURRENT_LIST_DIR}/source/
${CMAKE_CURRENT_LIST_DIR}/express/
${CMAKE_CURRENT_LIST_DIR}/tools/
${CMAKE_CURRENT_LIST_DIR}/schema/current/ ${CMAKE_CURRENT_LIST_DIR}/schema/current/
${CMAKE_CURRENT_LIST_DIR}/3rd_party/ ${CMAKE_CURRENT_LIST_DIR}/3rd_party/
${CMAKE_CURRENT_LIST_DIR}/3rd_party/flatbuffers/include ${CMAKE_CURRENT_LIST_DIR}/3rd_party/flatbuffers/include
@ -293,12 +297,12 @@ FILE(GLOB MNN_CV_SRC ${CMAKE_CURRENT_LIST_DIR}/source/cv/*)
add_library(MNNCV OBJECT ${MNN_CV_SRC}) add_library(MNNCV OBJECT ${MNN_CV_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCV>) list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCV>)
list(APPEND MNN_TARGETS MNNCV) list(APPEND MNN_TARGETS MNNCV)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)") if (MNN_USE_SSE)
if(WIN32 OR MSVC) if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
target_compile_options(MNNCV PRIVATE /arch:AVX) if (NOT MSVC)
else() target_compile_options(MNNCV PRIVATE -msse3)
target_compile_options(MNNCV PRIVATE -msse3) target_compile_options(MNNCV PRIVATE -mavx)
target_compile_options(MNNCV PRIVATE -mavx) endif()
endif() endif()
endif() endif()
@ -308,11 +312,19 @@ add_library(MNNMath OBJECT ${MNN_Math_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMath>) list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMath>)
list(APPEND MNN_TARGETS MNNMath) list(APPEND MNN_TARGETS MNNMath)
# Shape # Transform
FILE(GLOB MNN_Shape_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/*) FILE(GLOB MNN_Transform_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/* ${CMAKE_CURRENT_LIST_DIR}/source/geometry/*)
add_library(MNNShape OBJECT ${MNN_Shape_SRC}) add_library(MNNTransform OBJECT ${MNN_Transform_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNShape>) IF (NOT MNN_BUILD_MINI)
list(APPEND MNN_TARGETS MNNShape) list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTransform>)
ENDIF()
list(APPEND MNN_TARGETS MNNTransform)
# Utils
FILE(GLOB MNN_Utils_SRC ${CMAKE_CURRENT_LIST_DIR}/source/utils/*)
add_library(MNNUtils OBJECT ${MNN_Utils_SRC})
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNUtils>)
list(APPEND MNN_TARGETS MNNUtils)
# Compute # Compute
FILE(GLOB MNN_Compute_SRC ${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/compute/*) FILE(GLOB MNN_Compute_SRC ${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/compute/*)
@ -327,7 +339,9 @@ list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCPU>)
list(APPEND MNN_TARGETS MNNCPU) list(APPEND MNN_TARGETS MNNCPU)
# X86_64 AVX/SSE # X86_64 AVX/SSE
if (MNN_USE_SSE)
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/x86_x64/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/x86_x64/CMakeLists.txt)
endif()
# AArch32/64 Assemblies # AArch32/64 Assemblies
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/arm/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/arm/CMakeLists.txt)
@ -377,7 +391,7 @@ if (NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
if (WIN32) if (MSVC)
set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}") set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}")
set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}") set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}")
endif() endif()
@ -387,20 +401,22 @@ endif()
set(CMAKE_CXX_FLAGS_ORIGIN ${CMAKE_CXX_FLAGS}) set(CMAKE_CXX_FLAGS_ORIGIN ${CMAKE_CXX_FLAGS})
set(CMAKE_C_FLAGS_ORIGIN ${CMAKE_C_FLAGS}) set(CMAKE_C_FLAGS_ORIGIN ${CMAKE_C_FLAGS})
if ((NOT (MSVC OR WIN32)) AND MNN_HIDDEN) if ((NOT MSVC) AND MNN_HIDDEN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden -fvisibility=hidden") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden -fvisibility=hidden")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
if (NOT APPLE) # Omitting the frame pointer may make debugging difficult
if ((NOT APPLE) AND (NOT WIN32))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer")
endif() endif()
endif() endif()
if (NOT (MSVC OR WIN32)) if (NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions ") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions ")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif() endif()
# Metal # Metal
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt) set(MNN_DEPS "")
set(MNN_EXTRA_DEPENDS "")
list(APPEND MNN_DEPS MNN) list(APPEND MNN_DEPS MNN)
# Plugin # Plugin
@ -409,6 +425,14 @@ if(MNN_WITH_PLUGIN)
include(${CMAKE_CURRENT_LIST_DIR}/source/plugin/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/source/plugin/CMakeLists.txt)
endif() endif()
# Metal
if(MNN_METAL AND APPLE)
add_definitions(-DMNN_METAL_ENABLED=1)
include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
list(APPEND MNN_TARGETS MNNMetal)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMetal>)
endif()
# Vulkan # Vulkan
IF(MNN_VULKAN) IF(MNN_VULKAN)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/vulkan/) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/vulkan/)
@ -446,22 +470,34 @@ IF(MNN_OPENGL)
ENDIF() ENDIF()
ENDIF() ENDIF()
# CUDA
IF(MNN_CUDA)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/cuda/)
list(APPEND MNN_TARGETS MNN_CUDA)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_CUDA>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_CUDA_LIBS})
ENDIF()
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR IOS_ARCH STREQUAL "arm64") IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR IOS_ARCH STREQUAL "arm64")
# ARM82 Assemblies # ARM82 Assemblies
IF(MNN_ARM82) IF(MNN_ARM82)
add_definitions(-DENABLE_ARMV82) add_definitions(-DENABLE_ARMV82)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/arm82/) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/arm82/)
IF(MNN_SEP_BUILD) list(APPEND MNN_TARGETS MNN_Arm82)
list(APPEND MNN_DEPS MNN_Arm82) list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_Arm82>)
ELSE()
list(APPEND MNN_TARGETS MNN_Arm82)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_Arm82>)
ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
# Express # Express
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/express/) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/express/)
# TensorRT
IF(MNN_TENSORRT)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/tensorrt/)
list(APPEND MNN_TARGETS MNN_TRT)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_TRT>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_TRT_LIBS})
ENDIF()
IF(MNN_SEP_BUILD) IF(MNN_SEP_BUILD)
add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS}) add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS}) target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
@ -471,7 +507,7 @@ ELSE()
list(APPEND MNN_TARGETS MNNExpress) list(APPEND MNN_TARGETS MNNExpress)
IF(MNN_BUILD_SHARED_LIBS) IF(MNN_BUILD_SHARED_LIBS)
add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS}) add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
if (MSVC OR WIN32) if (WIN32)
foreach(TARGET ${MNN_TARGETS}) foreach(TARGET ${MNN_TARGETS})
target_compile_definitions(${TARGET} PRIVATE "-DBUILDING_MNN_DLL") target_compile_definitions(${TARGET} PRIVATE "-DBUILDING_MNN_DLL")
target_compile_definitions(${TARGET} INTERFACE "-DUSING_MNN_DLL") target_compile_definitions(${TARGET} INTERFACE "-DUSING_MNN_DLL")
@ -484,7 +520,7 @@ ELSE()
ENDIF() ENDIF()
target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS}) target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
ENDIF() ENDIF()
if (MSVC OR WIN32) if (MSVC)
target_link_options(MNN PRIVATE "/IGNORE:4049,4217") target_link_options(MNN PRIVATE "/IGNORE:4049,4217")
endif() endif()
@ -504,9 +540,11 @@ if(APPLE)
target_link_libraries(MNN PUBLIC ${FOUNDATION}) target_link_libraries(MNN PUBLIC ${FOUNDATION})
find_library(METAL Metal REQUIRED) find_library(METAL Metal REQUIRED)
target_link_libraries(MNN PUBLIC ${METAL}) target_link_libraries(MNN PUBLIC ${METAL})
find_library(GRAPHIC CoreGraphics)
target_link_libraries(MNN PUBLIC ${GRAPHIC})
ENDIF() ENDIF()
endif() endif()
add_dependencies(MNN MNNCore MNNCV MNNShape MNNMath MNNCompute MNNCPU GenVCSHDR) add_dependencies(MNN MNNCore MNNCV MNNTransform MNNMath MNNCompute MNNCPU GenVCSHDR)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/converter) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/converter)
if(CMAKE_SYSTEM_NAME MATCHES "^Linux") if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
@ -532,12 +570,6 @@ if (NOT MNN_BUILD_SHARED_LIBS)
endif() endif()
endif() endif()
list(APPEND MNN_TARGETS MNN) list(APPEND MNN_TARGETS MNN)
FOREACH(TARGET ${MNN_TARGETS})
IF((NOT MSVC) AND (NOT WIN32))
else()
target_compile_definitions(${TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS)
endif()
ENDFOREACH()
list(REMOVE_ITEM MNN_TARGETS MNN) list(REMOVE_ITEM MNN_TARGETS MNN)
IF(MNN_BUILD_DEMO) IF(MNN_BUILD_DEMO)
include(${CMAKE_CURRENT_LIST_DIR}/demo/exec/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/demo/exec/CMakeLists.txt)

View File

@ -46,6 +46,7 @@ Pod::Spec.new do |s|
'schema/current/*.{h}',\ 'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\ '3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\ 'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/geometry/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\ 'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\ 'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\ 'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
@ -58,4 +59,4 @@ Pod::Spec.new do |s|
s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1'} s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1'}
s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' } s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' }
end end

View File

@ -66,7 +66,7 @@ The Interpreter is composed of the Engine and Backends; the former is responsible for model loading and the computation graph
Group 3: Group 3:
<img src="doc/DingTalkQR3.png" height="256"/> <img src="doc/DingTalkQR23.png" height="256"/>
## License ## License
Apache 2.0 Apache 2.0

View File

@ -0,0 +1,89 @@
//
// CPUBatchMatMul.cpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUBatchMatMul.hpp"
#include "backend/cpu/CPUBackend.hpp"
#include "math/Matrix.hpp"
namespace MNN {
CPUBatchMatMul::CPUBatchMatMul(Backend* backend, bool adjX, bool adjY) : Execution(backend) {
mMatMul.reset(new CPUMatMul(backend, adjX, adjY, true));
}
ErrorCode CPUBatchMatMul::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
// If either input is empty, skip setup; onExecute will zero-fill the output.
if (input0->elementSize() == 0 || input1->elementSize() == 0) {
return NO_ERROR;
}
auto dimensions = input0->dimensions();
mMatrixA.reset(Tensor::createDevice<float>({input0->length(input0->dimensions()-2), input0->length(input0->dimensions()-1)}));
mMatrixB.reset(Tensor::createDevice<float>({input1->length(input1->dimensions()-2), input1->length(input0->dimensions()-1)}));
mMatrixC.reset(Tensor::createDevice<float>({output->length(output->dimensions()-2), output->length(output->dimensions()-1)}));
mTempInputs = {mMatrixA.get(), mMatrixB.get()};
mTempOutputs = {mMatrixC.get()};
auto res = backend()->onAcquireBuffer(mMatrixA.get(), Backend::DYNAMIC);
res = res && backend()->onAcquireBuffer(mMatrixB.get(), Backend::DYNAMIC);
res = res && backend()->onAcquireBuffer(mMatrixC.get(), Backend::DYNAMIC);
if (!res) {
return OUT_OF_MEMORY;
}
int batch = 1;
for (int i = 0; i < dimensions - 2; ++i) {
batch *= input0->length(i);
}
mBatch = batch;
auto code = mMatMul->onResize(mTempInputs, mTempOutputs);
backend()->onReleaseBuffer(mMatrixA.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mMatrixB.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mMatrixC.get(), Backend::DYNAMIC);
return code;
}
ErrorCode CPUBatchMatMul::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
// Fill the output with zeros if either input is empty.
if (input0->elementSize() == 0 || input1->elementSize() == 0) {
::memset(output->host<float>(), 0, output->size());
return NO_ERROR;
}
const int dimensions = input0->dimensions();
MNN_ASSERT(dimensions >= 3);
const int input0Stride = input0->length(dimensions - 1) * input0->length(dimensions - 2);
const int input1Stride = input1->length(dimensions - 1) * input1->length(dimensions - 2);
const int outputStride = output->length(dimensions - 1) * output->length(dimensions - 2);
const auto input0Ptr = input0->host<float>();
const auto input1Ptr = input1->host<float>();
float* const outputPtr = output->host<float>();
for (int i = 0; i < mBatch; ++i) {
::memcpy(mMatrixA->host<float>(), input0Ptr + i * input0Stride, input0Stride * sizeof(float));
::memcpy(mMatrixB->host<float>(), input1Ptr + i * input1Stride, input1Stride * sizeof(float));
mMatMul->onExecute(mTempInputs, mTempOutputs);
::memcpy(outputPtr + i * outputStride, mMatrixC->host<float>(), outputStride * sizeof(float));
}
return NO_ERROR;
}
class CPUBatchMatMulCreator : public CPUBackend::Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const override {
return new CPUBatchMatMul(backend, op->main_as_BatchMatMulParam()->adjX(), op->main_as_BatchMatMulParam()->adjY());
}
};
REGISTER_CPU_OP_CREATOR(CPUBatchMatMulCreator, OpType_BatchMatMul);
} // namespace MNN
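For reference, the loop in onExecute above reduces a batched matmul to one 2-D matmul per batch slice: each A/B slice is copied into the shared mMatrixA/mMatrixB buffers, CPUMatMul runs once, and the result is copied into the output. A minimal stand-alone sketch of that decomposition (plain C++, not the MNN API; row-major layout and the [batch, m, k] x [batch, k, n] shapes are assumptions) looks like this:

#include <cstddef>

// Reference sketch: C[b] = A[b] * B[b] for every batch b, the same per-slice
// strategy CPUBatchMatMul uses. A is [batch, m, k], B is [batch, k, n], row-major.
void batchMatMulReference(const float* A, const float* B, float* C,
                          int batch, int m, int k, int n) {
    const size_t aStride = static_cast<size_t>(m) * k; // elements per A slice
    const size_t bStride = static_cast<size_t>(k) * n; // elements per B slice
    const size_t cStride = static_cast<size_t>(m) * n; // elements per C slice
    for (int b = 0; b < batch; ++b) {
        const float* a  = A + b * aStride;
        const float* bm = B + b * bStride;
        float* c = C + b * cStride;
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < n; ++j) {
                float sum = 0.0f;
                for (int p = 0; p < k; ++p) {
                    sum += a[i * k + p] * bm[p * n + j];
                }
                c[i * n + j] = sum;
            }
        }
    }
}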

View File

@ -0,0 +1,35 @@
//
// CPUBatchMatMul.hpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUBatchMatMul_hpp
#define CPUBatchMatMul_hpp
#include "backend/cpu/CPUMatMul.hpp"
namespace MNN {
class CPUBatchMatMul : public Execution {
public:
CPUBatchMatMul(Backend *backend, bool adjX, bool adjY);
virtual ~CPUBatchMatMul() = default;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
int mBatch;
std::shared_ptr<Execution> mMatMul;
std::vector<Tensor*> mTempInputs;
std::vector<Tensor*> mTempOutputs;
std::shared_ptr<Tensor> mMatrixA;
std::shared_ptr<Tensor> mMatrixB;
std::shared_ptr<Tensor> mMatrixC;
};
} // namespace MNN
#endif /* CPUBatchMatMul_hpp */

View File

@ -18,7 +18,6 @@
#include "backend/cpu/compute/ConvOpt.h" #include "backend/cpu/compute/ConvOpt.h"
#include "backend/cpu/CPUBackend.hpp" #include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/ConvolutionFloatFactory.h" #include "backend/cpu/compute/ConvolutionFloatFactory.h"
#include "math/Vec4.hpp"
#define MIN_CON_PLANESIZE 256 #define MIN_CON_PLANESIZE 256

View File

@ -10,7 +10,9 @@
#include <math.h> #include <math.h>
#include "backend/cpu/CPUBackend.hpp" #include "backend/cpu/CPUBackend.hpp"
#include "core/Macro.h" #include "core/Macro.h"
#include "math/Vec4.hpp" #include "math/Vec.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
namespace MNN { namespace MNN {
@ -39,12 +41,12 @@ ErrorCode CPUCosineSimilarity::onExecute(const std::vector<Tensor*>& inputs, con
const auto x1ChannelPtr = x1DataBatchPtr + j; const auto x1ChannelPtr = x1DataBatchPtr + j;
const auto x2ChannelPtr = x2DataBatchPtr + j; const auto x2ChannelPtr = x2DataBatchPtr + j;
Math::Vec4 innerProduct(.0f); Vec4 innerProduct(.0f);
Math::Vec4 x1Square(.0f); Vec4 x1Square(.0f);
Math::Vec4 x2Square(.0f); Vec4 x2Square(.0f);
for (int c = 0; c < channel; ++c) { for (int c = 0; c < channel; ++c) {
Math::Vec4 x1Data = Math::Vec4::load(x1ChannelPtr + c * channleStride); Vec4 x1Data = Vec4::load(x1ChannelPtr + c * channleStride);
Math::Vec4 x2Data = Math::Vec4::load(x2ChannelPtr + c * channleStride); Vec4 x2Data = Vec4::load(x2ChannelPtr + c * channleStride);
auto x1Xx2 = x1Data * x2Data; auto x1Xx2 = x1Data * x2Data;
innerProduct = innerProduct + x1Xx2; innerProduct = innerProduct + x1Xx2;
x1Square = x1Square + x1Data * x1Data; x1Square = x1Square + x1Data * x1Data;
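For context, the accumulation above computes the standard cosine similarity per spatial position, with the Vec4 accumulators handling four positions at a time; the formula below merely restates what the loop does, with the sums running over the channel dimension:

\mathrm{cos}(x_1, x_2) \;=\; \frac{\sum_{c} x_{1,c}\, x_{2,c}}{\sqrt{\sum_{c} x_{1,c}^{2}}\;\sqrt{\sum_{c} x_{2,c}^{2}}}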

View File

@ -12,8 +12,8 @@
#include "core/Concurrency.h" #include "core/Concurrency.h"
#include "core/Macro.h" #include "core/Macro.h"
#include "math/Vec4.hpp" #include "math/Vec.hpp"
using MNN::Math::Vec4; using Vec4 = MNN::Math::Vec<float, 4>;
namespace MNN { namespace MNN {

View File

@ -21,7 +21,7 @@ public:
auto parameter = op->main_as_InnerProduct(); auto parameter = op->main_as_InnerProduct();
int outputCount = parameter->outputCount(); int outputCount = parameter->outputCount();
int srcCount = parameter->weight()->size() / outputCount; int srcCount = parameter->weight()->size() / outputCount;
mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4)); mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4, 4));
if (mWeight.get() == nullptr) { if (mWeight.get() == nullptr) {
mValid = false; mValid = false;
return; return;

View File

@ -180,6 +180,14 @@ ErrorCode CPULSTM::onResize(const std::vector<Tensor *> &inputs, const std::vect
::memcpy(mBiasC->host<float>(), mLSTM->bias()->float32s()->data(), mBiasC->size()); ::memcpy(mBiasC->host<float>(), mLSTM->bias()->float32s()->data(), mBiasC->size());
::memcpy(mWeightH->host<float>(), mLSTM->weightH()->float32s()->data(), mWeightH->size()); ::memcpy(mWeightH->host<float>(), mLSTM->weightH()->float32s()->data(), mWeightH->size());
} }
if (mGateHaveBias) {
// Merge bias
auto biasPtr = mBiasC->host<float>();
auto biasPtr2 = biasPtr + 4 * numUnits;
for (int i=0; i<4*numUnits; ++i) {
biasPtr[i] = biasPtr[i] + biasPtr2[i];
}
}
} }
if (inputs.size() > 1) { if (inputs.size() > 1) {
@ -260,16 +268,8 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
MNN_CONCURRENCY_END(); MNN_CONCURRENCY_END();
float* biasStartPtr = mBiasC->host<float>(); float* biasStartPtr = mBiasC->host<float>();
if(!mGateHaveBias){
biasStartPtr = nullptr;
}
mRetriveOutputFunction(mGates.host<float>(), biasStartPtr); mRetriveOutputFunction(mGates.host<float>(), biasStartPtr);
float* recurrenceBiasStartPtr = mBiasC->host<float>();
if(mGateHaveBias){
recurrenceBiasStartPtr += 4 * numUnits;
}
// transform // transform
const float *contData = nullptr; const float *contData = nullptr;
if (inputs.size() > 1) { if (inputs.size() > 1) {
@ -330,14 +330,11 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
} }
// add bias // add bias
auto biasPtr = recurrenceBiasStartPtr + oc; //MNN_PRINT("%f, %f, %f, %f\n", I, O, F, G);
I = sigmoid(*biasPtr + I); I = sigmoid(I);
biasPtr = biasPtr + numUnits; F = sigmoid(F);
F = sigmoid(*biasPtr + F); O = sigmoid(O);
biasPtr = biasPtr + numUnits; G = tanhf(G);
O = sigmoid(*biasPtr + O);
biasPtr = biasPtr + numUnits;
G = tanhf(*biasPtr + G);
auto newCell = F * cellData[oc] + I * G; auto newCell = F * cellData[oc] + I * G;
cellData[oc] = newCell; cellData[oc] = newCell;

View File

@ -0,0 +1,311 @@
//
// CPUSoftmax.cpp
// MNN
//
// Created by MNN on 2018/07/16.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUSoftmax.hpp"
#include <math.h>
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/Concurrency.h"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#ifdef MNN_USE_NEON
#include <arm_neon.h>
#endif
namespace MNN {
int CPUSoftmax::_softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum) {
// Max and sub
MNN_CONCURRENCY_BEGIN(tId, threadNum)
{
const float *srcY = srcData + tId * channel;
float *dstY = dstData + tId * channel;
for (int y = (int)tId; y < outside; y += threadNum, srcY += channel * threadNum, dstY += channel * threadNum) {
float maxValue = srcY[0];
{
int c = 1;
#ifdef MNN_USE_NEON
#if !(defined(__ARM_FEATURE_FMA) && defined(__aarch64__))
#define vmaxvq_f32(v) \
({ \
float __m = v[0]; \
for (int i = 1; i < 4; i++) { \
if (v[i] > __m) \
__m = v[i]; \
} \
__m; \
})
#endif
if (c + 3 < channel) {
float32x4_t maxx4 = vld1q_f32(srcY + c);
c += 4;
for (; c + 3 < channel; c += 4) {
maxx4 = vmaxq_f32(maxx4, vld1q_f32(srcY + c));
}
float value = vmaxvq_f32(maxx4);
if (value > maxValue)
maxValue = value;
}
#endif
for (; c < channel; ++c) {
float value = srcY[c];
if (value > maxValue)
maxValue = value;
}
}
for (int c = 0; c < channel; ++c) {
dstY[c] = -srcY[c] + maxValue;
}
}
}
MNN_CONCURRENCY_END();
//Exp
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(channel * outside);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber -1 ) {
realSize = channel * outside - start;
}
if (realSize > 0) {
MNNExp(dstData + start, dstData + start, realSize);
}
}
MNN_CONCURRENCY_END();
// Sum and div
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
float *dstY = dstData + tId * channel;
for (int y = (int)tId; y < outside; y += threadNum, dstY += channel * threadNum) {
// sum
float sumValue = 0;
for (int c = 0; c < channel; ++c) {
sumValue += dstY[c];
}
// div
{
int c = 0;
#ifdef MNN_USE_NEON
float div = 1.f / sumValue;
for (; c + 3 < channel; c += 4) {
vst1q_f32(dstY + c, vmulq_n_f32(vld1q_f32(dstY + c), div));
}
#endif
for (; c < channel; ++c) {
dstY[c] /= sumValue;
}
}
}
}
MNN_CONCURRENCY_END();
return 0;
}
int CPUSoftmax::_softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel,
float *maxValue, float *sumValue, int threadNum) {
if (inside == 1)
return _softmax1(srcData, dstData, outside, channel, threadNum);
const int stepY = inside * channel;
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
const float *srcY = srcData + tId * stepY;
float *dstY = dstData + tId * stepY;
float *maxValueSub = maxValue + tId * inside;
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
memcpy(maxValueSub, srcY, sizeof(float) * inside);
const float *src = srcY + inside;
for (int c = 1; c < channel; ++c, src += inside) {
for (int x = 0; x < inside; ++x) {
if (src[x] > maxValueSub[x])
maxValueSub[x] = src[x];
}
}
src = srcY;
float *dst = dstY;
for (int c = 0; c < channel; ++c, src += inside, dst += inside) {
for (int x = 0; x < inside; ++x) {
dst[x] = -src[x] + maxValueSub[x];
}
}
}
}
MNN_CONCURRENCY_END();
auto totalSize = channel * inside * outside;
//Exp
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(totalSize);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber -1 ) {
realSize = totalSize - start;
}
if (realSize > 0) {
MNNExp(dstData + start, dstData + start, realSize);
}
}
MNN_CONCURRENCY_END();
MNN_CONCURRENCY_BEGIN(tId, threadNum);
{
const float *srcY = srcData + tId * stepY;
float *dstY = dstData + tId * stepY;
float *sumValueSub = sumValue + tId * inside;
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
memset(sumValueSub, 0, sizeof(float) * inside);
float *dst = dstY;
for (int c = 0; c < channel; ++c, dst += inside) {
for (int x = 0; x < inside; ++x) {
sumValueSub[x] += dst[x];
}
}
dst = dstY;
for (int c = 0; c < channel; ++c, dst += inside) {
for (int x = 0; x < inside; ++x) {
dst[x] /= sumValueSub[x];
}
}
}
}
MNN_CONCURRENCY_END();
return 0;
}
ErrorCode CPUSoftmax::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
auto input = inputs[0];
const int dimensions = input->buffer().dimensions;
const auto layout = TensorUtils::getDescribe(input)->dimensionFormat;
mNeedUnpackC4 = layout == MNN_DATA_FORMAT_NC4HW4;
if (mNeedUnpackC4) {
int totalSize = 1;
for (int i = 1; i < dimensions; ++i) {
totalSize *= input->length(i);
}
mStorage.buffer().dim[0].extent = input->length(0);
mStorage.buffer().dim[1].extent = totalSize;
TensorUtils::getDescribe(&mStorage)->dimensionFormat = MNN_DATA_FORMAT_NHWC;
mStorage.buffer().dimensions = 2;
mStorage.buffer().type = input->getType();
backend()->onAcquireBuffer(&mStorage, Backend::DYNAMIC);
}
int inside = 1;
int dims = input->buffer().dimensions;
for (int i = mAxis + 1; i < dims; ++i) {
inside *= input->length(i);
}
if (inside != 1) { // not run _softmax1, we need maxValue Tensor and sumValue Tensor.
int threadNum = ((CPUBackend *)backend())->threadNumber();
mMaxValue.buffer().dim[0].extent = inside * threadNum;
mMaxValue.buffer().dimensions = 1;
mMaxValue.setType(DataType_DT_FLOAT);
backend()->onAcquireBuffer(&mMaxValue, Backend::DYNAMIC);
mSumValue.buffer().dim[0].extent = inside * threadNum;
mSumValue.buffer().dimensions = 1;
mSumValue.setType(DataType_DT_FLOAT);
backend()->onAcquireBuffer(&mSumValue, Backend::DYNAMIC);
backend()->onReleaseBuffer(&mMaxValue, Backend::DYNAMIC);
backend()->onReleaseBuffer(&mSumValue, Backend::DYNAMIC);
}
if (mNeedUnpackC4) {
backend()->onReleaseBuffer(&mStorage, Backend::DYNAMIC);
}
return NO_ERROR;
}
ErrorCode CPUSoftmax::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
MNN_ASSERT(1 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto inputTensor = inputs[0];
auto outputTensor = outputs[0];
const auto inputDataPtr = inputTensor->host<float>();
auto outputDataPtr = outputTensor->host<float>();
const int batch = inputTensor->batch();
const auto dims = inputTensor->buffer().dimensions;
float *tempData = nullptr;
if (mNeedUnpackC4) {
tempData = mStorage.host<float>();
}
int areaInput = 1;
for (int i = 2; i < dims; ++i) {
areaInput *= inputTensor->length(i);
}
int inside = 1;
int outside = 1;
int channel = 1;
for (int i = 0; i < mAxis; ++i) {
outside *= inputTensor->length(i);
}
channel = inputTensor->length(mAxis);
for (int i = mAxis + 1; i < dims; ++i) {
inside *= inputTensor->length(i);
}
int threadNum = ((CPUBackend *)backend())->threadNumber();
if (!mNeedUnpackC4) {
_softmaxCommon(inputDataPtr, outputDataPtr, inside, outside, channel, mMaxValue.host<float>(),
mSumValue.host<float>(), threadNum);
return NO_ERROR;
}
auto outputSize = outputTensor->elementSize();
int batchSize = outputSize / batch;
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
auto inputData = inputDataPtr + batchIndex * batchSize;
MNNUnpackC4(outputDataPtr + batchIndex * mStorage.length(1), inputData, areaInput, inputTensor->channel());
}
_softmaxCommon(outputDataPtr, tempData, inside, outside, channel, mMaxValue.host<float>(), mSumValue.host<float>(), threadNum);
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
auto outputData = outputDataPtr + batchIndex * batchSize;
auto tempPtr = tempData + batchIndex * mStorage.length(1);
MNNPackC4(outputData, tempPtr, areaInput, outputTensor->channel());
}
return NO_ERROR;
}
CPUSoftmax::CPUSoftmax(Backend *b, int axis) : MNN::Execution(b), mAxis(axis), mStorage(2), mNeedUnpackC4(false) {
// nothing to do
}
class CPUSoftmaxCreator : public CPUBackend::Creator {
public:
virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
const MNN::Op *op, Backend *backend) const override {
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
return new CPUSoftmax(backend, axis);
}
};
REGISTER_CPU_OP_CREATOR(CPUSoftmaxCreator, OpType_Softmax);
} // namespace MNN
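Both _softmax1 and _softmaxCommon above realize the numerically stable softmax over the chosen axis in three passes: subtract the per-row maximum, exponentiate, then normalize by the row sum. Mathematically the result is

\mathrm{softmax}(x)_c \;=\; \frac{e^{\,x_c - m}}{\sum_{c'} e^{\,x_{c'} - m}}, \qquad m = \max_{c'} x_{c'},

and subtracting m only guards against overflow; it does not change the result, since the common factor cancels between numerator and denominator.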

View File

@ -0,0 +1,35 @@
//
// CPUSoftmax.hpp
// MNN
//
// Created by MNN on 2018/07/16.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUSoftmax_hpp
#define CPUSoftmax_hpp
#include "core/Execution.hpp"
namespace MNN {
class CPUSoftmax : public Execution {
public:
CPUSoftmax(Backend *b, int axis);
virtual ~CPUSoftmax() = default;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
private:
int _softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel, float *maxValue,
float *sumValue, int threadNum);
int _softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum);
int mAxis;
Tensor mStorage;
Tensor mMaxValue;
Tensor mSumValue;
bool mNeedUnpackC4;
};
} // namespace MNN
#endif /* CPUSoftmax_hpp */

View File

@ -13,10 +13,8 @@
#include "backend/cpu/compute/ConvOpt.h" #include "backend/cpu/compute/ConvOpt.h"
#include "core/Macro.h" #include "core/Macro.h"
#include "core/TensorUtils.hpp" #include "core/TensorUtils.hpp"
#include "math/Vec4.hpp" #include "math/Vec.hpp"
using namespace MNN::Math; using Vec4 = MNN::Math::Vec<float, 4>;
typedef Vec4 float4;
#define SOURCE_BLOCK 64 #define SOURCE_BLOCK 64
#define WEIGHT_BLOCK 256 #define WEIGHT_BLOCK 256

View File

@ -0,0 +1,128 @@
//
// GeometryCropAndResize.cpp
// MNN
//
// Created by MNN on 2020/08/5.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "ConvertUtils.hpp"
namespace MNN {
class GeometryCropAndResize : public GeometryComputer {
public:
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
MNN_ASSERT(4 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto img = inputs[0];
auto boxes = inputs[1];
auto box_ind = inputs[2];
auto crop_size = inputs[3];
auto output = outputs[0];
auto extrapolation = op->main_as_CropAndResize()->extrapolationValue();
auto method = op->main_as_CropAndResize()->method();
// resizeType of Interp : 1-NEAREST, 2-BILINEAR
const int resizeType = method == CropAndResizeMethod_BILINEAR ? 2 : 1;
int batch = img->length(0), ih = img->length(1), iw = img->length(2),
depth = img->length(3), boxNum = boxes->length(0);
const int cropHeight = crop_size->host<uint32_t>()[0],
cropWidth = crop_size->host<uint32_t>()[1];
auto des = TensorUtils::getDescribe(output);
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
des->regions.clear();
des->regions.reserve(boxNum);
for (int i = 0; i < boxNum; i++) {
const float y1 = boxes->host<float>()[i*4];
const float x1 = boxes->host<float>()[i*4+1];
const float y2 = boxes->host<float>()[i*4+2];
const float x2 = boxes->host<float>()[i*4+3];
const int ind = box_ind->host<uint32_t>()[i];
const float ch = (y2 - y1) * (ih - 1), cw = (x2 - x1) * (iw - 1);
const float yScale = ch / static_cast<float>(cropHeight - 1);
const float xScale = cw / static_cast<float>(cropWidth - 1);
const float yOffset = y1 * (ih - 1), xOffset = x1 * (iw - 1);
// select the cropped image from the input batch and convert its format from NHWC to NC4HW4
std::shared_ptr<Tensor> cropValue(new Tensor);
{
cropValue->buffer().type = halide_type_of<float>();
cropValue->buffer().dimensions = 4;
cropValue->setLength(0, 1);
cropValue->setLength(1, depth);
cropValue->setLength(2, ih);
cropValue->setLength(3, iw);
auto des = TensorUtils::getDescribe(cropValue.get());
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
des->regions.clear();
Tensor::InsideDescribe::Region region;
region.origin = img;
region.size[1] = depth;
region.size[2] = ih * iw;
region.src.offset = ind * ih * iw * depth;
region.dst.offset = 0;
region.src.stride[1] = 1;
region.src.stride[2] = depth;
region.dst.stride[1] = ih * iw;
region.dst.stride[2] = 1;
des->regions.emplace_back(std::move(region));
res.extras.emplace_back(cropValue);
}
// use the Interp op to crop and resize the selected image
std::shared_ptr<Tensor> resizeValue;
{
resizeValue.reset(Tensor::createDevice<float>({1, depth, cropHeight, cropWidth}));
auto des = TensorUtils::getDescribe(resizeValue.get());
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
std::unique_ptr<OpT> interp(new OpT);
interp->type = OpType_Interp;
interp->main.type = OpParameter_Interp;
interp->main.value = new InterpT;
interp->main.AsInterp()->widthScale = xScale;
interp->main.AsInterp()->heightScale = yScale;
interp->main.AsInterp()->widthOffset = xOffset;
interp->main.AsInterp()->heightOffset = yOffset;
interp->main.AsInterp()->alignCorners = false;
interp->main.AsInterp()->resizeType = resizeType;
auto cmd = GeometryComputerUtils::makeCommand(interp.get(), {cropValue.get()}, {resizeValue.get()});
res.extras.emplace_back(resizeValue);
res.command.emplace_back(cmd);
}
// convert the resized image's format from NC4HW4 to NHWC and write it into the output batch
{
Tensor::InsideDescribe::Region region;
region.origin = resizeValue.get();
region.size[1] = cropHeight * cropWidth;
region.size[2] = depth;
region.src.offset = 0;
region.dst.offset = i * cropHeight * cropWidth * depth;
region.src.stride[1] = 1;
region.src.stride[2] = cropHeight * cropWidth;
region.dst.stride[1] = depth;
region.dst.stride[2] = 1;
des->regions.emplace_back(std::move(region));
}
}
return true;
}
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
//return {false};
return {true};
}
};
static void _create() {
std::shared_ptr<GeometryComputer> comp(new GeometryCropAndResize);
// GeometryComputer::registerGeometryComputer(comp, {OpType_CropAndResize});
}
REGISTER_GEOMETRY(GeometryCropAndResize, _create);
} // namespace MNN
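Assuming the Interp op samples its source at src = offset + dst * scale (an assumption about Interp's convention, not stated in this diff), the xScale/yScale and xOffset/yOffset values computed above give the usual crop-and-resize mapping from normalized box corners to source pixels:

y_{\mathrm{src}} = y_1\,(H_{in}-1) + y_{\mathrm{dst}} \cdot \frac{(y_2 - y_1)(H_{in}-1)}{H_{crop}-1}, \qquad
x_{\mathrm{src}} = x_1\,(W_{in}-1) + x_{\mathrm{dst}} \cdot \frac{(x_2 - x_1)(W_{in}-1)}{W_{crop}-1},

so output position 0 lands on the box corner (y_1, x_1) and position H_crop-1 (resp. W_crop-1) lands on (y_2, x_2).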

View File

@ -0,0 +1,304 @@
//
// GeometryGather.cpp
// MNN
//
// Created by MNN on 2020/06/09.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
namespace MNN {
class GeometryGather : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() == 2);
MNN_ASSERT(1 == outputs.size());
auto embedding = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
const int firstDimStride = embedding->buffer().dim[0].stride;
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && firstDimStride != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
}
return std::vector<bool>(outputs.size(), false);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(2 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto embedding = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
const int firstDimStride = embedding->buffer().dim[0].stride;
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || firstDimStride == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
auto bytes = embedding->buffer().type.bytes();
const size_t indicesCount = indices->elementSize();
const auto limit = embedding->length(0);
const int* indicesData = indices->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int i = 0; i < indicesCount; i++) {
if (indicesData[i] < 0 || indicesData[i] > limit) {
MNN_PRINT("Gather indice error\n");
return false;
}
Tensor::InsideDescribe::Region slice;
slice.origin = embedding;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = firstDimStride;
slice.src.offset = firstDimStride * indicesData[i];
slice.dst.offset = i * firstDimStride;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
return true;
}
};
class GeometryGatherND : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() == 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int mSliceN = 1;
int mSliceSize = 1;
for (int i = 0; i < indices->dimensions() - 1; ++i) {
mSliceN *= indices->length(i);
}
auto indiceNd = indices->length(indices->dimensions() - 1);
std::vector<int> mDimsToCount;
mDimsToCount.resize(indiceNd);
for (int i = indiceNd; i < params->dimensions(); ++i) {
mSliceSize *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && mSliceSize != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
} else {
std::vector<bool> res(outputs.size(), false);
return res;
}
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(2 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indice = inputs[1];
auto output = outputs[0];
int mSliceN = 1;
int mSliceSize = 1;
for (int i = 0; i < indice->dimensions() - 1; ++i) {
mSliceN *= indice->length(i);
}
auto indiceNd = indice->length(indice->dimensions() - 1);
std::vector<int> mDimsToCount;
mDimsToCount.resize(indiceNd);
for (int i = indiceNd; i < params->dimensions(); ++i) {
mSliceSize *= params->length(i);
}
if (TensorUtils::getDescribe(indice)->usage != MNN::Tensor::InsideDescribe::CONSTANT || mSliceSize == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
auto paramSize = params->elementSize();
for (int i = 0; i < indiceNd; ++i) {
mDimsToCount[i] = paramSize / params->length(i);
paramSize = mDimsToCount[i];
}
mDimsToCount.resize(indiceNd);
auto indiceData = indice->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int i = 0; i < mSliceN; i++) {
int fromPos = 0;
for (int j = 0; j < indiceNd; ++j) {
fromPos += mDimsToCount[j] * indiceData[i * indiceNd + j];
}
Tensor::InsideDescribe::Region slice;
slice.origin = params;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = mSliceSize;
slice.src.offset = fromPos;
slice.dst.offset = i * mSliceSize;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
return true;
}
};
class GeometryGatherV2 : public DefaultGeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
MNN_ASSERT(inputs.size() >= 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int axis = 0;
if (inputs.size() == 3) {
const Tensor* axisTensor = inputs[2];
axis = axisTensor->host<int32_t>()[0];
}
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
if (axis < 0) {
axis = params->buffer().dimensions + axis;
}
const int gatherDimSize = params->buffer().dim[axis].extent;
const int N = indices->elementSize();
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
int inside = 1;
for (int i = axis + 1; i < params->dimensions(); ++i) {
inside *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && inside != 0) {
std::vector<bool> res(outputs.size(), true);
return res;
}
return std::vector<bool>(outputs.size(), false);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
MNN_ASSERT(inputs.size() >= 2);
MNN_ASSERT(1 == outputs.size());
auto params = inputs[0];
auto indices = inputs[1];
auto output = outputs[0];
int axis = 0;
if (inputs.size() == 3) {
const Tensor* axisTensor = inputs[2];
axis = axisTensor->host<int32_t>()[0];
}
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
if (axis < 0) {
axis = params->buffer().dimensions + axis;
}
const int gatherDimSize = params->buffer().dim[axis].extent;
const int N = indices->elementSize();
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
int inside = 1;
int outside = 1;
for (int i = 0; i < axis; ++i) {
outside *= params->length(i);
}
for (int i = axis + 1; i < params->dimensions(); ++i) {
inside *= params->length(i);
}
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || inside == 0) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
const int limit = params->length(axis);
auto bytes = output->buffer().type.bytes();
const int insideStride = inside;
const int outputOutsideStride = inside * N;
const int inputOutsideStride = inside * inputs[0]->length(axis);
const int* indicesPtr = indices->host<int32_t>();
auto outputDes = TensorUtils::getDescribe(output);
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
for (int o = 0; o < outside; ++o) {
for (int i = 0; i < N; i++) {
if (indicesPtr[i] < 0 || indicesPtr[i] > limit) {
continue;
}
Tensor::InsideDescribe::Region slice;
slice.origin = params;
slice.size[0] = 1;
slice.size[1] = 1;
slice.size[2] = insideStride;
slice.src.offset = inputOutsideStride * o + insideStride * indicesPtr[i];
slice.dst.offset = outputOutsideStride * o + i * insideStride;
slice.src.stride[0] = 1;
slice.src.stride[1] = 1;
slice.src.stride[2] = 1;
slice.dst.stride[0] = 1;
slice.dst.stride[1] = 1;
slice.dst.stride[2] = 1;
outputDes->regions.emplace_back(std::move(slice));
}
}
return true;
}
};
static void _create() {
// std::shared_ptr<GeometryComputer> comp(new GeometryGather);
// GeometryComputer::registerGeometryComputer(comp, {OpType_Gather});
//
// std::shared_ptr<GeometryComputer> comp2(new GeometryGatherND);
// GeometryComputer::registerGeometryComputer(comp2, {OpType_GatherND});
//
// std::shared_ptr<GeometryComputer> comp3(new GeometryGatherV2);
// GeometryComputer::registerGeometryComputer(comp3, {OpType_GatherV2});
}
REGISTER_GEOMETRY(GeometryGather, _create);
} // namespace MNN
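For reference, the regions GeometryGatherV2 emits describe the copy pattern output[o, i, :] = params[o, indices[i], :] over each inside-sized run. A minimal stand-alone sketch of that semantics (plain C++, not the MNN API; a strict bounds check is used here, whereas the code above only skips indices greater than the axis length) looks like this:

#include <vector>

// Reference sketch of GatherV2 along one axis: params is viewed as
// [outside, axisLen, inside], output as [outside, N, inside].
std::vector<float> gatherV2Reference(const std::vector<float>& params,
                                     const std::vector<int>& indices,
                                     int outside, int axisLen, int inside) {
    const int N = static_cast<int>(indices.size());
    std::vector<float> output(static_cast<size_t>(outside) * N * inside, 0.0f);
    for (int o = 0; o < outside; ++o) {
        for (int i = 0; i < N; ++i) {
            const int idx = indices[i];
            if (idx < 0 || idx >= axisLen) {
                continue; // out-of-range indices leave the slice zeroed
            }
            const float* src = params.data() + (static_cast<size_t>(o) * axisLen + idx) * inside;
            float* dst = output.data() + (static_cast<size_t>(o) * N + i) * inside;
            for (int k = 0; k < inside; ++k) {
                dst[k] = src[k];
            }
        }
    }
    return output;
}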

View File

@ -0,0 +1,214 @@
//
// GeometrySoftmax.cpp
// MNN
//
// Created by MNN on 2020/06/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "geometry/GeometryComputer.hpp"
#include "core/OpCommonUtils.hpp"
#include "geometry/GeometryComputerUtils.hpp"
namespace MNN {
class GeometrySoftmax : public GeometryComputer {
public:
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
if (axis == 1) {
return std::vector<bool>(outputs.size(), false);
}
return std::vector<bool>(outputs.size(), true);
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs,
const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
MNN_ASSERT(1 == inputs.size());
MNN_ASSERT(1 == outputs.size());
auto input = inputs[0];
auto output = outputs[0];
auto dims = input->buffer().dimensions;
auto axis = op->main_as_Axis()->axis();
if (axis < 0) {
axis = inputs[0]->dimensions() + axis;
}
if (axis == 1) {
Command cmd;
cmd.op = op;
cmd.inputs = std::move(inputs);
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
int inside = 1;
int outside = 1;
int channel = 1;
for (int i = 0; i < axis; ++i) {
outside *= input->length(i);
}
channel = input->length(axis);
for (int i = axis + 1; i < dims; ++i) {
inside *= input->length(i);
}
//input transform to NCHW format
std::shared_ptr<Tensor> tmpInput;
{
tmpInput.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(tmpInput.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = channel*inside;
desReg.src.stride[1] = inside;
desReg.src.stride[2] = 1;
desReg.origin = input;
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(tmpInput);
}
//reduction max, axis=1
std::shared_ptr<Tensor> maxValue;
{
maxValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
res.extras.emplace_back(maxValue);
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_MAXIMUM, tmpInput.get(), maxValue.get()));
}
//broadcast reduction axis dim
std::shared_ptr<Tensor> maxBroadValue;
{
maxBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(maxBroadValue.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = inside;
desReg.src.stride[1] = 0;
desReg.src.stride[2] = 1;
desReg.origin = maxValue.get();
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(maxBroadValue);
}
//sub
std::shared_ptr<Tensor> subMaxValue;
{
subMaxValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_SUB, tmpInput.get(), maxBroadValue.get(), subMaxValue.get());
res.extras.emplace_back(subMaxValue);
res.command.emplace_back(std::move(cmd));
}
//exp
std::shared_ptr<Tensor> expValue;
{
expValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeUnary(UnaryOpOperation_EXP, subMaxValue.get(), expValue.get());
res.extras.emplace_back(expValue);
res.command.emplace_back(std::move(cmd));
}
//reduction sum, axis=1 (channel dim of the packed {outside, channel, inside} tensor), NCHW layout only
std::shared_ptr<Tensor> sumValue;
{
sumValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
res.extras.emplace_back(sumValue);
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_SUM, expValue.get(), sumValue.get()));
}
//broadcast reduction axis dim
std::shared_ptr<Tensor> sumBroadValue;
{
sumBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto outputDes = TensorUtils::getDescribe(sumBroadValue.get());
outputDes->regions.clear();
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = inside;
desReg.src.stride[1] = 0;
desReg.src.stride[2] = 1;
desReg.origin = sumValue.get();
outputDes->regions.emplace_back(std::move(desReg));
res.extras.emplace_back(sumBroadValue);
}
//div
std::shared_ptr<Tensor> tmpOutput;
{
tmpOutput.reset(Tensor::createDevice<float>({outside, channel, inside}));
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_REALDIV, expValue.get(), sumBroadValue.get(), tmpOutput.get());
res.extras.emplace_back(tmpOutput);
res.command.emplace_back(std::move(cmd));
}
//transform to output
{
auto outputDes = TensorUtils::getDescribe(output);
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region desReg;
desReg.size[0] = outside;
desReg.size[1] = channel;
desReg.size[2] = inside;
desReg.dst.offset = 0;
desReg.dst.stride[0] = channel*inside;
desReg.dst.stride[1] = inside;
desReg.dst.stride[2] = 1;
desReg.src.offset = 0;
desReg.src.stride[0] = channel*inside;
desReg.src.stride[1] = inside;
desReg.src.stride[2] = 1;
desReg.origin = tmpOutput.get();
outputDes->regions.emplace_back(std::move(desReg));
}
return true;
}
};
static void _create() {
// std::shared_ptr<GeometryComputer> comp(new GeometrySoftmax);
// GeometryComputer::registerGeometryComputer(comp, {OpType_Softmax});
}
REGISTER_GEOMETRY(GeometrySoftmax, _create);
} // namespace MNN
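The chain of regions, reductions and element-wise ops above is the numerically stable softmax along the chosen axis, evaluated independently for every (outside, inside) slice; subtracting the per-slice maximum before exp() is what keeps the exponentials from overflowing. A scalar reference of what it computes (illustrative sketch only, not code from the repository; x and y are flat float buffers of length outside * channel * inside):

for (int o = 0; o < outside; ++o) {
    for (int i = 0; i < inside; ++i) {
        // per-slice maximum over the channel dimension
        float maxV = x[(o * channel + 0) * inside + i];
        for (int c = 1; c < channel; ++c) {
            float v = x[(o * channel + c) * inside + i];
            maxV = v > maxV ? v : maxV;
        }
        // sum of shifted exponentials
        float sum = 0.0f;
        for (int c = 0; c < channel; ++c) {
            sum += expf(x[(o * channel + c) * inside + i] - maxV);
        }
        // normalized output
        for (int c = 0; c < channel; ++c) {
            int idx = (o * channel + c) * inside + i;
            y[idx] = expf(x[idx] - maxV) / sum;
        }
    }
}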

View File

@ -7,7 +7,7 @@ add_executable(benchmarkExprModels.out ${CMAKE_CURRENT_LIST_DIR}/benchmarkExprMo
target_include_directories(benchmarkExprModels.out PRIVATE "${CMAKE_CURRENT_LIST_DIR}/exprModels" ${CMAKE_CURRENT_SOURCE_DIR}/) target_include_directories(benchmarkExprModels.out PRIVATE "${CMAKE_CURRENT_LIST_DIR}/exprModels" ${CMAKE_CURRENT_SOURCE_DIR}/)
target_link_libraries(benchmarkExprModels.out ${MNN_DEPS}) target_link_libraries(benchmarkExprModels.out ${MNN_DEPS})
if ((MSVC OR WIN32) AND NOT MNN_BUILD_SHARED_LIBS) if (MSVC AND NOT MNN_BUILD_SHARED_LIBS)
foreach (DEPEND ${MNN_DEPS}) foreach (DEPEND ${MNN_DEPS})
target_link_options(benchmark.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>) target_link_options(benchmark.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)
target_link_options(benchmarkExprModels.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>) target_link_options(benchmarkExprModels.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)

View File

@ -124,6 +124,7 @@ std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward
const auto bufferSize = revertor->getBufferSize(); const auto bufferSize = revertor->getBufferSize();
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize)); auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
revertor.reset(); revertor.reset();
net->setSessionMode(MNN::Interpreter::Session_Release);
MNN::ScheduleConfig config; MNN::ScheduleConfig config;
config.numThread = numberThread; config.numThread = numberThread;
config.type = static_cast<MNNForwardType>(forward); config.type = static_cast<MNNForwardType>(forward);

View File

@ -90,6 +90,7 @@ static std::vector<float> runNet(VARP netOutput, const ScheduleConfig& config, i
const void* buf = builder.GetBufferPointer(); const void* buf = builder.GetBufferPointer();
size_t size = builder.GetSize(); size_t size = builder.GetSize();
std::unique_ptr<Interpreter> net(Interpreter::createFromBuffer(buf, size)); std::unique_ptr<Interpreter> net(Interpreter::createFromBuffer(buf, size));
net->setSessionMode(MNN::Interpreter::Session_Release);
auto session = net->createSession(config); auto session = net->createSession(config);
net->releaseModel(); net->releaseModel();
auto inputTensor = net->getSessionInput(session, NULL); auto inputTensor = net->getSessionInput(session, NULL);

View File

@ -1,84 +0,0 @@
import os
import sys
major_py_ver = sys.version_info.major
def convert_string_to_hex_list(code_str):
hex_list = []
for i in range(len(code_str)):
hex_ = hex(ord(code_str[i]))
hex_list.append(hex_)
return hex_list
def opencl_codegen():
cl_kernel_dir = sys.argv[1]
output_path = sys.argv[2]
print("Generating OpenCL Kernels in "+cl_kernel_dir+" to "+output_path)
if not os.path.exists(cl_kernel_dir):
print(cl_kernel_dir + " doesn't exist!")
#common.h
common_header_code = ""
#quantized_common.h
quantized_common_header_code = ""
#activation_common.h
activation_common_header_code = ""
for file_name in os.listdir(cl_kernel_dir):
file_path = os.path.join(cl_kernel_dir, file_name)
if file_path[-2:] == ".h" and file_name[:-2] == "quantized_common":
with open(file_path, "r") as f:
quantized_common_header_code += f.read()
elif file_path[-2:] == ".h" and file_name[:-2] == "activation_common":
with open(file_path, "r") as f:
activation_common_header_code += f.read()
opencl_code_maps = {}
for file_name in os.listdir(cl_kernel_dir):
file_path = os.path.join(cl_kernel_dir, file_name)
if file_path[-3:] == ".cl":
with open(file_path, "r") as f:
code_str = ""
for line in f.readlines():
if "#include <activation_common.h>" in line:
code_str += common_header_code
code_str += activation_common_header_code
elif "#include <quantized_common.h>" in line:
code_str += common_header_code
code_str += quantized_common_header_code
elif "#include <common.h>" in line:
code_str += common_header_code
else:
code_str += line
opencl_code_maps[file_name[:-3]] = convert_string_to_hex_list(code_str)
#source model
opencl_source_map = "#include <map> \n"
opencl_source_map += "#include <string> \n"
opencl_source_map += "#include <vector> \n"
opencl_source_map += "namespace MNN { \n"
opencl_source_map += "extern const std::map<std::string, std::vector<unsigned char>> OpenCLProgramMap = \n { \n"
if major_py_ver == 2:
items = opencl_code_maps.iteritems()
else:
items = opencl_code_maps.items()
for file_name, file_source in items:
opencl_source_map += "{\n \""
opencl_source_map += file_name
opencl_source_map += "\", \n"
opencl_source_map += " { "
for source_hex in file_source:
opencl_source_map += source_hex
opencl_source_map += ","
opencl_source_map += " } "
opencl_source_map += "\n }, \n"
opencl_source_map += " }; \n"
opencl_source_map += "} \n"
with open(output_path, "w") as w_file:
w_file.write(opencl_source_map)
print("Generate OpenCL Source done !!! \n")
if __name__ == '__main__':
opencl_codegen()

140
ciscripts/build.sh Normal file
View File

@ -0,0 +1,140 @@
#!/usr/bin/env bash
. ./parse_options.sh || exit 1;
CMAKE=cmake
MAKE=make
ANDROID_NDK=/home/android-ndk-r18b
BUILD_ROOT=`pwd`
# Clean the existing build directory instead of removing it, to avoid
# the "Current working directory cannot be established" problem.
function make_or_clean_dir {
if [ -d $1 ]; then
rm -rf $1/*
else
mkdir $1
fi
}
function build_arm_android_32 {
make_or_clean_dir build_arm_android_32 && cd build_arm_android_32
$CMAKE ../.. \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="armeabi-v7a" \
-DANDROID_STL=c++_static \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_NATIVE_API_LEVEL=android-21 \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=true \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. \
-DNATIVE_INCLUDE_OUTPUT=. \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_OPENGL=$USE_OPENGL \
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_arm_android_64 {
make_or_clean_dir build_arm_android_64 && cd build_arm_android_64
$CMAKE ../.. \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_STL=c++_static \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_NATIVE_API_LEVEL=android-21 \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=true \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. \
-DNATIVE_INCLUDE_OUTPUT=. \
-DMNN_ARM82=ON \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_OPENGL=$USE_OPENGL \
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_arm_linux_32 {
cd $BUILD_ROOT; true;
}
function build_arm_linux_64 {
cd $BUILD_ROOT; true;
}
function build_x86_linux {
make_or_clean_dir build_x86_linux && cd build_x86_linux
$CMAKE ../.. \
-DCMAKE_BUILD_TYPE=Release \
-DMNN_BUILD_TRAIN=ON \
-DMNN_SEP_BUILD=OFF \
-DMNN_BUILD_DEMO=ON \
-DMNN_BUILD_QUANTOOLS=ON \
-DMNN_EVALUATION=ON \
-DMNN_BUILD_CONVERTER=ON \
-DMNN_SUPPORT_TFLITE_QUAN=ON \
-DMNN_BUILD_TEST=ON \
-DMNN_OPENCL=$USE_OPENCL \
-DMNN_VULKAN=$USE_VULKAN \
-DMNN_OPENMP=$USE_OPENMP \
-DMNN_USE_THREAD_POOL=OFF \
-DMNN_BUILD_BENCHMARK=ON || exit 1;
$MAKE -j $build_threads || exit 1;
cd $BUILD_ROOT; true;
}
function build_all {
build_arm_android_32 || exit 1;
build_arm_android_64 || exit 1;
build_arm_linux_32 || exit 1;
build_arm_linux_64 || exit 1;
build_x86_linux || exit 1;
true;
}
function clean {
rm -rf build_arm_android_32
rm -rf build_arm_android_64
rm -rf build_arm_linux_32
rm -rf build_arm_linux_64
rm -rf build_x86_linux
}
function build {
case $platform in
"arm_linux_32")
build_arm_linux_32 || exit 1;
;;
"arm_linux_64")
build_arm_linux_64 || exit 1;
;;
"x86_linux")
build_x86_linux || exit 1;
;;
"arm_android_32")
build_arm_android_32 || exit 1;
;;
"arm_android_64")
build_arm_android_64 || exit 1;
;;
"all")
build_all || exit 1;
;;
*) echo "Invalid platform: $platform" && exit 1;
esac
}
if [ $clean == 1 ]; then
clean
else
build $@
fi
true;

113
ciscripts/parse_options.sh Normal file
View File

@ -0,0 +1,113 @@
#!/usr/bin/env bash
# Valid platform:
# - arm_android_32
# - arm_android_64
# - arm_linux_32
# - arm_linux_64
# - x86_linux
platform="all"
# Option to build with opencl.
use_opencl=0
# Option to build with opengl.
use_opengl=0
# Option to build with vulkan.
use_vulkan=0
# Option to build with openmp multithreads library.
use_openmp=0
build_threads=1
# Option to clear the build history.
clean=0
USE_OPENCL=OFF
USE_VULKAN=OFF
USE_OPENGL=OFF
USE_OPENMP=OFF
USE_THREAD_POOL=ON
function print_usage {
echo -e "Usgae: ./build.sh"
echo -e " --platform=x: Specify build platform x. "
echo -e " All valid platforms are \"arm_android_32\", \"arm_android_64\",
\"arm_linux_32\", \"arm_linux_64\", \"x86_linux\", \"all\"."
echo -e " The default is \"all\"."
echo -e " --use_openmp=true|false: Build with openmp or not."
echo -e " The default is false."
echo -e " --use_opencl=true|false: Build with opencl or not."
echo -e " The default is false."
echo -e " --use_opengl=true|false: Build with opengl or not."
echo -e " The default is false."
echo -e " --use_vulkan=true|false: Build with vulkan or not."
echo -e " The default is false."
echo -e " --job=n: Build with n threads. Default is 1."
}
function parse_platform {
platform=`echo "$1" | awk -F '=' '{print $2}'`
}
function parse_nthreads {
build_threads=`echo "$1" | awk -F '=' '{print $2}'`
}
function parse_bool {
val=`echo "$1" | awk -F '=' '{print $2}'`
if [ $val == "true" ] || [ $val == "1" ]; then
return 1;
else
return 0;
fi
}
[ -z "${1:-}" ] && print_usage && exit 1;
while true; do
[ -z "${1:-}" ] && break;
case "$1" in
--platform=*) parse_platform "$1"; shift 1;
;;
--use_openmp=*) parse_bool "$1"; use_openmp=$?; shift 1;
;;
--use_openmp) use_openmp=1; shift 1;
;;
--use_opencl=*) parse_bool "$1"; use_opencl=$?; shift 1;
;;
--use_opencl) use_opencl=1; shift 1;
;;
--use_opengl=*) parse_bool "$1"; use_opengl=$?; shift 1;
;;
--use_opengl) use_opengl=1; shift 1;
;;
--use_vulkan=*) parse_bool "$1"; use_vulkan=$?; shift 1;
;;
--use_vulkan) use_vulkan=1; shift 1;
;;
--job=*) parse_nthreads "$1"; shift 1;
;;
clean) clean=1; shift 1;
;;
*) break;
esac
done
if [ $use_opencl == 1 ]; then
USE_OPENCL=ON
fi
if [ $use_opengl == 1 ]; then
USE_OPENGL=ON
fi
if [ $use_vulkan == 1 ]; then
USE_VULKAN=ON
fi
if [ $use_openmp == 1 ]; then
USE_OPENMP=ON
USE_THREAD_POOL=OFF
fi
true;
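For reference, typical invocations of ciscripts/build.sh with these options would look like the following (illustrative examples; the ANDROID_NDK path at the top of build.sh still has to match the local environment):

./build.sh --platform=arm_android_64 --use_vulkan=true --use_opencl=true --job=8
./build.sh --platform=x86_linux --use_openmp=true --job=4
./build.sh clean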

View File

@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars64.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja

View File

@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars32.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja

View File

@ -12,3 +12,9 @@ target_link_libraries(segment.out ${MNN_DEPS})
add_executable(expressDemo.out ${CMAKE_CURRENT_LIST_DIR}/expressDemo.cpp) add_executable(expressDemo.out ${CMAKE_CURRENT_LIST_DIR}/expressDemo.cpp)
target_link_libraries(expressDemo.out ${MNN_DEPS}) target_link_libraries(expressDemo.out ${MNN_DEPS})
add_executable(transformerDemo.out ${CMAKE_CURRENT_LIST_DIR}/transformerDemo.cpp)
target_link_libraries(transformerDemo.out ${MNN_DEPS})
add_executable(rasterDemo.out ${CMAKE_CURRENT_LIST_DIR}/rasterDemo.cpp)
target_link_libraries(rasterDemo.out ${MNN_DEPS})

View File

@ -53,7 +53,6 @@ int main(int argc, const char* argv[]) {
MNN_ERROR("Output Not valid\n"); MNN_ERROR("Output Not valid\n");
return 0; return 0;
} }
auto size = outputInfo->size;
//Test Speed //Test Speed
if (testTime > 0){ if (testTime > 0){
//Let the frequence up //Let the frequence up
@ -82,6 +81,7 @@ int main(int argc, const char* argv[]) {
} }
{ {
auto size = outputInfo->size;
auto outputPtr = output->readMap<float>(); auto outputPtr = output->readMap<float>();
if (nullptr == outputPtr) { if (nullptr == outputPtr) {
MNN_ERROR("Output Not valid read error\n"); MNN_ERROR("Output Not valid read error\n");

251
demo/exec/rasterDemo.cpp Normal file
View File

@ -0,0 +1,251 @@
//
// rasterDemo.cpp
// MNN
//
// Created by MNN on 2020/10/14.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <fstream>
#include <sstream>
#include <iostream>
#include <chrono>
#include <MNN/MNNDefine.h>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/Execution.hpp"
#include "core/Backend.hpp"
#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
using namespace MNN;
/*
1.Raster will do the index mapping like below:
for (region : regions)
src = region.src, dst = region.dst;
for (i = 0 -> size[0])
for (j = 0 -> size[1])
for (k = 0 -> size[2])
output[dst.offset + i * dst.stride[0] + j * dst.stride[1] + k * dst.stride[2]] =
region.origin[src.offset + i * src.stride[0] + j * src.stride[1] + k * src.stride[2]];
2. A Raster op has one input and one output, but the input is not a real input tensor; it is a
middle tensor with VIRTUAL memory type that holds many regions pointing to the real input tensors, like below.
input_0 --> region_0 --\
\
input_1 --> region_1 ---- middle ----> output
/
input_2 --> region_2 --/
3. This example reads a json file, constructs the corresponding Raster executions and runs them.
The input json file format is as below:
{
"inputs" : [
{
"id" : int,
"type" : "type_name", // float or int
"dims" : [int],
"data" : [int/float] // if null, fill with random number
}
],
"outputs" : [
// same format as "inputs"
],
"regions" : [
{
"id" : int, // points to outputs
"size" : [int],
"src" : {
"offset" : int,
"stride" : [int]
},
"dst" : { // same with src },
"origin" : int // point to inputs
}
]
}
*/
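/*
An example input.json in the format described above (values are illustrative only, not taken from the repository):
{
    "inputs"  : [ {"id" : 0, "type" : "float", "dims" : [1, 1, 2, 3], "data" : [1, 2, 3, 4, 5, 6]} ],
    "outputs" : [ {"id" : 0, "type" : "float", "dims" : [1, 1, 3, 2], "data" : []} ],
    "regions" : [
        {
            "id" : 0, "origin" : 0, "size" : [1, 2, 3],
            "src" : {"offset" : 0, "stride" : [6, 3, 1]},
            "dst" : {"offset" : 0, "stride" : [6, 1, 2]}
        }
    ]
}
This single region transposes the 2x3 input into the 3x2 output: dst index j + 2k receives src index 3j + k.
*/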
static std::string runRaster(std::string jsonString, int runNum) {
srand(0);
rapidjson::Document document;
document.Parse(jsonString.c_str());
if (document.HasParseError()) {
MNN_ERROR("Invalid Json Format!\n");
return "";
}
// prepare CPU backend
ScheduleConfig config;
config.type = MNN_FORWARD_CPU;
BackendConfig backendConfig;
backendConfig.precision = BackendConfig::Precision_High;
config.backendConfig = &backendConfig;
Backend::Info compute;
compute.type = config.type;
compute.numThread = config.numThread;
compute.user = config.backendConfig;
const RuntimeCreator* runtimeCreator(MNNGetExtraRuntimeCreator(compute.type));
std::unique_ptr<Runtime> runtime(runtimeCreator->onCreate(compute));
std::unique_ptr<Backend> backend(runtime->onCreate());
// build Op
std::unique_ptr<OpT> opt(new OpT);
opt->type = OpType_Raster;
flatbuffers::FlatBufferBuilder builder(1024);
builder.ForceDefaults(true);
auto len = Op::Pack(builder, opt.get());
builder.Finish(len);
auto buffer = builder.GetBufferPointer();
const Op* op = flatbuffers::GetMutableRoot<Op>(buffer);
// build tensors (NCHW) from json
std::vector<std::unique_ptr<Tensor>> inputs;
std::vector<std::unique_ptr<Tensor>> outputs;
auto readTensors = [&document, &backend](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
if (document.HasMember(type)) {
auto info = document[type].GetArray();
tensors.resize(info.Size());
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
tensors[id].reset(new Tensor(4));
auto tensor = tensors[id].get();
auto dataType = obj["type"].GetString();
bool isFloat = !strcmp(dataType, "float");
tensor->setType(isFloat ? DataType_DT_FLOAT : DataType_DT_INT32);
auto dims = obj["dims"].GetArray();
for (auto d = dims.begin(); d != dims.end(); d++) {
tensor->setLength(d - dims.begin(), d->GetInt());
}
TensorUtils::setLinearLayout(tensor);
backend->onAcquireBuffer(tensor, Backend::STATIC);
TensorUtils::getDescribe(tensor)->backend = backend.get();
auto data = obj["data"].GetArray();
if (!strcmp(type, "inputs")) {
bool hasData = data.Size() == tensor->elementSize();
auto dataIter = data.begin();
for (int i = 0; i < tensor->elementSize(); i++, dataIter++) {
if (isFloat) {
tensor->host<float>()[i] = hasData ? dataIter->GetFloat() : rand() % 10 / 10.0;
} else {
tensor->host<int>()[i] = hasData ? dataIter->GetInt() : rand() % 10;
}
}
}
}
}
};
readTensors(inputs, "inputs");
readTensors(outputs, "outputs");
// build middle tensors' region info from json
std::vector<std::unique_ptr<Tensor>> middles;
middles.resize(outputs.size());
if (document.HasMember("regions")) {
auto info = document["regions"].GetArray();
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
if (middles[id] == nullptr) {
middles[id].reset(new Tensor(4));
}
auto des = TensorUtils::getDescribe(middles[id].get());
des->memoryType = MNN::Tensor::InsideDescribe::MEMORY_VIRTUAL;
Tensor::InsideDescribe::Region region;
int origin = obj["origin"].GetInt();
region.origin = inputs[origin].get();
auto size = obj["size"].GetArray();
auto src = obj["src"].GetObject();
auto dst = obj["dst"].GetObject();
auto srcStride = src["stride"].GetArray();
auto dstStride = dst["stride"].GetArray();
for (int i = 0; i < 3; i++) {
region.size[i] = size[i].GetInt();
region.src.stride[i] = srcStride[i].GetInt();
region.dst.stride[i] = dstStride[i].GetInt();
}
region.src.offset = src["offset"].GetInt();
region.dst.offset = dst["offset"].GetInt();
des->regions.push_back(region);
}
}
// build execution of Raster and run them
for (int i = 0; i < outputs.size(); i++) {
std::vector<Tensor*> ins = {middles[i].get()}, outs = {outputs[i].get()};
std::unique_ptr<Execution> exe(backend->onCreate(ins, outs, op));
exe->onResize(ins, outs);
auto t1 = std::chrono::high_resolution_clock::now();
for (int j = 0; j < runNum; j++) {
exe->onExecute(ins, outs);
}
auto t2 = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
double time = time_span.count() * 1000.0 / runNum;
printf("For output_id = %d, run %d times, the average time is %f ms.\n", i, runNum, time);
}
auto writeTensors = [&document](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
auto info = document[type].GetArray();
for (auto iter = info.begin(); iter != info.end(); iter++) {
auto obj = iter->GetObject();
int id = obj["id"].GetInt();
auto data = obj["data"].GetArray();
if (data.Size() == tensors[id]->elementSize()) {
// this tensor already has data in the json, skip writing it back
continue;
}
bool isFloat = !strcmp(obj["type"].GetString(), "float");
data.Reserve(tensors[id]->elementSize(), document.GetAllocator());
for (int i = 0; i < tensors[id]->elementSize(); i++) {
if (isFloat) {
data.PushBack(tensors[id]->host<float>()[i], document.GetAllocator());
} else {
data.PushBack(tensors[id]->host<int>()[i], document.GetAllocator());
}
}
}
};
writeTensors(inputs, "inputs");
writeTensors(outputs, "outputs");
rapidjson::StringBuffer stringBuffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(stringBuffer);
document.Accept(writer);
return stringBuffer.GetString();
}
int main(int argc, const char* argv[]) {
if (argc < 2) {
printf("Usage: ./rasterDemo.out input.json [output.json] [runNum]\ndefault output is input, and default runNum is 100.\n");
return 0;
}
const char* inputFile = argv[1];
const char* outputFile = argv[1];
int runNum = 100;
if (argc >= 3) {
outputFile = argv[2];
}
if (argc >= 4) {
runNum = ::atoi(argv[3]);
}
std::ifstream in(inputFile);
if (in.fail()) {
printf("Invalid input Json File!\n");
return 0;
}
std::stringstream ss;
ss << in.rdbuf();
in.close();
std::ofstream out(outputFile);
if (out.fail()) {
printf("Invalid output Json File!\n");
return 0;
}
out << runRaster(ss.str(), runNum);
out.close();
printf("Run Raster Done!\n");
return 0;
}

View File

@ -0,0 +1,60 @@
#include <MNN/expr/Module.hpp>
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Executor.hpp>
#include <fstream>
#include <sstream>
#include <stdio.h>
#include <string.h>
using namespace MNN::Express;
using namespace MNN;
using namespace std;
int main(int argc, const char* argv[]) {
if (argc < 2) {
MNN_ERROR("Don't has model name\n");
return 0;
}
BackendConfig config;
//Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 4);
auto modelName = argv[1];
std::shared_ptr<Module> model;
model.reset(Module::load({"NmtModel/Placeholder", "NmtModel/Placeholder_1"}, {"NmtModel/transpose_2"}, modelName));
std::vector<int> input0 = {32,16,234,3215,61,135,29,10,24317,4661,4,0};
std::vector<int> input1 = {1,1,1,1,1,1,1,1,1,1,1,1};
auto first = _Input({1, (int)input0.size()}, NHWC, halide_type_of<int>());
::memcpy(first->writeMap<int>(), input0.data(), input0.size() * sizeof(int));
auto second = _Input({1, (int)input1.size()}, NHWC, halide_type_of<int>());
::memcpy(second->writeMap<int>(), input1.data(), input1.size() * sizeof(int));
std::vector<VARP> outputs;
for (int i = 0; i < 2; ++i) {
{
AUTOTIME;
Executor::getGlobalExecutor()->resetProfile();
outputs = model->onForward({first, second});
Executor::getGlobalExecutor()->dumpProfile();
}
std::ostringstream fileNameOs;
std::ostringstream dimInfo;
fileNameOs << i << "_output.txt";
auto info = outputs[0]->getInfo();
for (int d=0; d<info->dim.size(); ++d) {
dimInfo << info->dim[d] << "_";
}
auto fileName = fileNameOs.str();
MNN_PRINT("Output Name: %s, Dim: %s\n", fileName.c_str(), dimInfo.str().c_str());
auto ptr = outputs[0]->readMap<int>();
std::ofstream outputOs(fileName.c_str());
for (int i=0; i<info->size; ++i) {
outputOs << ptr[i] << "\n";
}
}
for (int i = 0; i < 10; ++i) {
AUTOTIME;
outputs = model->onForward({first, second});
}
return 0;
}

View File

@ -53,27 +53,23 @@ static int CompareElements(const LabeledElement *a, const LabeledElement *b) {
if (!_net || !_session) { if (!_net || !_session) {
return nil; return nil;
} }
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
MNN::Tensor copy(output);
auto input = _net->getSessionInput(_session, nullptr);
MNN::Tensor tensorCache(input);
input->copyToHostTensor(&tensorCache);
// run // run
NSTimeInterval begin = NSDate.timeIntervalSinceReferenceDate; NSTimeInterval begin = NSDate.timeIntervalSinceReferenceDate;
// you should set input data for each inference // you should set input data for each inference
if (cycles == 1) { for (int i = 0; i < cycles; i++) {
input->copyFromHostTensor(&tensorCache);
_net->runSession(_session); _net->runSession(_session);
} else { output->copyToHostTensor(&copy);
auto input = _net->getSessionInput(_session, nullptr);
MNN::Tensor tensorCache(input);
input->copyToHostTensor(&tensorCache);
for (int i = 0; i < cycles; i++) {
input->copyFromHostTensor(&tensorCache);
_net->runSession(_session);
}
} }
NSTimeInterval cost = NSDate.timeIntervalSinceReferenceDate - begin; NSTimeInterval cost = NSDate.timeIntervalSinceReferenceDate - begin;
// result // result
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
MNN::Tensor copy(output);
output->copyToHostTensor(&copy);
float *data = copy.host<float>(); float *data = copy.host<float>();
LabeledElement objects[1000]; LabeledElement objects[1000];
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {

View File

@ -1,14 +1,21 @@
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.cpp") file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.*")
option(MNN_EXPR_ENABLE_PROFILER "Support profile Expr's op cost" OFF) option(MNN_EXPR_ENABLE_PROFILER "Support profile Expr's op cost" OFF)
option(MNN_EXPR_SHAPE_EAGER "Force compute Expr's shape directly cost" OFF)
IF (MNN_EXPR_ENABLE_PROFILER) IF (MNN_EXPR_ENABLE_PROFILER)
add_definitions(-DMNN_EXPR_ENABLE_PROFILER) add_definitions(-DMNN_EXPR_ENABLE_PROFILER)
ENDIF() ENDIF()
IF (MNN_EXPR_SHAPE_EAGER)
add_definitions(-DMNN_EXPR_SHAPE_EAGER)
ENDIF()
IF(MNN_SEP_BUILD) IF(MNN_SEP_BUILD)
if (MNN_BUILD_FOR_ANDROID_COMMAND) if (MNN_BUILD_FOR_ANDROID_COMMAND)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../") set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../")
endif() endif()
add_library(MNN_Express SHARED ${MNN_EXPR_SRCS}) add_library(MNN_Express SHARED ${MNN_EXPR_SRCS})
target_link_libraries(MNN_Express MNN) target_link_libraries(MNN_Express MNN)
if (MNN_BUILD_MINI)
target_link_libraries(MNN_Express $<TARGET_OBJECTS:MNNTransform>)
endif()
ELSE() ELSE()
add_library(MNNExpress OBJECT ${MNN_EXPR_SRCS}) add_library(MNNExpress OBJECT ${MNN_EXPR_SRCS})
ENDIF() ENDIF()

30
express/Distributions.cpp Normal file
View File

@ -0,0 +1,30 @@
//
// Distributions.cpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "Distributions.hpp"
#include <cmath>
namespace MNN {
namespace Express {
void Distributions::uniform(const int count, const float min, const float max, float *r, std::mt19937 gen) {
std::uniform_real_distribution<float> dis(min, std::nextafter(max, std::numeric_limits<float>::max()));
for (int i = 0; i < count; i++) {
r[i] = dis(gen);
}
}
void Distributions::gaussian(const int count, const float mu, const float sigma, float *r, std::mt19937 gen) {
std::normal_distribution<float> dis(mu, sigma);
for (int i = 0; i < count; i++) {
r[i] = dis(gen);
}
}
} // namespace Express
} // namespace MNN

27
express/Distributions.hpp Normal file
View File

@ -0,0 +1,27 @@
//
// Distributions.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef Distributions_hpp
#define Distributions_hpp
#include <MNN/MNNDefine.h>
#include <random>
namespace MNN {
namespace Express {
class Distributions {
public:
static void uniform(const int count, const float min, const float max, float* r, std::mt19937 gen);
static void gaussian(const int count, const float mu, const float sigma, float* r, std::mt19937 gen);
};
} // namespace Express
} // namespace MNN
#endif // Distributions_hpp

File diff suppressed because it is too large

45
express/ExecutorScope.cpp Normal file
View File

@ -0,0 +1,45 @@
//
// ExecutorScope.cpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <thread>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/Scope.hpp>
#include <MNN/expr/ExecutorScope.hpp>
namespace MNN {
namespace Express {
typedef std::shared_ptr<Express::Executor> ExecutorRef;
#if !defined(__APPLE__)
thread_local static Scope<ExecutorRef> g_executor_scope;
#else
static Scope<ExecutorRef> g_executor_scope;
#endif
ExecutorScope::ExecutorScope(const std::shared_ptr<Executor>& current) {
g_executor_scope.EnterScope(current);
}
ExecutorScope::ExecutorScope(const std::string& scope_name,
const std::shared_ptr<Executor>& current) {
g_executor_scope.EnterScope(scope_name, current);
}
ExecutorScope::~ExecutorScope() {
g_executor_scope.ExitScope();
}
const std::shared_ptr<Executor> ExecutorScope::Current() {
if (g_executor_scope.ScopedLevel() > 0) {
return g_executor_scope.Current().content;
}
return Executor::getGlobalExecutor();
}
} // namespace Express
} // namespace MNN
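A minimal usage sketch for the scope mechanism above (illustrative only; it relies solely on the constructor and Current() shown in this file, and the caller is assumed to supply the executor):

#include <MNN/expr/ExecutorScope.hpp>

void runWithDedicatedExecutor(std::shared_ptr<MNN::Express::Executor> exe) {
    using namespace MNN::Express;
    ExecutorScope scope("worker", exe);      // push `exe` for the current thread
    auto current = ExecutorScope::Current(); // == exe while the scope object lives
    // ... build and evaluate VARPs here; they are dispatched through `current` ...
}   // scope destroyed: Current() falls back to Executor::getGlobalExecutor()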

View File

@ -8,23 +8,33 @@
#define FLATBUFFERS_PREFER_PRINTF #define FLATBUFFERS_PREFER_PRINTF
#include <MNN/expr/Expr.hpp> #include <MNN/expr/Expr.hpp>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExprCreator.hpp> #include <MNN/expr/ExprCreator.hpp>
#include <map> #include <map>
#include "core/MNNMemoryUtils.h"
#include "Utils.hpp" #include "Utils.hpp"
#include <map>
#include "core/FileLoader.hpp" #include "core/FileLoader.hpp"
#include <MNN/expr/Executor.hpp> #include "core/TensorUtils.hpp"
#include "MNN_generated.h" #include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE //#define MNN_OPEN_TIME_TRACE
#include "MNN/AutoTime.hpp" #include "MNN/AutoTime.hpp"
#include "MNN/expr/ExecutorScope.hpp"
//#define MNN_EXPRESS_ERROR_REPORT
static inline std::string numberToString(int index) { static inline std::string numberToString(int index) {
char s[10]; char s[10];
snprintf(s, 10, "%d", index); snprintf(s, 10, "%d", index);
return std::string(s); return std::string(s);
} }
static bool HasUnknownDim(const std::vector<int>& dims) {
for (const int& dim : dims) {
if (dim < 0) {
return true;
}
}
return false;
}
namespace MNN { namespace MNN {
namespace Express { namespace Express {
void Variable::Info::syncSize() { void Variable::Info::syncSize() {
@ -87,8 +97,7 @@ bool VARP::fix(VARP::InputType type) const {
} }
Expr::Expr(int outputSize) { Expr::Expr(int outputSize) {
mInside.reset(new Inside); mInside.reset(new Inside(outputSize));
mInside->mOutputInfos.resize(outputSize);
mOutputNames.resize(outputSize); mOutputNames.resize(outputSize);
} }
@ -117,27 +126,46 @@ void Expr::_addLinkForInputs(EXPRP expr) {
} }
} }
} }
EXPRP Expr::create(Variable::Info&& info) { EXPRP Expr::create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy) {
EXPRP expr(new Expr(1)); EXPRP expr(new Expr(1));
expr->mOp = nullptr; expr->mOp = nullptr;
auto originPtr = info.ptr; auto originPtr = ptr;
expr->mInside->mOutputInfos[0] = std::move(info); expr->mInside->mOutputInfos[0] = std::move(info);
auto& dstInfo = expr->mInside->mOutputInfos[0]; auto& dstInfo = expr->mInside->mOutputInfos[0];
expr->mInside->mInfoDirty = false;
dstInfo.syncSize(); dstInfo.syncSize();
if (dstInfo.size > 0) { Utils::copyInfoToTensor(expr->mInside->mOutputTensors[0], expr->mInside->mOutputInfos.data());
expr->mExtraBuffer.reset(new char[dstInfo.size * dstInfo.type.bytes()], std::default_delete<char[]>()); expr->mType = type;
expr->mInside->mOutputInfos[0].ptr = expr->mExtraBuffer.get(); if (type == VARP::CONSTANT) {
expr->mInside->mInfoDirty = false; TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::CONSTANT;
} else if (type == VARP::INPUT) {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::INPUT;
} else { } else {
expr->mInside->mOutputInfos[0].ptr = nullptr; // VARP::TRAINABLE
expr->mInside->mInfoDirty = true; TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::TRAINABLE;
}
if (dstInfo.size > 0 && copy) {
auto res = Utils::allocMemoryForHostTensor(expr->mInside->mOutputTensors[0]);
if (!res) {
MNN_ASSERT(false);
return nullptr;
}
} else {
expr->mInside->mOutputTensors[0]->buffer().host = nullptr;
} }
if (nullptr == originPtr) { if (nullptr == originPtr) {
expr->mType = VARP::INPUT; if (type == VARP::INPUT && dstInfo.size > 0) {
expr->mInside->mContentDirty = true;
}
return expr; return expr;
} }
expr->mType = VARP::CONSTANT; expr->mInside->mContentDirty = false;
::memcpy(expr->mInside->mOutputInfos[0].ptr, originPtr, dstInfo.size * dstInfo.type.bytes()); if (copy) {
::memcpy(expr->mInside->mOutputTensors[0]->buffer().host, originPtr, dstInfo.size * dstInfo.type.bytes());
} else {
TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->memoryType = Tensor::InsideDescribe::MEMORY_OUTSIDE;
expr->mInside->mOutputTensors[0]->buffer().host = (uint8_t*)originPtr;
}
return expr; return expr;
} }
EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize) { EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize) {
@ -147,8 +175,7 @@ EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP
expr->mOp = flatbuffers::GetMutableRoot<Op>(extra.first.get()); expr->mOp = flatbuffers::GetMutableRoot<Op>(extra.first.get());
expr->mOpBufferSize = extra.second; expr->mOpBufferSize = extra.second;
expr->mInputs = std::move(inputs); expr->mInputs = std::move(inputs);
expr->mInside->mInputInfos.resize(expr->mInputs.size()); expr->mInside->mReq = ExecutorScope::Current()->getRequirement(expr.get());
expr->mInside->mReq = Executor::getGlobalExecutor()->getRequirement(expr.get());
_addLinkForInputs(expr); _addLinkForInputs(expr);
return expr; return expr;
} }
@ -161,34 +188,34 @@ EXPRP Expr::create(const OpT* op, std::vector<VARP> inputs, int outputSize) {
info.dim[0] = 1; info.dim[0] = 1;
} }
info.order = Utils::revertFormat(op->main.AsInput()->dformat); info.order = Utils::revertFormat(op->main.AsInput()->dformat);
info.ptr = nullptr;
info.type = Utils::revertDataType(op->main.AsInput()->dtype); info.type = Utils::revertDataType(op->main.AsInput()->dtype);
return create(std::move(info)); return create(std::move(info), nullptr, VARP::INPUT);
} }
if (OpType_Const == op->type || OpType_TrainableParam == op->type) { if (OpType_Const == op->type || OpType_TrainableParam == op->type) {
Variable::Info info; Variable::Info info;
info.dim = op->main.AsBlob()->dims; info.dim = op->main.AsBlob()->dims;
info.order = Utils::revertFormat(op->main.AsBlob()->dataFormat); info.order = Utils::revertFormat(op->main.AsBlob()->dataFormat);
info.ptr = nullptr; void* ptr = nullptr;
info.type = Utils::revertDataType(op->main.AsBlob()->dataType); info.type = Utils::revertDataType(op->main.AsBlob()->dataType);
switch (op->main.AsBlob()->dataType) { switch (op->main.AsBlob()->dataType) {
case DataType_DT_INT8: case DataType_DT_INT8:
info.ptr = (void*)op->main.AsBlob()->int8s.data(); ptr = (void*)op->main.AsBlob()->int8s.data();
break; break;
case DataType_DT_INT32: case DataType_DT_INT32:
info.ptr = (void*)op->main.AsBlob()->int32s.data(); ptr = (void*)op->main.AsBlob()->int32s.data();
break; break;
case DataType_DT_UINT8: case DataType_DT_UINT8:
info.ptr = (void*)op->main.AsBlob()->uint8s.data(); ptr = (void*)op->main.AsBlob()->uint8s.data();
break; break;
case DataType_DT_FLOAT: case DataType_DT_FLOAT:
info.ptr = (void*)op->main.AsBlob()->float32s.data(); ptr = (void*)op->main.AsBlob()->float32s.data();
break; break;
default: default:
break; break;
} }
auto expr = create(std::move(info)); //MNN_ASSERT(nullptr != ptr);
if (OpType_TrainableParam == op->type) { auto expr = create(std::move(info), ptr, VARP::CONSTANT);
if (OpType_TrainableParam == op->type && nullptr != ptr) {
expr->mType = VARP::TRAINABLE; expr->mType = VARP::TRAINABLE;
} }
return expr; return expr;
@ -213,7 +240,7 @@ bool Expr::requireInfo() {
return false; return false;
} }
if (nullptr == mOp) { if (nullptr == mOp) {
return mInside->mOutputInfos[0].size > 0; return !HasUnknownDim(mInside->mOutputInfos[0].dim);
} }
bool ready = true; bool ready = true;
for (int i = 0; i < mInputs.size(); ++i) { for (int i = 0; i < mInputs.size(); ++i) {
@ -221,8 +248,8 @@ bool Expr::requireInfo() {
// The Variable is set nullptr by api // The Variable is set nullptr by api
return false; return false;
} }
mInside->mInputInfos[i] = mInputs[i]->getInfo(); auto inputInfo = mInputs[i]->getInfo();
if (nullptr == mInside->mInputInfos[i] && (!mInside->mReq.supportError[i])) { if (nullptr == inputInfo) {
#ifdef MNN_EXPRESS_ERROR_REPORT #ifdef MNN_EXPRESS_ERROR_REPORT
MNN_ERROR("%s, %d input not ready\n", mName.c_str(), i); MNN_ERROR("%s, %d input not ready\n", mName.c_str(), i);
#endif #endif
@ -233,15 +260,19 @@ bool Expr::requireInfo() {
for (int i = 0; i < mInputs.size(); ++i) { for (int i = 0; i < mInputs.size(); ++i) {
auto& v = mInputs[i]; auto& v = mInputs[i];
if (mInside->mReq.shapeNeedContent[i]) { if (mInside->mReq.shapeNeedContent[i]) {
// `readInternal` maybe return nullptr if element count is 0. // For shape need content, the content must not be nullptr
v->readInternal(true); auto ptr = v->readInternal(true);
if (nullptr == ptr) {
ready = false;
break;
}
} }
} }
if (!ready) { if (!ready) {
return false; return false;
} }
//MNN_PRINT("Info %s, %p Start\n", mName.c_str(), this); //MNN_PRINT("Info %s, %p Start\n", mName.c_str(), this);
auto res = Executor::getGlobalExecutor()->computeInfo(this); auto res = ExecutorScope::Current()->computeInfo(this);
//MNN_PRINT("Info Compute %s\n", mName.c_str()); //MNN_PRINT("Info Compute %s\n", mName.c_str());
if (NO_ERROR == res) { if (NO_ERROR == res) {
@ -261,6 +292,14 @@ const std::vector<WeakEXPRP>& Variable::toExprs() const {
VARP Variable::create(EXPRP expr, int index) { VARP Variable::create(EXPRP expr, int index) {
VARP res(new Variable(expr, index)); VARP res(new Variable(expr, index));
#ifdef MNN_EXPR_SHAPE_EAGER
auto info = expr->requireInfo();
if (!info) {
#ifdef MNN_EXPRESS_ERROR_REPORT
MNN_ERROR("Can't compute shape\n");
#endif
}
#endif
return res; return res;
} }
void Expr::replace(EXPRP old, EXPRP from) { void Expr::replace(EXPRP old, EXPRP from) {
@ -307,16 +346,22 @@ void Expr::replace(EXPRP old, EXPRP from) {
old->mValid = from->mValid; old->mValid = from->mValid;
old->mInside = from->mInside; old->mInside = from->mInside;
old->mInputs = from->mInputs; old->mInputs = from->mInputs;
std::vector<Expr*> visited;
old->visitOutputs([&](EXPRP expr, int index) { old->visitOutputs([&](EXPRP expr, int index) {
if (expr->mInside->mInfoDirty && expr->mValid && !expr->mInside->mLinkCache) { if (expr->visited()) {
return false; return false;
} }
visited.emplace_back(expr.get());
expr->setVisited(true);
expr->mInside->mCache.reset(); expr->mInside->mCache.reset();
expr->mInside->mCacheOffset = 0; expr->mInside->mCacheOffset = 0;
expr->mValid = true; expr->mValid = true;
expr->mInside->mInfoDirty = true; expr->mInside->mInfoDirty = true;
return true; return true;
}); });
for (auto e : visited) {
e->setVisited(false);
}
} }
void Variable::setName(const std::string& name) { void Variable::setName(const std::string& name) {
@ -351,7 +396,7 @@ bool Variable::input(VARP src) {
info = tempInfo.get(); info = tempInfo.get();
} }
auto dstInfo = getInfo(); auto dstInfo = getInfo();
bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size(); bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size() || info->type != dstInfo->type;
if (!needChange) { if (!needChange) {
for (int i=0; i<info->dim.size(); ++i) { for (int i=0; i<info->dim.size(); ++i) {
if (dstInfo->dim[i] != info->dim[i]) { if (dstInfo->dim[i] != info->dim[i]) {
@ -362,22 +407,19 @@ bool Variable::input(VARP src) {
} }
if (!mFrom->mInside->mCache) { if (!mFrom->mInside->mCache) {
Executor::getGlobalExecutor()->makeCache({mFrom}, false); ExecutorScope::Current()->makeCache({mFrom}, false);
} }
if (needChange) { if (needChange) {
bool needAlloc = info->size * info->type.bytes() > mFrom->mInside->mOutputInfos[0].size * mFrom->mInside->mOutputInfos[0].type.bytes();
mFrom->mInside->mOutputInfos[0] = *info; mFrom->mInside->mOutputInfos[0] = *info;
if (needAlloc) { Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
mFrom->mExtraBuffer.reset(new char[info->size * info->type.bytes()], std::default_delete<char[]>()); Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
} Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
mFrom->mInside->mOutputInfos[0].ptr = mFrom->mExtraBuffer.get();
mFrom->mInside->mCache->setShapeDirty(0, mFrom->outputInfo(0));
} }
if (info->size) { if (info->size) {
auto dstPtr = writeInternal(false); auto dstPtr = writeInternal(false);
auto srcPtr = src->readMap<void>(); auto srcPtr = src->readMap<void>();
if (nullptr == dstPtr || nullptr == srcPtr) { if (nullptr == dstPtr || nullptr == srcPtr) {
MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n"); //MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
return false; return false;
} }
::memcpy(dstPtr, srcPtr, info->size * info->type.bytes()); ::memcpy(dstPtr, srcPtr, info->size * info->type.bytes());
@ -387,7 +429,7 @@ bool Variable::input(VARP src) {
} else { } else {
informDirty(); informDirty();
} }
mFrom->mInside->mCache->setContentReady(); mFrom->mInside->mContentDirty = false;
return true; return true;
} }
@ -396,23 +438,44 @@ void Variable::replace(VARP dst, VARP src) {
dst->setExpr(nullptr, 0); dst->setExpr(nullptr, 0);
return; return;
} }
if (nullptr == dst) {
dst.mContent = src.mContent;
return;
}
if (src->mFrom.get() == dst->mFrom.get()) { if (src->mFrom.get() == dst->mFrom.get()) {
dst->mFromIndex = src->mFromIndex; dst->mFromIndex = src->mFromIndex;
return; return;
} }
if (src->mFrom->outputSize() != dst->mFrom->outputSize()) { if (src->mFrom->outputSize() != dst->mFrom->outputSize()) {
// Can't replace Expr, Just replace VARP // Can't replace Expr, Just replace VARP
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) { std::vector<Expr*> visited;
src->mFrom->mTo.emplace_back(expr); dst->mFrom->visitOutputs([src, dst, &visited](EXPRP expr, int index) {
return false; if (expr->visited()) {
}); return false;
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) { }
expr->setVisited(true);
visited.emplace_back(expr.get());
expr->mInside->mCache.reset(); expr->mInside->mCache.reset();
expr->mInside->mCacheOffset = 0; expr->mInside->mCacheOffset = 0;
expr->mValid = true; expr->mValid = true;
expr->mInside->mInfoDirty = true; expr->mInside->mInfoDirty = true;
expr->mInside->mContentDirty = true;
return true; return true;
}); });
for (auto v : visited) {
v->setVisited(false);
}
dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
for (int i =0; i< expr->inputs().size(); ++i) {
auto input = expr->inputs()[i];
if (input == dst) {
expr->mInputs[i] = src;
}
}
src->mFrom->mTo.emplace_back(expr);
return false;
});
dst->mFrom = src->mFrom; dst->mFrom = src->mFrom;
dst->mFromIndex = src->mFromIndex; dst->mFromIndex = src->mFromIndex;
return; return;
@ -452,15 +515,19 @@ bool Variable::resize(INTS dims) {
} }
info.dim = dims; info.dim = dims;
info.syncSize(); info.syncSize();
mFrom->mExtraBuffer.reset(new char[info.size * info.type.bytes()], std::default_delete<char[]>()); Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
info.ptr = mFrom->mExtraBuffer.get(); Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
if (0 >= info.size) {
mFrom->mValid = true; return false;
mFrom->mInside->mInputInfos.clear();
auto cache = mFrom->mInside->mCache;
if (nullptr != cache) {
cache->setShapeDirty(0, mFrom->outputInfo(0));
} }
bool res = Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
if (!res) {
return false;
}
mFrom->mValid = true;
mFrom->inside()->mInfoDirty = false;
mFrom->inside()->mContentDirty = true;
mFrom->visitOutputs([](EXPRP expr, int index) { return expr->setInfoDirty(); }); mFrom->visitOutputs([](EXPRP expr, int index) { return expr->setInfoDirty(); });
return true; return true;
} }
@ -478,11 +545,12 @@ void Expr::visit(EXPRP expr, const std::function<bool(EXPRP)>& before, const std
void* Variable::readInternal(bool forShape) { void* Variable::readInternal(bool forShape) {
if (nullptr == mFrom->get()) { if (nullptr == mFrom->get()) {
if (VARP::INPUT == mFrom->mType) { if (VARP::INPUT == mFrom->mType) {
if (nullptr == mFrom->mInside->mCache) { if (mFrom->mInside->mContentDirty) {
return nullptr; return nullptr;
} }
} }
return mFrom->outputInfo(mFromIndex)->ptr; //MNN_ASSERT(nullptr != mFrom->inside()->mOutputTensors[0]->buffer().host);
return mFrom->inside()->mOutputTensors[0]->buffer().host;
} }
auto res = mFrom->requireInfo(); auto res = mFrom->requireInfo();
if (false == res) { if (false == res) {
@ -490,21 +558,26 @@ void* Variable::readInternal(bool forShape) {
} }
auto cache = mFrom->inside()->mCache; auto cache = mFrom->inside()->mCache;
if (nullptr == cache) { if (nullptr == cache) {
Executor::getGlobalExecutor()->makeCache({mFrom}, forShape); ExecutorScope::Current()->makeCache({mFrom}, forShape);
cache = mFrom->inside()->mCache; cache = mFrom->inside()->mCache;
} }
if (nullptr == cache) { if (nullptr == cache) {
return nullptr; return nullptr;
} }
if (NO_ERROR != Executor::getGlobalExecutor()->runCache(cache)) { if (NO_ERROR != ExecutorScope::Current()->runCache(cache)) {
return nullptr; return nullptr;
} }
cache->syncOutput(mFrom->mInside->mCacheOffset + mFromIndex, mFrom->outputInfo(mFromIndex)); return Executor::mapOutput(cache.get(), mFrom->mInside->mCacheOffset + mFromIndex, mFrom->mInside->mOutputTensors[mFromIndex]);
return mFrom->outputInfo(mFromIndex)->ptr;
} }
void Variable::informDirty() { void Variable::informDirty() {
mFrom->visitOutputs([](EXPRP expr, int index) { std::vector<Expr*> visited;
mFrom->visitOutputs([&visited](EXPRP expr, int index) {
if (expr->visited()) {
return false;
}
visited.emplace_back(expr.get());
expr->setVisited(true);
if (expr->inside()->mReq.shapeNeedContent.empty()) { if (expr->inside()->mReq.shapeNeedContent.empty()) {
// Not init // Not init
return false; return false;
@ -514,28 +587,32 @@ void Variable::informDirty() {
expr->visitOutputs([](EXPRP e, int index) { return e->setInfoDirty(); }); expr->visitOutputs([](EXPRP e, int index) { return e->setInfoDirty(); });
return false; return false;
} }
if (expr->inside()->mContentDirty) {
return false;
}
expr->inside()->mContentDirty = true;
if (expr->inside()->mReq.contentNeedContent[index]) { if (expr->inside()->mReq.contentNeedContent[index]) {
if (expr->inside()->mCache != nullptr) { if (expr->inside()->mCache != nullptr) {
expr->inside()->mCache->setContentDirty(); Executor::setContentDirty(expr->inside()->mCache.get());
} }
return true; return true;
} }
return false; return false;
}); });
for (auto e : visited) {
e->setVisited(false);
}
} }
void Variable::prepareCompute(const std::vector<VARP>& vars, bool forceCpu) { void Variable::prepareCompute(const std::vector<VARP>& vars, bool forceCpu) {
std::vector<EXPRP> exprs; std::vector<EXPRP> exprs;
for (auto v : vars) { for (auto v : vars) {
if (v->expr().first->inside()->mCache == nullptr) { if (!v->expr().first->visited()) {
v->expr().first->inside()->mCache = nullptr;
v->expr().first->requireInfo(); v->expr().first->requireInfo();
v->expr().first->setVisited(true);
exprs.emplace_back(v->expr().first); exprs.emplace_back(v->expr().first);
} }
} }
Executor::getGlobalExecutor()->makeCache(std::move(exprs), forceCpu); for (auto v : vars) {
v->expr().first->setVisited(false);
}
ExecutorScope::Current()->makeCache(std::move(exprs), forceCpu);
} }
void* Variable::writeInternal(bool inform) { void* Variable::writeInternal(bool inform) {
@ -545,16 +622,8 @@ void* Variable::writeInternal(bool inform) {
if (inform) { if (inform) {
informDirty(); informDirty();
} }
auto cache = mFrom->mInside->mCache; mFrom->mInside->mContentDirty = false;
if (nullptr == cache) { return mFrom->inside()->mOutputTensors[0]->host<void>();
Executor::getGlobalExecutor()->makeCache({mFrom});
cache = mFrom->mInside->mCache;
}
if (nullptr == cache) {
return nullptr;
}
mFrom->mInside->mCache->setContentReady();
return mFrom->mInside->mOutputInfos[0].ptr;
} }
void Variable::unMap() { void Variable::unMap() {
@ -591,25 +660,30 @@ bool Expr::setInfoDirty() {
mInside->mContentDirty = true; mInside->mContentDirty = true;
mValid = true; mValid = true;
if (mInside->mCache != nullptr) { if (mInside->mCache != nullptr) {
mInside->mCache->setShapeDirty(0, nullptr); Executor::setShapeDirty(mInside->mCache.get());
}
for (auto o : mInside->mOutputTensors) {
Utils::releaseMemoryForHostTensor(o);
} }
return true; return true;
} }
std::vector<VARP> Variable::load(const char* fileName) { std::vector<VARP> Variable::load(const char* fileName) {
FileLoader loader(fileName);
if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
return {};
}
loader.read();
if (!loader.valid()) {
return {};
}
AutoStorage<uint8_t> buffer; AutoStorage<uint8_t> buffer;
loader.merge(buffer); {
if (buffer.get() == nullptr) { FileLoader loader(fileName);
return {}; if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
return {};
}
loader.read();
if (!loader.valid()) {
return {};
}
loader.merge(buffer);
if (buffer.get() == nullptr) {
return {};
}
} }
return load(buffer.get(), buffer.size()); return load(buffer.get(), buffer.size());
} }
@ -722,6 +796,7 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
} else { } else {
MNN_ASSERT(1 == expr->outputSize()); MNN_ASSERT(1 == expr->outputSize());
auto& info = expr->mInside->mOutputInfos[0]; auto& info = expr->mInside->mOutputInfos[0];
auto ptr = expr->mInside->mOutputTensors[0]->host<void>();
op.reset(new OpT); op.reset(new OpT);
if (expr->mType != VARP::INPUT) { if (expr->mType != VARP::INPUT) {
auto blob = new BlobT; auto blob = new BlobT;
@ -730,16 +805,20 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
if (info.type.code == halide_type_float) { if (info.type.code == halide_type_float) {
blob->dataType = DataType_DT_FLOAT; blob->dataType = DataType_DT_FLOAT;
blob->float32s.resize(info.size); blob->float32s.resize(info.size);
::memcpy(blob->float32s.data(), info.ptr, info.size * sizeof(float)); ::memcpy(blob->float32s.data(), ptr, info.size * sizeof(float));
} else if (info.type.code == halide_type_int) { } else if (info.type.code == halide_type_int && info.type.bits == 32) {
blob->dataType = DataType_DT_INT32; blob->dataType = DataType_DT_INT32;
blob->int32s.resize(info.size); blob->int32s.resize(info.size);
::memcpy(blob->int32s.data(), info.ptr, info.size * sizeof(int)); ::memcpy(blob->int32s.data(), ptr, info.size * sizeof(int));
} } else if (info.type.code == halide_type_int && info.type.bits == 8) {
else if (info.type.code == halide_type_uint && info.type.bits == 8) { blob->dataType = DataType_DT_INT8;
blob->int8s.resize(info.size);
auto pptr = (int8_t *)ptr;
::memcpy(blob->int8s.data(), ptr, info.size * sizeof(int8_t));
} else if (info.type.code == halide_type_uint && info.type.bits == 8) {
blob->dataType = DataType_DT_UINT8; blob->dataType = DataType_DT_UINT8;
blob->uint8s.resize(info.size); blob->uint8s.resize(info.size);
::memcpy(blob->uint8s.data(), info.ptr, info.size * sizeof(uint8_t)); ::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(uint8_t));
} }
op->type = OpType_Const; op->type = OpType_Const;
if (expr->mType == VARP::TRAINABLE) { if (expr->mType == VARP::TRAINABLE) {
@ -781,12 +860,12 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
auto op = dest->oplists[index].get(); auto op = dest->oplists[index].get();
auto tensorIndexOffset = varIndexInfo[expr]; auto tensorIndexOffset = varIndexInfo[expr];
for (int v=0; v<expr->outputSize(); ++v) { for (int v=0; v<expr->outputSize(); ++v) {
auto const tensorIndex = tensorIndexOffset + v; auto subindex = tensorIndexOffset + v;
if (dest->tensorName[tensorIndex].empty()) { if (dest->tensorName[subindex].empty()) {
if (v == 0) { if (v == 0) {
dest->tensorName[tensorIndex] = op->name; dest->tensorName[subindex] = op->name;
} else { } else {
dest->tensorName[tensorIndex] = op->name + numberToString(v); dest->tensorName[subindex] = op->name + numberToString(v);
} }
} }
} }

210
express/Initializer.cpp Normal file
View File

@ -0,0 +1,210 @@
//
// Initializer.cpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "Initializer.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <cmath>
#include <vector>
#include "Distributions.hpp"
#include "RandomGenerator.hpp"
namespace MNN {
namespace Express {
Express::VARP Initializer::createConstVar(Express::INTS dim, Express::Dimensionformat format) {
auto res = Express::_Input(dim, format, halide_type_of<float>());
this->onExecute(res);
res.fix(Express::VARP::CONSTANT);
return res;
}
class ConstantInitializer : public Initializer {
public:
ConstantInitializer(float value) : mConstant(value) {
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
auto ptr = p->writeMap<float>();
for (int i = 0; i < count; i++) {
ptr[i] = mConstant;
}
}
private:
float mConstant;
};
Initializer* Initializer::constValue(float value) {
return new ConstantInitializer(value);
}
class UniformInitializer : public Initializer {
public:
UniformInitializer(float min = 0, float max = 1) {
mMin = min;
mMax = max;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
Distributions::uniform(count, mMin, mMax, p->writeMap<float>(), RandomGenerator::generator());
}
private:
float mMin;
float mMax;
};
Initializer* Initializer::uniform(float minValue, float maxValue) {
return new UniformInitializer(minValue, maxValue);
}
class XavierInitializer : public Initializer {
public:
XavierInitializer(VarianceNorm norm = FANIN) {
mNorm = norm;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int fanIn = count / dims[0];
int fanOut = dims.size() > 1 ? count / dims[1] : count;
float n = fanIn; // default: FANIN
if (mNorm == VarianceNorm::AVERAGE) {
n = (fanIn + fanOut) / 2.0f;
} else if (mNorm == VarianceNorm::FANOUT) {
n = fanOut;
}
float scale = sqrtf(3.0f / n);
Distributions::uniform(count, -scale, scale, p->writeMap<float>(), RandomGenerator::generator());
}
private:
VarianceNorm mNorm;
};
Initializer* Initializer::xavier(VarianceNorm norm) {
return new XavierInitializer(norm);
}
class GaussianInitializer : public Initializer {
public:
GaussianInitializer(float mean = 0, float std = 1) {
mMean = mean;
mStd = std;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
Distributions::gaussian(count, mMean, mStd, p->writeMap<float>(), RandomGenerator::generator());
}
private:
float mMean;
float mStd;
};
Initializer* Initializer::gauss(float mean, float std) {
return new GaussianInitializer(mean, std);
}
class MSRAInitializer : public Initializer {
public:
MSRAInitializer(VarianceNorm norm = FANIN) {
mNorm = norm;
}
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int fanIn = count / dims[0];
int fanOut = dims.size() > 1 ? count / dims[1] : count;
float n = fanIn; // default: FANIN
if (mNorm == VarianceNorm::AVERAGE) {
n = (fanIn + fanOut) / 2.0f;
} else if (mNorm == VarianceNorm::FANOUT) {
n = fanOut;
}
float std = sqrtf(2.0f / n);
Distributions::gaussian(count, 0.0f, std, p->writeMap<float>(), RandomGenerator::generator());
}
private:
VarianceNorm mNorm;
};
Initializer* Initializer::MSRA(VarianceNorm norm) {
return new MSRAInitializer(norm);
}
class BilinearInitializer : public Initializer {
public:
BilinearInitializer() = default;
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
MNN_ASSERT(dims.size() == 4);
MNN_ASSERT(dims[2] == dims[3]); // NCHW, H == W
// referenced from Caffe
// https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
int f = ceilf(dims[3] / 2.0f);
float c = (dims[3] - 1) / (2.0f * f);
auto ptr = p->writeMap<float>();
for (int i = 0; i < count; i++) {
float x = i % dims[3];
float y = (i / dims[3]) % dims[2];
ptr[i] = (1 - std::fabs(x / f - c)) * (1 - std::fabs(y / f - c));
}
}
};
Initializer* Initializer::bilinear() {
return new BilinearInitializer();
}
class PositiveUnitball : public Initializer {
public:
PositiveUnitball() = default;
virtual void onExecute(Express::VARP p) override {
const int count = p->getInfo()->size;
MNN_ASSERT(count > 0);
const std::vector<int> dims = p->getInfo()->dim;
auto ptr = p->writeMap<float>();
Distributions::uniform(count, 0, 1, ptr, RandomGenerator::generator());
int dim = count / dims[0];
for (int i = 0; i < dims[0]; i++) {
float sum = 0;
for (int j = 0; j < dim; j++) {
sum += ptr[i * dim + j];
}
for (int j = 0; j < dim; j++) {
ptr[i * dim + j] = ptr[i * dim + j] / sum;
}
}
}
};
Initializer* Initializer::positiveUnitball() {
return new PositiveUnitball();
}
} // namespace Express
} // namespace MNN
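A minimal usage sketch for the new Initializer API (illustrative only, not part of the commit; assumes the headers above are on the include path):
// Sketch: fill a 4-D constant with Xavier (fan-in) values and freeze it.
#include "Initializer.hpp"
#include <memory>
using namespace MNN::Express;
void initializerExample() {
    std::shared_ptr<Initializer> init(Initializer::xavier(Initializer::FANIN));
    // createConstVar() runs onExecute() on a freshly created variable and then fixes it as CONSTANT.
    VARP weight = init->createConstVar({32, 16, 3, 3}, NCHW);
}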

express/Initializer.hpp (new file, 43 lines)

@ -0,0 +1,43 @@
//
// Initializer.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef Initializer_hpp
#define Initializer_hpp
#include <MNN/expr/Expr.hpp>
namespace MNN {
namespace Express {
class RandomGenerator;
class MNN_PUBLIC Initializer {
public:
Initializer() = default;
virtual ~Initializer() = default;
Express::VARP createConstVar(Express::INTS dim, Express::Dimensionformat format = Express::NCHW);
virtual void onExecute(Express::VARP p) = 0;
static Initializer* constValue(float value);
static Initializer* uniform(float minValue = 0.0f, float maxValue = 1.0f);
enum VarianceNorm {
FANIN,
FANOUT,
AVERAGE,
};
static Initializer* xavier(VarianceNorm norm = FANIN);
static Initializer* gauss(float mean = 0.0f, float std = 1.0f);
static Initializer* MSRA(VarianceNorm norm = FANIN);
static Initializer* bilinear();
static Initializer* positiveUnitball();
};
} // namespace Express
} // namespace MNN
#endif // Initializer_hpp


@ -30,7 +30,18 @@ static DataType _convertDataType(halide_type_t type) {
    }
    return DataType_DT_INVALID;
}
static VARP _checkNC4HW4(VARP x) {
#ifdef MNN_EXPR_SHAPE_EAGER
    auto info = x->getInfo();
    if (nullptr != info && info->order == NC4HW4) {
        return _Convert(x, NCHW);
    }
#endif
    return x;
}
static VARP _Binary(VARP x, VARP y, BinaryOpOperation operation) {
    x = _checkNC4HW4(x);
    y = _checkNC4HW4(y);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_BinaryOp;
    op->type = OpType_BinaryOp;
@ -49,6 +60,7 @@ static VARP _Unary(VARP x, UnaryOpOperation operation) {
    return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
    x = _checkNC4HW4(x);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ReductionParam;
    op->type = OpType_Reduction;
@ -60,6 +72,7 @@ static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
    return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _ReduceMutable(VARP x, VARP dim, ReductionType type, bool keepDim) {
    x = _checkNC4HW4(x);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ReductionParam;
    op->type = OpType_Reduction;
@ -955,6 +968,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMax(VARP input, int axis) {
    input = _checkNC4HW4(input);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ArgMax;
    op->type = OpType_ArgMax;
@ -976,6 +990,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMin(VARP input, int axis) {
    input = _checkNC4HW4(input);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ArgMax;
    op->type = OpType_ArgMin;

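What the new _checkNC4HW4 guard buys callers, sketched below (illustrative; the conversion only happens when MNN_EXPR_SHAPE_EAGER is defined and the input really is NC4HW4):
// Previously a caller had to convert NC4HW4 inputs by hand before reducing:
auto s1 = _ReduceSum(_Convert(nc4hw4Var, NCHW), {2, 3}, true);
// With this change _Reduce/_Binary/_ArgMax call _checkNC4HW4 themselves, so this is equivalent:
auto s2 = _ReduceSum(nc4hw4Var, {2, 3}, true);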

@ -5,6 +5,7 @@
//  Created by MNN on 2019/08/20.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MergeOptimizer_hpp
#define MergeOptimizer_hpp


@ -54,16 +54,14 @@ VARP _Input(INTS shape, Dimensionformat data_format, halide_type_t dtype) {
    info.dim = std::move(shape);
    info.order = data_format;
    info.type = dtype;
    return (Variable::create(Expr::create(std::move(info), nullptr, VARP::INPUT)));
}
VARP _Scalar(const void* ptr, halide_type_t type) {
    Variable::Info info;
    info.dim = {};
    info.order = NHWC;
    info.type = type;
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
/*create a constant variable.
Args:
@ -79,8 +77,7 @@ VARP _Const(const void* ptr, INTS shape, Dimensionformat format, halide_type_t t
    info.dim = std::move(shape);
    info.order = format;
    info.type = type;
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
VARP _Const(float value, INTS shape, Dimensionformat format) {
@ -93,8 +90,8 @@ VARP _Const(float value, INTS shape, Dimensionformat format) {
    for (int i = 0; i < info.size; ++i) {
        values[i] = value;
    }
    auto ptr = (void*)values.data();
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}
VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format, halide_type_t type) {
@ -107,6 +104,23 @@ VARP _TrainableParam(float value, INTS dims, Dimensionformat format) {
    v.fix(VARP::TRAINABLE);
    return v;
}
VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape) {
std::unique_ptr<OpT> ipOp(new OpT);
ipOp->type = OpType_InnerProduct;
ipOp->main.type = OpParameter_InnerProduct;
ipOp->main.value = new InnerProductT;
auto ipParam = ipOp->main.AsInnerProduct();
ipParam->outputCount = outputShape[1];
if(!bias.empty()) {
ipParam->biasTerm = 1;
}
ipParam->weightSize = weight.size();
ipParam->weight = std::move(weight);
ipParam->bias = std::move(bias);
return (Variable::create(Expr::create(ipOp.get(), {x})));
}
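A hedged usage sketch of the new _InnerProduct helper; the weight vector is assumed to be flattened row-major as outputCount x inputCount, and outputShape carries {batch, outputCount}:
// 2-output fully connected layer on a 1x4 input.
std::vector<float> weight = {1, 0, 0, 0,
                             0, 1, 0, 0};      // 2 x 4
std::vector<float> bias   = {0.5f, -0.5f};
auto x = _Input({1, 4}, NCHW);
auto y = _InnerProduct(std::move(weight), std::move(bias), x, {1, 2});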
VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads) {
    std::unique_ptr<OpT> convOp(new OpT);
@ -183,7 +197,7 @@ VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS
    return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6, int nbits) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_Convolution;
    if (channel[0] == channel[1] && channel[0] == group) {
@ -285,6 +299,42 @@ VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS
    return (Variable::create(Expr::create(std::move(convOp), {x, weight})));
}
VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_Deconvolution;
if (channel[0] == channel[1] && channel[0] == group) {
convOp->type = OpType_DeconvolutionDepthwise;
}
convOp->main.type = OpParameter_Convolution2D;
convOp->main.value = new Convolution2DT;
auto conv2D = convOp->main.AsConvolution2D();
conv2D->common.reset(new Convolution2DCommonT);
conv2D->common->padMode = _convertPadMode(pad);
if (pads.size() == 2) {
conv2D->common->padX = pads[0];
conv2D->common->padY = pads[1];
} else {
conv2D->common->pads = std::move(pads);
}
conv2D->common->strideX = stride[0];
conv2D->common->strideY = stride[1];
conv2D->common->group = group;
conv2D->common->outputCount = channel[1];
conv2D->common->inputCount = channel[0];
conv2D->common->dilateX = dilate[0];
conv2D->common->dilateY = dilate[1];
conv2D->common->kernelX = kernelSize[0];
conv2D->common->kernelY = kernelSize[1];
conv2D->common->relu6 = relu6;
conv2D->common->relu = relu;
MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]);
conv2D->weight = std::move(weight);
MNN_ASSERT(bias.size() == channel[1]);
conv2D->bias = std::move(bias);
return (Variable::create(Expr::create(convOp.get(), {x})));
}
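An illustrative call of the new float-weight _Deconv overload; per the asserts above, the weight buffer must hold channel[1] * (channel[0] / group) * kernelY * kernelX values and the bias channel[1] values:
INTS channel = {3, 8};                          // {inputCount, outputCount}
INTS kernel  = {2, 2};
std::vector<float> weight(8 * 3 * 2 * 2, 0.01f);
std::vector<float> bias(8, 0.0f);
auto x = _Input({1, 3, 16, 16}, NC4HW4);
auto y = _Deconv(std::move(weight), std::move(bias), x, channel, kernel,
                 SAME, {2, 2}, {1, 1}, 1, {}, false, false);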
static VARP _Pool(VARP x, INTS kernel, INTS stride, PoolType type, PaddingMode pad, INTS pads) {
    std::unique_ptr<OpT> pool(new OpT);
    pool->type = OpType_Pooling;
@ -381,9 +431,13 @@ x: A variable.
Returns:
output: A variable with the same type as `x`.
*/
VARP _Relu6(VARP x, float minValue, float maxValue) {
    std::unique_ptr<OpT> relu(new OpT);
    relu->type = OpType_ReLU6;
    relu->main.value = new Relu6T;
    relu->main.type = OpParameter_Relu6;
    relu->main.AsRelu6()->maxValue = maxValue;
    relu->main.AsRelu6()->minValue = minValue;
    return (Variable::create(Expr::create(relu.get(), {x})));
}
/*Given an input value x, it computes the output as x if x > 0 and slopes * x if x <= 0.
@ -746,9 +800,12 @@ input: A variable.
Returns:
A variable of Halide_Type_Int.
*/
VARP _Shape(VARP input, bool nchw) {
    std::unique_ptr<OpT> shape(new OpT);
    shape->type = OpType_Shape;
    if (nchw) {
        shape->defaultDimentionFormat = MNN_DATA_FORMAT_NCHW;
    }
    return (Variable::create(Expr::create(std::move(shape), {input})));
}
/*Stacks a list of rank-R variables into one rank-(R+1) variable.
@ -906,6 +963,21 @@ VARP _Elu(VARP features, float alpha) {
    op->main.value = eluParam;
    return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Given an input value x, it computes the output as 1.0 if x > threshold and 0.0 if x <= threshold.
features: A variable of type Halide_Type_Float
threshold: threshold value
Returns:
A variable. Has the same type as features.
*/
VARP _Threshold(VARP features, float threshold) {
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_Threshold;
auto eluParam = new ELUT;
op->main.type = OpParameter_ELU;
eluParam->alpha = threshold;
op->main.value = eluParam;
return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Computes the size of the variable
Args:
input: A variable of type Halide_Type_Float or Halide_Type_Int
@ -1049,7 +1121,6 @@ std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims) {
    op->main.type = OpParameter_MomentsParam;
    momentsParam->dim = axis;
    momentsParam->keepDims = keepDims;
-   momentsParam->dType = (MNN::DataType)Utils::convertDataType(x->getInfo()->type);
    op->main.value = momentsParam;
    EXPRP expr = Expr::create(std::move(op), {x}, 2);
    std::vector<VARP> res;
@ -1405,7 +1476,7 @@ VARP _ZeroGrad(VARP x) {
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_ConvInt8;
    if (channel[0] == channel[1] && channel[0] == group) {
@ -1433,9 +1504,16 @@ VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<fl
    conv2D->symmetricQuan->bias = std::move(bias);
    conv2D->symmetricQuan->scale = std::move(scale);
    conv2D->symmetricQuan->weight = std::move(weight);
    conv2D->symmetricQuan->nbits = nbits;
    return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim) {
std::unique_ptr<MNN::OpT> cosineSimilarityOp(new MNN::OpT);
cosineSimilarityOp->type = MNN::OpType_CosineSimilarity;
return (Variable::create(Expr::create(std::move(cosineSimilarityOp), {input0, input1, inputDim})));
}
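A hedged sketch of _CosineSimilarity; the third argument is assumed to be an int32 variable holding the reduction axis:
auto a   = _Input({1, 128}, NCHW);
auto b   = _Input({1, 128}, NCHW);
int axis = 1;
auto dim = _Const(&axis, {1}, NCHW, halide_type_of<int32_t>());
auto sim = _CosineSimilarity(a, b, dim);   // cosine similarity of a and b along axis 1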
VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue/*For future*/) {
    auto xInfo = x->getInfo();
    auto scaleInfo = scale->getInfo();


@ -22,28 +22,7 @@ Optimizer::Parameters::~Parameters() {
    }
}
std::shared_ptr<Optimizer> Optimizer::create(Config config) {
-   const int numThread = config.numThread;
-   auto forwardType = config.forwardType;
-   if (forwardType != MNN_FORWARD_ALL) {
-       if (MNNGetExtraBackendCreator(forwardType) == nullptr) {
-           return nullptr;
-       }
-       return std::shared_ptr<Optimizer>(new MergeOptimizer(config.forwardType, numThread, nullptr));
-   }
-   auto device = config.device;
-   if (CPU == device) {
-       return std::shared_ptr<Optimizer>(new MergeOptimizer(MNN_FORWARD_CPU, numThread, nullptr));
-   }
-   if (GPU == device) {
-       std::vector<MNNForwardType> types {MNN_FORWARD_METAL, MNN_FORWARD_OPENCL, MNN_FORWARD_VULKAN, MNN_FORWARD_OPENGL};
-       for (auto type : types) {
-           auto creator = MNNGetExtraBackendCreator(type);
-           if (nullptr != creator) {
-               return std::shared_ptr<Optimizer>(new MergeOptimizer(type, numThread, nullptr));
-           }
-       }
-   }
    // Do nothing
    return nullptr;
}


@ -0,0 +1,45 @@
//
// RandomGenerator.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef RandomGenerator_hpp
#define RandomGenerator_hpp
#include <MNN/MNNDefine.h>
#include <random>
namespace MNN {
namespace Express {
class MNN_PUBLIC RandomGenerator {
private:
RandomGenerator(int seed = std::random_device()()) {
mSeed = seed;
mGenerator.seed(mSeed);
}
~RandomGenerator() = default;
RandomGenerator(RandomGenerator &);
RandomGenerator &operator=(const RandomGenerator &);
private:
int mSeed;
std::mt19937 mGenerator;
public:
static std::mt19937 &generator(int seed = std::random_device()()) {
static RandomGenerator rng(seed);
return rng.mGenerator;
}
};
} // namespace Express
} // namespace MNN
#endif // RandomGenerator_hpp
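Note that the generator is a process-wide singleton: only the seed passed on the very first call takes effect, later seeds are ignored. A short illustration:
auto& g1 = RandomGenerator::generator(42);  // seeds the shared mt19937 with 42
auto& g2 = RandomGenerator::generator(7);   // same engine, the 7 is ignored
std::uniform_real_distribution<float> dist(0.0f, 1.0f);
float sample = dist(g1);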


@ -10,8 +10,24 @@
#include <map>
#include "MNN_generated.h"
#include "core/TensorUtils.hpp"
#include "core/MNNMemoryUtils.h"
namespace MNN {
namespace Express {
Expr::Inside::Inside(int outputSize) {
    mOutputInfos.resize(outputSize);
    mOutputTensors.resize(outputSize);
    for (int i=0; i<outputSize; ++i) {
        mOutputTensors[i] = new Tensor;
        TensorUtils::getDescribe(mOutputTensors[i])->memoryType = Tensor::InsideDescribe::MEMORY_HOST;
    }
}
Expr::Inside::~Inside() {
    for (auto t : mOutputTensors) {
        delete t;
    }
}
#define CONVERT(src, dst, f)\
if (f == src) return dst;
@ -61,7 +77,6 @@ void Utils::copyInfoToTensor(Tensor* dest, const Variable::Info* source) {
    }
    dest->buffer().dimensions = (int)source->dim.size();
    dest->buffer().type = source->type;
-   dest->buffer().host = (uint8_t*)source->ptr;
    TensorUtils::getDescribe(dest)->dimensionFormat = (MNN_DATA_FORMAT)Utils::convertFormat(source->order);
    TensorUtils::setLinearLayout(dest);
}
@ -70,7 +85,31 @@ void Utils::copyTensorToInfo(Variable::Info* shape, const Tensor* tensor) {
    shape->dim = tensor->shape();
    shape->size = tensor->elementSize();
    shape->order = Utils::revertFormat(TensorUtils::getDescribe(tensor)->dimensionFormat);
}
bool Utils::allocMemoryForHostTensor(Tensor* dest) {
    if (nullptr != dest->buffer().host) {
        return true;
    }
    if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
        return false;
    }
    auto size = dest->size();
    if (0 >= size) {
        return false;
    }
    dest->buffer().host = (uint8_t*)MNNMemoryAllocAlign(size, MNN_MEMORY_ALIGN_DEFAULT);
    return dest->buffer().host != nullptr;
}
bool Utils::releaseMemoryForHostTensor(Tensor* dest) {
    if (nullptr == dest->buffer().host) {
        return true;
    }
    if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
        return false;
    }
    MNNMemoryFreeAlign(dest->buffer().host);
    dest->buffer().host = nullptr;
    return true;
}
} // namespace Express

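A hedged sketch of the new host-memory helpers; only tensors whose describe memoryType is MEMORY_HOST (as set up in Expr::Inside above) are managed this way:
Tensor* t = new Tensor;
TensorUtils::getDescribe(t)->memoryType = Tensor::InsideDescribe::MEMORY_HOST;
// shape and type are assumed to have been filled in already, e.g. via Utils::copyInfoToTensor
if (Utils::allocMemoryForHostTensor(t)) {    // aligned allocation of t->size() bytes
    // ... write through t->host<float>() ...
    Utils::releaseMemoryForHostTensor(t);    // frees and clears buffer().host
}
delete t;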

@ -15,15 +15,16 @@
namespace MNN {
namespace Express {
struct Expr::Inside {
-   std::vector<const Variable::Info*> mInputInfos;
    Inside(int outputSize);
    ~ Inside();
    std::vector<Variable::Info> mOutputInfos;
    std::vector<Tensor*> mOutputTensors;
    Executor::Requirement mReq;
    std::shared_ptr<Executor::Unit> mUnit;
    std::shared_ptr<Executor::ComputeCache> mCache;
    int mCacheOffset = 0;
    bool mInfoDirty = true;
    bool mContentDirty = true;
-   bool mLinkCache = false;
};
class Utils {
public:
@ -33,6 +34,8 @@ public:
    static int convertFormat(Dimensionformat format);
    static Express::Dimensionformat revertFormat(int format);
    static halide_type_t revertDataType(DataType dataType);
    static bool allocMemoryForHostTensor(Tensor* dest);
    static bool releaseMemoryForHostTensor(Tensor* dest);
};
} // namespace Express
} // namespace MNN


@ -10,7 +10,7 @@
#include <MNN/expr/ExprCreator.hpp>
using namespace MNN::Express;
namespace MNN {
namespace Express {
FixModule::FixModule(std::vector<Express::VARP> output, std::vector<Express::VARP> parameters,
                     std::vector<std::pair<Express::VARP, Express::Dimensionformat>> inputs) {
    for (auto p : parameters) {
@ -34,5 +34,19 @@ std::vector<Express::VARP> FixModule::onForward(const std::vector<Express::VARP>
    }
    return mOutput;
}
Module* FixModule::clone(CloneContext* ctx) const {
    FixModule* module(new FixModule);
    for (auto& it : mInputs) {
        VARP v = ctx->getOrClone(it.first);
        module->mInputs.push_back(std::make_pair(v, it.second));
    }
    for (auto& it : mOutput) {
        VARP v = ctx->getOrClone(it);
        module->mOutput.push_back(v);
    }
    return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN


@ -8,9 +8,9 @@
#ifndef FixModule_hpp
#define FixModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class FixModule : public Module {
public:
@ -20,10 +20,14 @@ public:
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
    virtual void onClearCache() override;
private:
    FixModule() = default;
    Module* clone(CloneContext* ctx) const override;
    std::vector<std::pair<Express::VARP, Express::Dimensionformat>> mInputs;
    std::vector<Express::VARP> mOutput;
};
} // namespace Express
} // namespace MNN
#endif

express/module/IfModule.cpp (new file, 112 lines)

@ -0,0 +1,112 @@
//
// IfModule.cpp
// MNN
//
// Created by MNN on 2020/09/01.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "IfModule.hpp"
#include "MNN_generated.h"
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
for (int i=0; i<names.size(); ++i) {
if (names[i] == key) {
return i;
}
}
return -1;
}
std::vector<Express::VARP> IfModule::onForward(const std::vector<Express::VARP>& inputs) {
std::vector<Express::VARP> outputs(mOutputFromElse.size());
MNN_ASSERT(mOutputFromThen.size() == mOutputFromElse.size());
if (inputs[0]->readMap<int>()[0] > 0) {
std::vector<Express::VARP> subInputs(mInputForThen.size());
for (auto& p : mInputForThen) {
subInputs[p.first] = inputs[p.second];
}
auto subOutputs = mThen->onForward(subInputs);
for (int i=0; i<mOutputFromThen.size(); ++i) {
outputs[i] = subOutputs[mOutputFromThen[i]];
}
} else {
std::vector<Express::VARP> subInputs(mInputForElse.size());
for (auto& p : mInputForElse) {
subInputs[p.first] = inputs[p.second];
}
auto subOutputs = mElse->onForward(subInputs);
for (int i=0; i<mOutputFromElse.size(); ++i) {
outputs[i] = subOutputs[mOutputFromElse[i]];
}
}
return outputs;
}
IfModule* IfModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
auto module = new IfModule;
auto ifParam = op->main_as_IfParam();
auto& thenG = subGraph.find(ifParam->then_graph()->str())->second;
auto& elseG = subGraph.find(ifParam->else_graph()->str())->second;
module->mElse = elseG.m;
module->mThen = thenG.m;
if (nullptr != op->name()) {
module->setName(op->name()->str());
}
/** Compute map index
std::vector<std::pair<int, int>> mInputForThen;
// First mElse' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForElse;
std::vector<int> mOutputFromThen;
std::vector<int> mOutputFromElse;
*/
// Map Inputs
for (int i=0; i<ifParam->aliases_inputs()->size(); ++i) {
auto index = i;
auto data = ifParam->aliases_inputs()->GetAs<StringVec>(i);
if (nullptr == data->data()) {
continue;
}
for (int s=0; s<data->data()->size(); ++s) {
auto name = data->data()->GetAsString(s)->str();
auto thenPos = _findPos(thenG.inputs, name);
if (thenPos >= 0) {
module->mInputForThen.emplace_back(std::make_pair(thenPos, i));
}
auto elsePos = _findPos(elseG.inputs, name);
if (elsePos >= 0) {
module->mInputForElse.emplace_back(std::make_pair(elsePos, i));
}
}
}
// Map outputs
auto output = ifParam->aliases_outputs();
module->mOutputFromThen.resize(output->size());
module->mOutputFromElse.resize(output->size());
for (int i=0; i<output->size(); ++i) {
auto data = output->GetAs<StringVec>(i);
MNN_ASSERT(data->data()->size() == 2);
auto thenPos = _findPos(thenG.outputs, data->data()->GetAsString(0)->str());
MNN_ASSERT(thenPos >= 0);
auto elsePos = _findPos(elseG.outputs, data->data()->GetAsString(1)->str());
module->mOutputFromThen[i] = thenPos;
module->mOutputFromElse[i] = elsePos;
}
return module;
}
Module* IfModule::clone(CloneContext* ctx) const {
IfModule* module(new IfModule);
module->mInputForThen = mInputForThen;
module->mInputForElse = mInputForElse;
module->mOutputFromThen = mOutputFromThen;
module->mOutputFromElse = mOutputFromElse;
module->mThen.reset(mThen->clone(ctx));
module->mElse.reset(mElse->clone(ctx));
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN
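Because IfModule only stores index maps plus the two sub-modules, duplicating a graph that contains If ops is cheap; a hedged sketch (file name and tensor names are placeholders):
std::shared_ptr<Module> net(Module::load({"cond", "x"}, {"y"}, "control_flow.mnn", false));
std::shared_ptr<Module> replica(Module::clone(net.get(), /*shareParams*/ true));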


@ -0,0 +1,43 @@
//
// IfModule.hpp
// MNN
//
// Created by MNN on 2020/09/01.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef IfModule_hpp
#define IfModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class IfModule : public Module {
public:
virtual ~ IfModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
private:
IfModule(){}
Module* clone(CloneContext* ctx) const override;
// First mThen' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForThen;
// First mElse' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForElse;
std::vector<int> mOutputFromThen;
std::vector<int> mOutputFromElse;
std::shared_ptr<Module> mThen;
std::shared_ptr<Module> mElse;
};
}
}
#endif /* IfModule_hpp */

express/module/Module.cpp (new file, 182 lines)

@ -0,0 +1,182 @@
//
// Module.cpp
// MNN
//
// Created by MNN on 2019/11/25.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <MNN/expr/Module.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include "FixModule.hpp"
#include "PipelineModule.hpp"
#include "core/FileLoader.hpp"
namespace MNN {
namespace Express {
class EmptyModule : public Module {
public:
EmptyModule(const std::vector<Express::VARP>& parameters) {
for (auto p : parameters) {
addParameter(p);
}
}
virtual ~EmptyModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
return {};
}
protected:
EmptyModule() = default;
Module* clone(Module::CloneContext* ctx) const override {
EmptyModule* module(new EmptyModule);
return this->cloneBaseTo(ctx, module);
}
};
Module* Module::createEmpty(const std::vector<Express::VARP>& parameters) {
return new EmptyModule(parameters);
}
Express::VARP Module::forward(Express::VARP input) {
return this->onForward({input})[0];
}
std::vector<Express::VARP> Module::parameters() const {
std::vector<Express::VARP> result;
_collectParameters(result);
return result;
}
bool Module::loadParameters(const std::vector<Express::VARP>& parameters) {
std::vector<Express::VARP> result;
_collectParameters(result);
if (parameters.empty() || parameters.size() != result.size()) {
MNN_ERROR("Error parameters, empty or parameter size not match \n");
return false;
}
for (int i=0; i<parameters.size(); ++i) {
if (nullptr != result[i].get()) {
// Check Origin parameter's size
auto dstInfo = result[i]->getInfo();
auto srcInfo = parameters[i]->getInfo();
if (dstInfo->dim.size() != srcInfo->dim.size() || dstInfo->order != srcInfo->order) {
MNN_ERROR("Error parameters %d, dim size or order not match \n", i);
return false;
}
if (dstInfo->size != srcInfo->size || dstInfo->type != srcInfo->type) {
MNN_ERROR("Error parameters %d, size or type not match \n", i);
return false;
}
}
Variable::replace(result[i], parameters[i]);
}
return true;
}
void Module::setIsTraining(const bool isTraining) {
mIsTraining = isTraining;
for (auto c : mChildren) {
c->setIsTraining(isTraining);
}
}
bool Module::getIsTraining() {
return mIsTraining;
}
void Module::registerModel(const std::vector<std::shared_ptr<Module>>& children) {
mChildren.insert(mChildren.begin(), children.begin(), children.end());
}
int Module::addParameter(VARP parameter) {
auto res = mParameters.size();
mParameters.emplace_back(parameter);
return (int)res;
}
void Module::setParameter(Express::VARP parameter, int index) {
if (index < 0 || index >= mParameters.size()) {
MNN_ERROR("Module error: index out of range: %d - %d:\n", index, (int)mParameters.size());
return;
}
mParameters[index] = parameter;
}
void Module::_collectParameters(std::vector<Express::VARP>& result) const {
for (auto p : mParameters) {
result.push_back(p);
}
for (auto c : mChildren) {
c->_collectParameters(result);
}
}
void Module::clearCache() {
for (auto c : mChildren) {
c->clearCache();
}
this->onClearCache();
}
Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic) {
AutoStorage<uint8_t> buffer;
{
FileLoader loader(fileName);
if (!loader.valid()) {
MNN_ERROR("Error for open %s\n", fileName);
return {};
}
loader.read();
if (!loader.valid()) {
return {};
}
loader.merge(buffer);
if (buffer.get() == nullptr) {
return {};
}
}
return load(inputs, outputs, buffer.get(), buffer.size(), dynamic);
}
Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
return PipelineModule::load(inputs, outputs, buffer, length, dynamic);
}
EXPRP Module::CloneContext::getOrClone(EXPRP expr) {
auto it = mExprMap.find(expr.get());
if (it == mExprMap.end()) {
// EXPRP replica = expr->clone(shareParams);
// TODO(hjchen2): Clone expr.
EXPRP replica = expr;
it = mExprMap.emplace(expr.get(), replica).first;
}
return it->second;
}
VARP Module::CloneContext::getOrClone(VARP var) {
auto it = mVarMap.find(var.get());
if (it != mVarMap.end()) {
// TODO(hjchen2): Clone variable.
VARP replica = var;
it = mVarMap.emplace(var.get(), replica).first;
}
return it->second;
}
Module* Module::clone(const Module* module, const bool shareParams) {
CloneContext context(shareParams);
return module->clone(&context);
}
Module* Module::cloneBaseTo(CloneContext* ctx, Module* module) const {
for (const Express::VARP& var : mParameters) {
module->mParameters.push_back(ctx->getOrClone(var));
}
module->mIsTraining = mIsTraining;
module->mName = mName;
module->mType = mType;
return module;
}
} // namespace Express
} // namespace MNN
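A hedged end-to-end sketch of the Module API introduced here (the model path and tensor names are placeholders):
#include <MNN/expr/Module.hpp>
#include <MNN/expr/ExprCreator.hpp>
using namespace MNN::Express;
void moduleExample() {
    std::shared_ptr<Module> net(Module::load({"input"}, {"prob"}, "model.mnn", false));
    if (nullptr == net) {
        return;
    }
    auto input = _Input({1, 3, 224, 224}, NC4HW4);
    auto prob  = net->onForward({input})[0];
    auto snapshot = net->parameters();     // collect trainable variables
    net->loadParameters(snapshot);         // and restore them later
}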


@ -6,9 +6,11 @@
//  Copyright © 2018, Alibaba Group Holding Limited
//
#include <MNN/expr/NN.hpp>
#include "Distributions.hpp"
#include "FixModule.hpp"
#include "WhileModule.hpp"
#include "IfModule.hpp"
#include "Initializer.hpp"
#include "MNN_generated.h"
#include "RandomGenerator.hpp"
@ -17,7 +19,7 @@
using namespace MNN::Express;
namespace MNN {
namespace Express {
static VARP _activate(VARP x, NN::ActivationFunctionType type) {
    switch (type) {
        case NN::None:
@ -58,6 +60,14 @@ public:
    }
private:
    DropoutModule() = default;
    Module* clone(CloneContext* ctx) const override {
        DropoutModule* module(new DropoutModule);
        module->mDropRatio = mDropRatio;
        return this->cloneBaseTo(ctx, module);
    }
    float mDropRatio;
};
@ -80,8 +90,8 @@ public:
        mRunningVariance = _Const(bnPa->varData()->data(), {1, mChannels, 1, 1}, NCHW);
        addParameter(mScale);
        addParameter(mBias);
        mRunningVariancePos = addParameter(mRunningVariance);
        mRunningMeanPos = addParameter(mRunningMean);
        mReductionDims = {0, 2, 3};
        setType("BatchNorm");
    }
@ -110,8 +120,8 @@ public:
        addParameter(mScale);
        addParameter(mBias);
        mRunningVariancePos = addParameter(mRunningVariance);
        mRunningMeanPos = addParameter(mRunningMean);
        setType("BatchNorm");
    }
@ -156,9 +166,8 @@ public:
            mRunningVariance = _Const(mMomentum) * mRunningVariance + _Const(1 - mMomentum) * sampleVar;
            outputData->setName(name());
            outputData = _Convert(outputData, dimFormat);
-           Variable::prepareCompute({inputs[0], outputData, mRunningMean, mRunningVariance});
-           mRunningMean.fix(Express::VARP::CONSTANT);
-           mRunningVariance.fix(Express::VARP::CONSTANT);
            setParameter(mRunningMean, mRunningMeanPos);
            setParameter(mRunningVariance, mRunningVariancePos);
            return {outputData};
        }
        auto rStd = _Const(1.0f) / _Sqrt(mRunningVariance + _Const(mEps));
@ -180,12 +189,31 @@
    }
private:
    BatchNormModule() = default;
    Module* clone(CloneContext* ctx) const override {
        BatchNormModule* module(new BatchNormModule);
        module->mMomentum = mMomentum;
        module->mEps = mEps;
        module->mScale = ctx->getOrClone(mScale);
        module->mBias = ctx->getOrClone(mBias);
        module->mRunningMean = ctx->getOrClone(mRunningMean);
        module->mRunningVariance = ctx->getOrClone(mRunningVariance);
        module->mRunningMeanPos = mRunningMeanPos;
        module->mRunningVariancePos = mRunningVariancePos;
        module->mChannels = mChannels;
        module->mReductionDims = mReductionDims;
        return this->cloneBaseTo(ctx, module);
    }
    float mMomentum = 0.99;
    float mEps = 1e-5;
    VARP mScale = nullptr;
    VARP mBias = nullptr;
    VARP mRunningMean = nullptr;
    VARP mRunningVariance = nullptr;
    int mRunningMeanPos = -1;
    int mRunningVariancePos = -1;
    int mChannels;
    std::vector<int> mReductionDims;
};
@ -246,7 +274,18 @@ public:
        tempOutput->setName(name());
        return {tempOutput};
    }
private:
    ConvModule() = default;
    Module* clone(CloneContext* ctx) const override {
        ConvModule* module(new ConvModule);
        module->mParameter = mParameter;
        module->mParameter.weight = ctx->getOrClone(mParameter.weight);
        module->mParameter.bias = ctx->getOrClone(mParameter.bias);
        return this->cloneBaseTo(ctx, module);
    }
    NN::ConvParameters mParameter;
};
static std::tuple<VARP, VARP, int> _initParameters(const NN::ConvOption& option, bool hasBias,
@ -533,7 +572,23 @@ public:
    }
private:
    ConvOctaveModule() = default;
    Module* clone(CloneContext* ctx) const override {
        ConvOctaveModule* module(new ConvOctaveModule);
        module->mOption = mOption;
        module->mLLW = ctx->getOrClone(mLLW);
        module->mLHW = ctx->getOrClone(mLHW);
        module->mHLW = ctx->getOrClone(mHLW);
        module->mHHW = ctx->getOrClone(mHHW);
        module->mLBias = ctx->getOrClone(mLBias);
        module->mHBias = ctx->getOrClone(mHBias);
        module->mSplitInput = mSplitInput;
        module->mGroup = mGroup;
        return this->cloneBaseTo(ctx, module);
    }
    NN::ConvOption mOption;
    VARP mLLW;
    VARP mLHW;
    VARP mHLW;
@ -555,7 +610,7 @@ Module* NN::ConvOctave(const ConvParameters& parameters,
    module->setName(parameters.name);
    return module;
}
Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs) {
    if (nullptr == expr->get()) {
        return nullptr;
    }
@ -565,6 +620,12 @@ Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
    if (expr->get()->type() == OpType_Dropout) {
        return new DropoutModule(0.3f);
    }
    if (expr->get()->type() == OpType_While) {
        return WhileModule::create(expr->get(), subgraphs);
    }
    if (expr->get()->type() == OpType_If) {
        return IfModule::create(expr->get(), subgraphs);
    }
    return nullptr;
}
@ -621,6 +682,9 @@ public:
    auto limit = (float)(1 << (bits - 1)) - 1.0f;
    mLimitScale = _Scalar<float>(1.0f / limit);
    mClampValue = _Scalar<float>(limit);
    mInputScalePos = addParameter(mInputScale);
    mOutputScalePos = addParameter(mOutputScale);
    setType("ConvBNReluFused");
}
@ -632,31 +696,16 @@
        tempX = _Convert(tempX, NCHW);
    }
    auto originX = tempX;
    VARP scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
-   if (mFeatureScaleStatMethod == NN::PerTensor) {
-       scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
-   } else {
-       auto originSize = originX->getInfo()->size;
-       auto batch = originX->getInfo()->dim[0];
-       auto channel = originX->getInfo()->dim[1];
-       if (originSize / batch / channel < 10) {
-           // Too small data
-           //MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
-           std::vector<int> dims = {1, channel, 1, 1};
-           auto dimVar = _Const(dims.data(), {4}, NCHW, halide_type_of<int32_t>());
-           auto singleScale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
-           scale = _Fill(dimVar, singleScale);
-       } else {
-           //MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
-           scale = _Maximum(_ReduceMax(_Abs(tempX), {0, 2, 3}, true), _Scalar<float>(0.0001f)) * mLimitScale;
-       }
-   }
-   scale.fix(VARP::CONSTANT);
    if (useScale == nullptr) {
        tempX = _Round(tempX * _Reciprocal(scale)) * scale;
    } else {
        tempX = _Round(tempX * _Reciprocal(useScale)) * useScale;
    }
    // Break the grad by use cast
    tempX = _Cast<float>(tempX);
    // Move grad from tempX to originX
    tempX = _Convert(tempX + _ZeroGrad(originX), originFormat);
    return std::make_pair(tempX, scale);
}
@ -684,18 +733,16 @@
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
    VARP res;
    if (getIsTraining()) {
-       Variable::prepareCompute({inputs[0]});
        auto x = _Convert(inputs[0], NCHW);
        // simulate weight quant
        auto weightScale = _Maximum(_ReduceMax(_Abs(mWeight), {1, 2, 3}, true), _Scalar<float>(1E-6)) * mLimitScale;
-       weightScale.fix(VARP::CONSTANT);
        auto weightTemp = _Round(mWeight * _Reciprocal(weightScale)) * weightScale;
        weightTemp = weightTemp + _ZeroGrad(mWeight);
        // simulate input quant to get original input scale
        auto inputPair = fakeQuantFeature(x);
        mInputScale = updateScale(mInputScale, inputPair.second);
        setParameter(mInputScale, mInputScalePos);
        // simulate output quant to get original output scale
        res = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@ -709,10 +756,9 @@
        res = _activate(res, mActivation);
-       Variable::prepareCompute({conv, res});
        auto outputPair = fakeQuantFeature(res);
        mOutputScale = updateScale(mOutputScale, outputPair.second);
        setParameter(mOutputScale, mOutputScalePos);
        res = outputPair.first;
    } else {
        if (nullptr == mInputScale) {
@ -725,6 +771,7 @@
            auto x = _Convert(inputs[0], NCHW);
            auto inputPair = fakeQuantFeature(x);
            mInputScale = inputPair.second;
            setParameter(mInputScale, mInputScalePos);
            inputPair.first.fix(VARP::CONSTANT);
            auto simuRes = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,
@ -737,6 +784,7 @@
            Variable::prepareCompute({simuRes});
            auto outputPair = fakeQuantFeature(simuRes);
            mOutputScale = outputPair.second;
            setParameter(mOutputScale, mOutputScalePos);
            outputPair.first.fix(VARP::CONSTANT);
        }
@ -772,12 +820,7 @@
    {
        std::vector<int> dims = {x->getInfo()->dim[1]};
        auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
        VARP channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
-       if (mFeatureScaleStatMethod == NN::PerTensor) {
-           channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
-       } else {
-           channelScale = _Reciprocal(mInputScale);
-       }
        x = _FloatToInt8(x, channelScale, -127, 127);// TODO add clamp
    }
@ -824,12 +867,7 @@
    {
        std::vector<int> dims = {res->getInfo()->dim[1]};
        auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
        VARP channelScale = _Fill(dimVar, mOutputScale);
-       if (mFeatureScaleStatMethod == NN::PerTensor) {
-           channelScale = _Fill(dimVar, mOutputScale);
-       } else {
-           channelScale = mOutputScale;
-       }
        res = _Int8ToFloat(res, channelScale);
    }
}
@ -838,6 +876,34 @@
    }
private:
    ConvBNReluFusedModule() = default;
    Module* clone(CloneContext* ctx) const override {
        ConvBNReluFusedModule* module(new ConvBNReluFusedModule);
        module->mConvParameter = mConvParameter;
        module->mConvParameter.weight = ctx->getOrClone(mConvParameter.weight);
        module->mConvParameter.bias = ctx->getOrClone(mConvParameter.bias);
        module->mOption = mOption;
        module->mGroup = mGroup;
        module->mWeight = ctx->getOrClone(mWeight);
        module->mBias = ctx->getOrClone(mBias);
        module->mActivation = mActivation;
        module->mLimitScale = ctx->getOrClone(mLimitScale);
        module->mInputScalePos = mInputScalePos;
        module->mOutputScalePos = mOutputScalePos;
        module->mInputScale = ctx->getOrClone(mInputScale);
        module->mOutputScale = ctx->getOrClone(mOutputScale);
        module->mClampValue = ctx->getOrClone(mClampValue);
        module->mMomentum = mMomentum;
        module->mFeatureScaleStatMethod = mFeatureScaleStatMethod;
        module->mScaleUpdateMethod = mScaleUpdateMethod;
        if (mBatchNorm) {
            module->mBatchNorm.reset(mBatchNorm->clone(ctx));
            module->registerModel({module->mBatchNorm});
        }
        return this->cloneBaseTo(ctx, module);
    }
    NN::ConvParameters mConvParameter;
    NN::ConvOption mOption;
    int mGroup;
@ -846,6 +912,8 @@ private:
    NN::ActivationFunctionType mActivation = NN::ActivationFunctionType::None;
    std::shared_ptr<Module> mBatchNorm = nullptr;
    VARP mLimitScale;
    int mInputScalePos = -1;
    int mOutputScalePos = -1;
    VARP mInputScale = nullptr;
    VARP mOutputScale = nullptr;
    VARP mClampValue;
@ -870,5 +938,5 @@ Module* NN::ConvInt8(const ConvParameters& para, int bits, NN::FeatureScaleStatM
    return new ConvBNReluFusedModule({conv}, featureMethod, method, bits);
}
} // namespace Express
} // namespace MNN

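The fused module fake-quantizes every feature map with a single per-tensor scale; the arithmetic in fakeQuantFeature above reduces to this sketch (n = bit width, straight-through estimator for the gradient):
#include <algorithm>
#include <cmath>
float fakeQuant(float x, float maxAbs, int bits) {
    float limit = (float)(1 << (bits - 1)) - 1.0f;    // e.g. 127 for 8 bits
    float scale = std::max(maxAbs, 1e-4f) / limit;    // max(|x|) clamped away from zero
    return std::round(x / scale) * scale;             // quantize-dequantize
}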

@ -0,0 +1,761 @@
//
// PipelineModule.cpp
// MNN
//
// Created by MNN on 2020/01/09.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "PipelineModule.hpp"
#include "MNN_generated.h"
#include <set>
#include <vector>
#include "StaticModule.hpp"
#include "IfModule.hpp"
#include "WhileModule.hpp"
using namespace MNN::Express;
namespace MNN {
namespace Express {
//#define DYNAMIC
#define PIPELINE_MODULE "_pipeline_module__"
class ExprModule : public Module {
public:
ExprModule(EXPRP expr) {
mExpr = expr;
setName(expr->name());
mInputs = expr->inputs();
auto op = mExpr->get();
if (op) {
auto typeName = EnumNameOpType(op->type());
setType(typeName);
}
for (int i = 0; i < mInputs.size(); ++i) {
auto inputExpr = mInputs[i]->expr().first;
if (inputExpr->get() != nullptr) {
mInputs[i] = nullptr;
mInputIndexes.emplace_back(i);
continue;
}
switch (inputExpr->inputType()) {
case VARP::INPUT:
mInputs[i] = nullptr;
mInputIndexes.emplace_back(i);
break;
case VARP::CONSTANT:
break;
case VARP::TRAINABLE:
addParameter(mInputs[i]);
break;
default:
break;
}
}
}
virtual std::vector<VARP> onForward(const std::vector<VARP>& inputs) override {
MNN_ASSERT(mInputIndexes.size() == inputs.size());
if (nullptr == mExpr->get()) {
return {Variable::create(mExpr)};
}
std::vector<VARP> tempInputs = mInputs;
for (int i = 0; i < inputs.size(); ++i) {
tempInputs[mInputIndexes[i]] = inputs[i];
}
std::vector<VARP> outputVars;
auto newExpr = Expr::create(mExpr->extra(), std::move(tempInputs), mExpr->outputSize());
newExpr->setName(mExpr->name());
for (int i = 0; i < mExpr->outputSize(); ++i) {
outputVars.emplace_back(Variable::create(newExpr, i));
}
return outputVars;
}
const std::vector<int>& inputIndexes() const {
return mInputIndexes;
}
private:
Module* clone(CloneContext* ctx) const override {
ExprModule* module(new ExprModule(ctx->getOrClone(mExpr)));
for (const VARP& var : mInputs) {
module->mInputs.push_back(ctx->getOrClone(var));
}
module->mInputIndexes = mInputIndexes;
return this->cloneBaseTo(ctx, module);
}
EXPRP mExpr;
std::vector<VARP> mInputs;
std::vector<int> mInputIndexes;
};
Module* PipelineModule::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(EXPRP)> transformFunction;
if (fortrain) {
transformFunction =
[&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
auto convExtracted = NN::Utils::ExtractConvolution(source);
if (convExtracted.weight == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> module(NN::Conv(convExtracted));
module->setName(source->name());
return std::make_pair(std::vector<int>{0}, module);
};
} else {
transformFunction = [&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
};
}
return new PipelineModule(inputs, outputs, transformFunction);
}
PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outputs, const Transformer& transformFunction) {
setType(PIPELINE_MODULE);
std::vector<EXPRP> executeOrder;
std::set<EXPRP> inputExpr;
for (auto v : inputs) {
inputExpr.insert(v->expr().first);
}
for (auto output : outputs) {
Expr::visit(output->expr().first,
[&executeOrder, &inputExpr](EXPRP expr) {
if (expr->visited()) {
return false;
}
if (inputExpr.find(expr)!= inputExpr.end()) {
expr->setVisited(true);
executeOrder.emplace_back(expr);
return false;
}
return true;
},
[&executeOrder](EXPRP expr) {
//FUNC_PRINT_ALL(var->name().c_str(), s);
if (!expr->visited()) {
executeOrder.emplace_back(expr);
expr->setVisited(true);
}
return true;
});
}
for (auto expr : executeOrder) {
expr->setVisited(false);
}
// Set Indexes
std::map<EXPRP, int> indexes;
int currentIndexes = 0;
for (auto expr : executeOrder) {
indexes[expr] = currentIndexes;
currentIndexes += expr->outputSize();
}
std::set<EXPRP> inputSets;
mInputIndexes.clear();
mStackSize = currentIndexes;
for (auto v : inputs) {
auto inputExpr = v->expr();
mInputIndexes.emplace_back(indexes[inputExpr.first] + inputExpr.second);
inputSets.insert(inputExpr.first);
}
// Create All SubModule
for (auto expr : executeOrder) {
if (inputSets.find(expr) != inputSets.end()) {
continue;
}
std::pair<std::vector<int>, std::shared_ptr<Module> > moduleResult;
bool extracted = false;
if (!transformFunction) {
moduleResult = std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
} else {
moduleResult = transformFunction(expr);
}
if (moduleResult.second == nullptr) {
std::shared_ptr<Module> module(new ExprModule(expr));
moduleResult.first = ((ExprModule*)module.get())->inputIndexes();
moduleResult.second = module;
} else {
extracted = true;
}
auto subInputs = expr->inputs();
auto& exprInputIndexes = moduleResult.first;
std::vector<int> inputIndexes;
if (exprInputIndexes.empty() && extracted) {
inputIndexes.resize(subInputs.size());
for (int i = 0; i < inputIndexes.size(); ++i) {
auto inputExpr = subInputs[i]->expr();
inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
}
} else {
inputIndexes.resize(exprInputIndexes.size());
for (int i = 0; i < inputIndexes.size(); ++i) {
auto inputExpr = subInputs[exprInputIndexes[i]]->expr();
inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
}
}
std::vector<int> outputIndexes(expr->outputSize());
for (int i = 0; i < outputIndexes.size(); ++i) {
outputIndexes[i] = indexes[expr] + i;
}
mSubModules.emplace_back(std::make_tuple(moduleResult.second, inputIndexes, outputIndexes));
registerModel({moduleResult.second});
}
mOutputIndexes.clear();
for (auto output : outputs) {
auto outputExpr = output->expr();
mOutputIndexes.emplace_back(indexes[outputExpr.first] + outputExpr.second);
}
}
bool PipelineModule::turnQuantize(Module* module, const int bit, NN::FeatureScaleStatMethod featureScaleStatMethod, NN::ScaleUpdateMethod scaleUpdateMethod) {
if (nullptr == module || module->type() != PIPELINE_MODULE) {
MNN_ERROR("Invalide module for quantized\n");
return false;
}
((PipelineModule*)module)->toTrainQuant(bit, featureScaleStatMethod, scaleUpdateMethod);
return true;
}
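A hedged sketch of switching a loaded pipeline to quantization-aware training; the file name is a placeholder and the enum values are assumed to match NN::FeatureScaleStatMethod / NN::ScaleUpdateMethod as used above:
std::shared_ptr<Module> net(Module::load({"data"}, {"prob"}, "mobilenet.mnn", true));
// Fuse eligible Conv(+BN)(+ReLU) chains into 8-bit fake-quant modules.
PipelineModule::turnQuantize(net.get(), 8, NN::PerTensor, NN::MovingAverage);
net->setIsTraining(true);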
std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputIndices) {
MNN_ASSERT(outputIndices.size() > 0);
std::vector<int> countResult(outputIndices.size(), 0);
for (int i = 0; i < mSubModules.size(); i++) {
auto &m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto name = theModule->name();
auto &inputIndices = std::get<1>(m);
for (int j = 0; j < inputIndices.size(); j++) {
int index = inputIndices[j];
for (int k = 0; k < countResult.size(); k++) {
if (index == outputIndices[k]) {
countResult[k]++;
}
}
}
}
return countResult;
}
void PipelineModule::toTrainQuant(const int bits, NN::FeatureScaleStatMethod featureScaleStatMethod,
NN::ScaleUpdateMethod scaleUpdateMethod) {
std::vector<int> needEraseIndices;
for (int i = 0; i < mSubModules.size(); i++) {
auto& m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto moduleType = theModule->type();
//auto& inputIndices = std::get<1>(m);
auto& outputIndices = std::get<2>(m);
if (moduleType == "Conv" && i < mSubModules.size() - 1) {
auto& p1 = mSubModules[i+1];
auto p1Module = std::get<0>(p1);
auto& p1ModuleType = p1Module->type();
auto& p1InputIndices = std::get<1>(p1);
auto& p1OutputIndices = std::get<2>(p1);
auto convOutputCount = countOutputReference(outputIndices);
bool convSingleOutputReference = ((outputIndices.size() == 1) && (convOutputCount[0] == 1));
// only conv
if ((!convSingleOutputReference) || (p1ModuleType == "Conv") ||
(p1ModuleType != "BatchNorm" && p1ModuleType != "ReLU" && p1ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// conv + bn + ?
if (p1ModuleType == "BatchNorm") {
bool convBnConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convBnConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// last conv + bn
if (i == mSubModules.size() - 2) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
// maybe there is a relu or relu6 after conv + bn
auto& p2 = mSubModules[i+2];
auto& p2Module = std::get<0>(p2);
auto p2ModuleType = p2Module->type();
auto& p2InputIndices = std::get<1>(p2);
auto& p2OutputIndices = std::get<2>(p2);
auto bnOutputCount = countOutputReference(p1OutputIndices);
bool bnSingleOutputReference = ((p1OutputIndices.size() == 1) && (bnOutputCount[0] == 1));
// only conv + bn
if ((!bnSingleOutputReference) || (p2ModuleType != "ReLU" && p2ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
} else { // conv + bn + relu or conv + bn + relu6
bool convBnReluConnected = ((bnSingleOutputReference) && (p2InputIndices.size() == 1) && (p2InputIndices[0] == p1OutputIndices[0]));
if (!convBnReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
theModule.reset(NN::ConvBNReluFused({theModule, p1Module, p2Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p2OutputIndices;
needEraseIndices.emplace_back(i + 1);
needEraseIndices.emplace_back(i + 2);
continue;
}
}
// conv + relu or conv + relu6
if (p1ModuleType == "ReLU" || p1ModuleType == "ReLU6") {
bool convReluConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
}
if (i == mSubModules.size() - 1 && moduleType == "Conv") {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
}
}
// erase useless submodules
const int eraseSize = needEraseIndices.size();
int alreadyErasedCount = 0;
for (int i = 0; i < eraseSize; i++) {
auto position = needEraseIndices[i] - alreadyErasedCount;
auto type = std::get<0>(mSubModules[position])->type();
MNN_ASSERT(type == "BatchNorm" || type == "ReLU" || type == "ReLU6");
mSubModules.erase(mSubModules.begin() + position);
alreadyErasedCount++;
}
}
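// Illustrative usage sketch (not authoritative): how a caller might drive the
// fusion/quantization pass above. The helper name and the idea that inputs/outputs
// come from a previously loaded expression graph are assumptions for the example.
static std::shared_ptr<Module> _exampleTurnQuantize(std::vector<VARP> inputs, std::vector<VARP> outputs) {
    // Build a pipeline module for training, then rewrite Conv (+BN)(+ReLU/ReLU6) chains
    // into ConvBNReluFused submodules with 8-bit fake quantization.
    std::shared_ptr<Module> net(PipelineModule::extract(inputs, outputs, /*fortrain*/ true));
    PipelineModule::turnQuantize(net.get(), 8, NN::PerTensor, NN::MovingAverage);
    return net;
}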
std::vector<VARP> PipelineModule::onForward(const std::vector<VARP>& inputs) {
std::vector<VARP> mStack(mStackSize);
for (int i = 0; i < mInputIndexes.size(); ++i) {
mStack[mInputIndexes[i]] = inputs[i];
}
for (int index = 0; index < mSubModules.size(); ++index) {
auto& m = mSubModules[index];
std::vector<VARP> tempInputs(std::get<1>(m).size());
for (int i = 0; i < tempInputs.size(); ++i) {
tempInputs[i] = mStack[std::get<1>(m)[i]];
MNN_ASSERT(nullptr != tempInputs[i]);
}
std::vector<VARP> tempOutputs = std::get<0>(m)->onForward(tempInputs);
MNN_ASSERT(tempOutputs.size() == std::get<2>(m).size());
for (int i = 0; i < tempOutputs.size(); ++i) {
mStack[std::get<2>(m)[i]] = tempOutputs[i];
MNN_ASSERT(nullptr != tempOutputs[i]);
}
}
std::vector<VARP> outputs(mOutputIndexes.size());
for (int i = 0; i < mOutputIndexes.size(); ++i) {
outputs[i] = mStack[mOutputIndexes[i]];
}
return outputs;
}
void PipelineModule::onClearCache() {
// Do nothing
}
static std::map<std::string, SubGraph> _createSubGraph(const MNN::Net* net, bool dynamic) {
std::map<std::string, SubGraph> subGraphMap;
auto subGraphs = net->subgraphs();
if (nullptr == subGraphs) {
return subGraphMap;
}
for (int i=0; i<subGraphs->size(); ++i) {
auto graph = subGraphs->GetAs<SubGraphProto>(i);
std::vector<std::string> subInputs;
std::vector<std::string> subOutputs;
if (nullptr != graph->inputs()) {
for (int v=0; v<graph->inputs()->size(); ++v) {
auto index = graph->inputs()->data()[v];
subInputs.emplace_back(graph->tensors()->GetAsString(index)->str());
}
}
for (int v=0; v<graph->outputs()->size(); ++v) {
auto index = graph->outputs()->data()[v];
subOutputs.emplace_back(graph->tensors()->GetAsString(index)->str());
}
// Pack to Net for loading
std::shared_ptr<Module> submodule;
{
std::unique_ptr<SubGraphProtoT> _tempInfo(graph->UnPack());
std::unique_ptr<NetT> _tempNet(new NetT);
_tempNet->oplists = std::move(_tempInfo->nodes);
_tempNet->tensorName = std::move(_tempInfo->tensors);
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = Net::Pack(builder, _tempNet.get());
builder.Finish(offset);
if (dynamic) {
submodule.reset(PipelineModule::load(subInputs, subOutputs, (const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), dynamic));
} else {
submodule.reset(new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), subInputs, subOutputs));
}
if (graph->name() != nullptr) {
submodule->setName(graph->name()->str());
}
}
auto key = graph->name()->str();
SubGraph subgraph;
subgraph.inputs = std::move(subInputs);
subgraph.outputs = std::move(subOutputs);
subgraph.m = submodule;
subGraphMap.insert(std::make_pair(key, subgraph));
}
return subGraphMap;
}
struct SubModuleInfo {
std::vector<int> opList;
std::vector<int> inputs;
std::vector<int> outputs;
std::vector<uint8_t> tensorMask;
};
static std::vector<SubModuleInfo> _createSubModuleInfo(const MNN::Net* net, const std::set<int>& inputIndexes, const std::set<int>& outputIndexes) {
std::vector<SubModuleInfo> submodule;
SubModuleInfo current;
std::vector<int> inputOps;
// Separate the graph into several submodules
for (int i=0; i<net->oplists()->size(); ++i) {
auto op = net->oplists()->GetAs<Op>(i);
// Collect Input
if (op->type() == OpType_Input) {
inputOps.emplace_back(i);
continue;
}
if (op->type() == OpType_If || op->type() == OpType_While) {
if (current.opList.size() > 0) {
// Not empty
submodule.emplace_back(std::move(current));
}
SubModuleInfo controlOp;
controlOp.opList = {i};
submodule.emplace_back(std::move(controlOp));
continue;
}
current.opList.emplace_back(i);
}
if (!current.opList.empty()) {
submodule.emplace_back(std::move(current));
}
/**Compute All SubModule's inputs and outputs*/
// tensorMask values: 0: not used, 1: consumed only (input), 2: produced only (output), 3: produced and consumed inside the submodule
for (int moduleIndex=0; moduleIndex < submodule.size(); ++moduleIndex) {
auto& m = submodule[moduleIndex];
if (1 == m.opList.size()) {
// Fast way to determine
auto op = net->oplists()->GetAs<Op>(m.opList[0]);
if (nullptr != op->inputIndexes()) {
m.inputs.resize(op->inputIndexes()->size());
::memcpy(m.inputs.data(), op->inputIndexes()->data(), m.inputs.size() * sizeof(int));
}
if (nullptr != op->outputIndexes()) {
m.outputs.resize(op->outputIndexes()->size());
::memcpy(m.outputs.data(), op->outputIndexes()->data(), m.outputs.size() * sizeof(int));
}
} else {
m.tensorMask = std::vector<uint8_t>(net->tensorName()->size(), 0);
auto& tensorMask = m.tensorMask;
for (auto opIndex : m.opList) {
auto op = net->oplists()->GetAs<Op>(opIndex);
if (nullptr != op->inputIndexes()) {
for (int v=0; v<op->inputIndexes()->size(); ++v) {
auto index = op->inputIndexes()->data()[v];
tensorMask[index] = tensorMask[index] | 1;
}
}
if (nullptr != op->outputIndexes()) {
for (int v=0; v<op->outputIndexes()->size(); ++v) {
auto index = op->outputIndexes()->data()[v];
tensorMask[index] = tensorMask[index] | 2;
}
}
}
for (int i=0; i<tensorMask.size(); ++i) {
if (0 == tensorMask[i]) {
continue;
}
if (1 == tensorMask[i]) {
m.inputs.emplace_back(i);
continue;
}
if (2 == tensorMask[i]) {
m.outputs.emplace_back(i);
continue;
}
if (3 == tensorMask[i]) {
if (outputIndexes.find(i) != outputIndexes.end()) {
m.outputs.emplace_back(i);
}
}
}
}
// Check if the module's input is valid
for (int i=0; i<m.inputs.size(); ++i) {
auto index = m.inputs[i];
if (inputIndexes.find(index) != inputIndexes.end()) {
continue;
}
bool find = false;
for (int sub=0; sub < moduleIndex; ++sub) {
for (auto out : submodule[sub].outputs) {
if (out == index) {
find = true;
break;
}
}
if (find) {
break;
}
}
if (find) {
continue;
}
// Find from module
for (int sub=0; sub < moduleIndex; ++sub) {
if (submodule[sub].tensorMask.empty()) {
continue;
}
if (submodule[sub].tensorMask[index] == 2) {
find = true;
break;
}
if (submodule[sub].tensorMask[index] == 3) {
submodule[sub].outputs.emplace_back(index);
submodule[sub].tensorMask[index] = 2;
find = true;
break;
}
}
MNN_ASSERT(find);
}
}
for (auto& m : submodule) {
m.tensorMask.clear();
}
return submodule;
}
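// Worked example for the partition above (illustrative): suppose a submodule
// {ConvA, ConvB} computes ConvA -> T -> ConvB, and a later While submodule also
// reads T. Inside {ConvA, ConvB} the mask of T is 1|2 == 3 (produced and consumed),
// so T is not exported at first; the input-validation loop for the While submodule
// then finds mask 3 in the earlier submodule and promotes T to one of its outputs.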
static Module* _createSubModule(const MNN::Net* net, const SubModuleInfo& info, const std::map<std::string, SubGraph>& subs) {
if (1 == info.opList.size()) {
auto op = net->oplists()->GetAs<Op>(info.opList[0]);
if (OpType_If == op->type()) {
return IfModule::create(op, subs);
}
if (OpType_While == op->type()) {
return WhileModule::create(op, subs);
}
MNN_ASSERT(false);
}
std::unique_ptr<NetT> _tempNet(new NetT);
// Copy Tensor Name
_tempNet->tensorName.resize(net->tensorName()->size());
for (int i=0; i<net->tensorName()->size(); ++i) {
_tempNet->tensorName[i] = net->tensorName()->GetAsString(i)->str();
}
// Create Input node
std::vector<std::string> inputNames;
for (auto index : info.inputs) {
std::unique_ptr<OpT> inputOp(new OpT);
inputOp->outputIndexes = {index};
inputOp->type = OpType_Input;
inputOp->main.type = OpParameter_Input;
inputOp->main.value = new InputT;
inputOp->main.AsInput()->dims = {0, 0, -1, -1};
_tempNet->oplists.emplace_back(std::move(inputOp));
inputNames.emplace_back(_tempNet->tensorName[index]);
}
// Create compute node
for (auto opIndex : info.opList) {
std::unique_ptr<OpT> op(net->oplists()->GetAs<Op>(opIndex)->UnPack());
_tempNet->oplists.emplace_back(std::move(op));
}
// Get output names
std::vector<std::string> outputNames;
for (auto index : info.outputs) {
outputNames.emplace_back(_tempNet->tensorName[index]);
}
// Create Net Buffer
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = Net::Pack(builder, _tempNet.get());
builder.Finish(offset);
_tempNet.reset();
return new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), inputNames, outputNames);
}
Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
// Create Subgraph
auto net = GetNet(buffer);
auto subGraphs = net->subgraphs();
if (nullptr == net->oplists() || nullptr == net->tensorName()) {
MNN_ERROR("Invalid net, for null oplist or tensorName\n");
return nullptr;
}
if (!dynamic) {
if (nullptr == subGraphs) {
// Has no control flow, can just use static module
return new StaticModule(buffer, length, inputs, outputs);
}
}
auto subGraphMap = _createSubGraph(net, dynamic);
if (dynamic) {
// For dynamic mode
auto varMaps = Variable::loadMap(buffer, length);
std::vector<VARP> inputVars(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputVars[i] = varMaps[inputs[i]];
}
std::vector<VARP> outputVars(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputVars[i] = varMaps[outputs[i]];
}
return extract(inputVars, outputVars, false, subGraphMap);
}
std::set<int> inputIndexes;
std::set<int> outputIndexes;
std::map<std::string, int> inputsMap;
std::map<std::string, int> outputsMap;
for (int i=0; i<net->tensorName()->size(); ++i) {
auto tname = net->tensorName()->GetAsString(i)->str();
for (auto& s : inputs) {
if (tname == s) {
inputIndexes.emplace(i);
inputsMap.insert(std::make_pair(s, i));
break;
}
}
for (auto& s : outputs) {
if (tname == s) {
outputIndexes.emplace(i);
outputsMap.insert(std::make_pair(s, i));
break;
}
}
}
std::vector<int> inputIndexesVec(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputIndexesVec[i] = inputsMap[inputs[i]];
}
std::vector<int> outputIndexesVec(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputIndexesVec[i] = outputsMap[outputs[i]];
}
auto subModulesInfo = _createSubModuleInfo(net, inputIndexes, outputIndexes);
std::vector<std::shared_ptr<Module>> subModules(subModulesInfo.size());
for (int i=0; i<subModulesInfo.size(); ++i) {
subModules[i].reset(_createSubModule(net, subModulesInfo[i], subGraphMap));
}
auto result = new PipelineModule;
/**
Compute:
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
*/
// Make Stack, first: origin, second: new
std::map<int, int> stackMap;
int stackIndex = 0;
for (auto& m : subModulesInfo) {
for (auto index : m.inputs) {
if (stackMap.find(index) == stackMap.end()) {
stackMap.insert(std::make_pair(index, stackIndex));
stackIndex++;
}
}
for (auto index : m.outputs) {
if (stackMap.find(index) == stackMap.end()) {
stackMap.insert(std::make_pair(index, stackIndex));
stackIndex++;
}
}
}
result->mStackSize = stackMap.size();
for (int i=0; i<subModulesInfo.size(); ++i) {
auto& info = subModulesInfo[i];
// Reindex stack index
std::vector<int> subInputs(info.inputs.size());
for (int i=0; i<info.inputs.size(); ++i) {
subInputs[i] = stackMap[info.inputs[i]];
}
std::vector<int> subOutputs(info.outputs.size());
for (int i=0; i<info.outputs.size(); ++i) {
subOutputs[i] = stackMap[info.outputs[i]];
}
result->mSubModules.emplace_back(std::make_tuple(subModules[i], subInputs, subOutputs));
}
for (int i=0; i<inputIndexesVec.size(); ++i) {
inputIndexesVec[i] = stackMap[inputIndexesVec[i]];
}
for (int i=0; i<outputIndexesVec.size(); ++i) {
outputIndexesVec[i] = stackMap[outputIndexesVec[i]];
}
result->mInputIndexes = std::move(inputIndexesVec);
result->mOutputIndexes = std::move(outputIndexesVec);
return result;
}
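// Illustrative usage sketch (placeholder tensor names): loading a serialized net
// that may contain If/While control flow. With dynamic == false a subgraph-free
// model becomes a single StaticModule; otherwise it is split as implemented above.
static std::shared_ptr<Module> _exampleLoadPipeline(const uint8_t* buffer, size_t length) {
    return std::shared_ptr<Module>(PipelineModule::load({"input"}, {"output"}, buffer, length, false));
}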
Module* PipelineModule::clone(CloneContext* ctx) const {
PipelineModule* module(new PipelineModule);
for (const auto& it : mSubModules) {
const std::shared_ptr<Module>& submodule = std::get<0>(it);
const std::vector<int>& input_indices = std::get<1>(it);
const std::vector<int>& output_indices = std::get<2>(it);
std::shared_ptr<Module> replica_submodule(submodule->clone(ctx));
module->mSubModules.push_back(
std::make_tuple(replica_submodule, input_indices, output_indices));
module->registerModel({replica_submodule});
}
module->mInputIndexes = mInputIndexes;
module->mOutputIndexes = mOutputIndexes;
module->mStackSize = mStackSize;
return this->cloneBaseTo(ctx, module);
}
} // namespace Express
} // namespace MNN

View File

@ -8,16 +8,20 @@
#ifndef PipelineModule_hpp
#define PipelineModule_hpp
-#include "Module.hpp"
#include <MNN/expr/Module.hpp>
-#include "NN.hpp"
#include <MNN/expr/NN.hpp>
#include <MNN/expr/ExprCreator.hpp>
namespace MNN {
-namespace Train {
namespace Express {
class MNN_PUBLIC PipelineModule : public Module {
public:
typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
-static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph = {});
static Module* extractOrigin(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain) {
return extract(inputs, outputs, fortrain);
}
static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
@ -26,14 +30,18 @@ public:
std::vector<int> countOutputReference(std::vector<int> outputIndices);
private:
PipelineModule(){}
PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
const Transformer& transformFunction = {});
Module* clone(CloneContext* ctx) const override;
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
-std::vector<Express::VARP> mStack;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
};
-} // namespace Train
} // namespace Express
} // namespace MNN
#endif

View File

@ -0,0 +1,186 @@
//
// StaticModule.cpp
// MNN
//
// Created by MNN on b'2020/09/10'.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "StaticModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/AutoTime.hpp>
#include "core/TensorUtils.hpp"
#include "core/Session.hpp"
#include <MNN/expr/Executor.hpp>
#include <MNN/AutoTime.hpp>
#include <MNN/expr/ExecutorScope.hpp>
namespace MNN {
namespace Express {
StaticModule::StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix) : mInputs(inputs), mOutputs(outputs) {
mShapeFix = shapeFix;
mOutputNumbers = (int)outputs.size();
/** Compute:
std::vector<int> mOutputFromTensor;
std::vector<std::pair<int, int>> mOutputFromInput;
*/
for (int i=0; i<outputs.size(); ++i) {
auto& t = outputs[i];
bool fromInput = false;
for (int j=0; j<inputs.size(); ++j) {
if (inputs[j] == t) {
fromInput = true;
mOutputFromInput.emplace_back(std::make_pair(i, j));
break;
}
}
if (fromInput) {
continue;
}
mOutputFromTensor.emplace_back(i);
}
if (mOutputFromTensor.empty()) {
return;
}
mNet.reset(Interpreter::createFromBuffer(buffer, length));
#ifdef MNN_EXPR_ENABLE_PROFILER
mNet->setSessionMode(Interpreter::Session_Debug);
#else
mNet->setSessionMode(Interpreter::Session_Release);
#endif
if (mShapeFix) {
mNet->setSessionMode(Interpreter::Session_Input_Inside);
} else {
mNet->setSessionMode(Interpreter::Session_Input_User);
}
auto rt = Express::ExecutorScope::Current()->getRuntime();
// TODO: Add Config
ScheduleConfig config;
config.numThread = 1;
config.type = rt.first.begin()->first;
config.saveTensors = outputs;
mSession = mNet->createSession(config, rt);
mInputTensors.resize(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
mInputTensors[i] = mNet->getSessionInput(mSession, inputs[i].c_str());
}
mOutputTensors.resize(mOutputFromTensor.size());
for (int i=0; i<mOutputFromTensor.size(); ++i) {
mOutputTensors[i] = mNet->getSessionOutput(mSession, outputs[mOutputFromTensor[i]].c_str());
}
}
StaticModule::~StaticModule() {
// Do nothing
}
std::vector<Express::VARP> StaticModule::onForward(const std::vector<Express::VARP>& inputs) {
AUTOTIME;
std::vector<Express::VARP> outputs(mOutputNumbers);
for (auto& iter : mOutputFromInput) {
outputs[iter.first] = inputs[iter.second];
}
if (mOutputFromTensor.empty()) {
return outputs;
}
MNN_ASSERT(inputs.size() == mInputTensors.size());
for (int i=0; i<inputs.size(); ++i) {
auto info = inputs[i]->getInfo();
mInputTensors[i]->buffer().type = info->type;
auto des = TensorUtils::getDescribe(mInputTensors[i]);
if (info->order == Express::NCHW) {
des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
}
if (info->order == Express::NHWC) {
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
}
if (info->order == Express::NC4HW4) {
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
}
mNet->resizeTensor(mInputTensors[i], info->dim);
}
if (!mShapeFix) {
for (int i=0; i<inputs.size(); ++i) {
mInputTensors[i]->buffer().host = (uint8_t*)inputs[i]->readMap<void>();
}
// FIXME: Use Interpreter's API
mSession->setNeedResize();
}
mNet->resizeSession(mSession);
if (mShapeFix) {
for (int i=0; i<inputs.size(); ++i) {
// Inputs used only for shape computation have no allocated memory; skip the copy for them
if (nullptr != mInputTensors[i]->host<void>()) {
::memcpy(mInputTensors[i]->host<void>(), inputs[i]->readMap<void>(), mInputTensors[i]->size());
}
}
}
#ifdef MNN_EXPR_ENABLE_PROFILER
auto globalExecutor = ExecutorScope::Current();
Timer cost;
TensorCallBackWithInfo beforeCallBack = [&cost] (const std::vector<Tensor*>&, const OperatorInfo* info) {
cost.reset();
return true;
};
TensorCallBackWithInfo afterCallBack = [&cost, globalExecutor] (const std::vector<Tensor*>&, const OperatorInfo* info) {
auto costTimes = (float)cost.durationInUs() / 1000.0f;
globalExecutor->addOpCostTime(info->type(), costTimes);
globalExecutor->addOpFlops(info->type(), info->flops());
return true;
};
mNet->runSessionWithCallBackInfo(mSession, beforeCallBack, afterCallBack);
#else
mNet->runSession(mSession);
#endif
for (int i=0; i<mOutputTensors.size(); ++i) {
Express::Variable::Info info;
info.dim = mOutputTensors[i]->shape();
info.type = mOutputTensors[i]->getType();
auto format = TensorUtils::getDescribe(mOutputTensors[i])->dimensionFormat;
info.order = Express::NHWC;
if (format == MNN_DATA_FORMAT_NCHW) {
info.order = Express::NCHW;
} else if (format == MNN_DATA_FORMAT_NC4HW4) {
info.order = Express::NC4HW4;
}
outputs[mOutputFromTensor[i]] = Express::Variable::create(Express::Expr::create(std::move(info), mOutputTensors[i]->host<void>(), Express::VARP::CONSTANT, true), 0);
//::memcpy(outputs[i]->writeMap<void>(), mOutputTensors[i]->host<void>(), mOutputTensors[i]->size());
}
return outputs;
}
Module* StaticModule::clone(CloneContext* ctx) const {
StaticModule* module(new StaticModule);
module->mInputs = mInputs;
module->mOutputs = mOutputs;
module->mShapeFix = mShapeFix;
module->mOutputNumbers = mOutputNumbers;
module->mOutputFromInput = mOutputFromInput;
module->mOutputFromTensor = mOutputFromTensor;
if (mOutputFromTensor.empty()) {
return this->cloneBaseTo(ctx, module);
}
module->mNet = mNet;
auto rt = Express::ExecutorScope::Current()->getRuntime();
ScheduleConfig config;
config.numThread = 1;
config.type = rt.first.begin()->first;
config.saveTensors = mOutputs;
module->mSession = module->mNet->createSession(config, rt);
module->mInputTensors.resize(mInputs.size());
module->mOutputTensors.resize(mOutputFromTensor.size());
for (int i=0; i<mInputs.size(); ++i) {
module->mInputTensors[i] =
module->mNet->getSessionInput(module->mSession, mInputs[i].c_str());
}
for (int i=0; i<mOutputFromTensor.size(); ++i) {
module->mOutputTensors[i] = module->mNet->getSessionOutput(
module->mSession, mOutputs[mOutputFromTensor[i]].c_str());
}
return this->cloneBaseTo(ctx, module);
}
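// Illustrative usage sketch ("data"/"prob" are placeholder tensor names):
// build a StaticModule from a flatbuffer-packed net and run it once.
static VARP _exampleRunStatic(const void* buffer, size_t length, VARP image) {
    std::shared_ptr<Module> net(new StaticModule(buffer, length, {"data"}, {"prob"}));
    return net->onForward({image})[0];
}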
}
}

View File

@ -0,0 +1,44 @@
//
// StaticModule.hpp
// MNN
//
// Created by MNN on b'2020/09/10'.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef StaticModule_hpp
#define StaticModule_hpp
#include <MNN/expr/Module.hpp>
#include <MNN/Interpreter.hpp>
namespace MNN {
namespace Express {
class StaticModule : public Module {
public:
StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix = false);
virtual ~StaticModule();
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
private:
StaticModule() = default;
Module* clone(CloneContext* ctx) const override;
std::vector<std::string> mInputs;
std::vector<std::string> mOutputs;
std::shared_ptr<Interpreter> mNet;
Session* mSession;
std::vector<Tensor*> mInputTensors;
std::vector<Tensor*> mOutputTensors;
bool mShapeFix;
int mOutputNumbers;
// First: outputIndex, Second: outputTensor Index
std::vector<int> mOutputFromTensor;
// First: outputIndex, Second: input var index
std::vector<std::pair<int, int>> mOutputFromInput;
};
}
}
#endif

View File

@ -0,0 +1,186 @@
//
// WhileModule.cpp
// MNN
//
// Created by MNN on b'2020/09/10'.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "WhileModule.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
for (int i=0; i<names.size(); ++i) {
if (names[i] == key) {
return i;
}
}
return -1;
}
WhileModule* WhileModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
auto module = new WhileModule;
auto whileParam = op->main_as_WhileParam();
auto& body = subGraph.find(whileParam->body_graph()->str())->second;
auto& cond = subGraph.find(whileParam->cond_graph()->str())->second;
module->mBody = body.m;
module->mCond = cond.m;
/** Compute map index
int mCondInputNumber;
int mBodyInputNumber;
// First mCondInputs' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForCond;
// First mBodyInputs' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForBody;
std::vector<int> mOutputFromBody;
std::vector<std::pair<int, int>> mUpdateForCond;
std::vector<std::pair<int, int>> mUpdateForBody;
std::vector<std::pair<int, int>> mCondUpdateForCond;
std::vector<std::pair<int, int>> mCondUpdateForBody;
*/
// Map Inputs
module->mBodyInputNumber = body.inputs.size();
module->mCondInputNumber = cond.inputs.size();
for (int i=0; i<whileParam->aliases_inputs()->size(); ++i) {
auto index = i;
auto data = whileParam->aliases_inputs()->GetAs<StringVec>(i);
for (int s=0; s<data->data()->size(); ++s) {
auto name = data->data()->GetAsString(s)->str();
auto bodyInputPos = _findPos(body.inputs, name);
if (bodyInputPos >= 0) {
module->mInputForBody.emplace_back(std::make_pair(bodyInputPos, i));
}
auto condInputPos = _findPos(cond.inputs, name);
if (condInputPos >= 0) {
module->mInputForCond.emplace_back(std::make_pair(condInputPos, i));
}
}
}
// Map update
auto update = whileParam->aliases_updates();
std::map<int, int> replaceOutputs;
for (int i=0; i<update->size(); ++i) {
auto data = update->GetAs<StringVec>(i);
int bodyInputPos = -1;
int condInputPos = -1;
int bodyOutputPos = -1;
int condOutputPos = -1;
MNN_ASSERT(2 == data->data()->size());
auto outputName = data->data()->GetAsString(0)->str();
auto inputName = data->data()->GetAsString(1)->str();
bodyInputPos = _findPos(body.inputs, inputName);
condInputPos = _findPos(cond.inputs, inputName);
bodyOutputPos = _findPos(body.outputs, outputName);
condOutputPos = _findPos(cond.outputs, outputName);
auto updateBodyOutputPos = _findPos(body.outputs, inputName);
MNN_ASSERT(bodyOutputPos == -1 || condOutputPos == -1);
if (condOutputPos >= 0) {
if (bodyInputPos >= 0) {
module->mCondUpdateForBody.emplace_back(std::make_pair(bodyInputPos, condOutputPos));
}
if (condInputPos >= 0) {
module->mCondUpdateForCond.emplace_back(std::make_pair(condInputPos, condOutputPos));
}
}
if (bodyOutputPos >= 0) {
if (bodyInputPos >= 0) {
module->mUpdateForBody.emplace_back(std::make_pair(bodyInputPos, bodyOutputPos));
}
if (condInputPos >= 0) {
module->mUpdateForCond.emplace_back(std::make_pair(condInputPos, bodyOutputPos));
}
if (updateBodyOutputPos >= 0) {
replaceOutputs.insert(std::make_pair(updateBodyOutputPos, bodyOutputPos));
}
}
}
// Map outputs
auto output = whileParam->aliases_outputs();
for (int i=0; i<output->size(); ++i) {
auto data = output->GetAsString(i);
auto pos = _findPos(body.outputs, data->str());
MNN_ASSERT(pos >= 0);
if (replaceOutputs.find(pos) != replaceOutputs.end()) {
pos = replaceOutputs[pos];
}
module->mOutputFromBody.emplace_back(pos);
}
return module;
}
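// Example of the alias mapping above (illustrative): with aliases_updates containing
// the pair ["i_next", "i"], body output "i_next" is fed back to the cond input "i"
// (mUpdateForCond) and the body input "i" (mUpdateForBody) on the next iteration,
// while aliases_outputs picks which body outputs become the While op's results.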
std::vector<Express::VARP> WhileModule::onForward(const std::vector<Express::VARP>& inputsI) {
std::vector<Express::VARP> condInputs(mCondInputNumber);
std::vector<Express::VARP> bodyInputs(mBodyInputNumber);
auto& inputs = inputsI;
for (auto& p : mInputForCond) {
condInputs[p.first] = inputs[p.second];
}
for (auto& p : mInputForBody) {
bodyInputs[p.first] = inputs[p.second];
}
std::vector<Express::VARP> outputs(mOutputFromBody.size());
while (true) {
auto res = mCond->onForward(condInputs)[0];
auto resPtr = res->readMap<int>();
if (resPtr[0] <= 0) {
break;
}
auto bodyOutputs = mBody->onForward(bodyInputs);
Express::Variable::prepareCompute(bodyOutputs);
for (int i=0; i<bodyOutputs.size(); ++i) {
auto p = bodyOutputs[i];
if (p->expr().first->get() != nullptr) {
auto ptr = p->readMap<void>();
auto info = p->getInfo();
auto newV = Express::_Input(info->dim, info->order, info->type);
if (nullptr != ptr) {
::memcpy(newV->writeMap<void>(), ptr, info->type.bytes() * info->size);
}
bodyOutputs[i] = newV;
}
}
for (int i=0; i<mOutputFromBody.size(); ++i) {
outputs[i] = bodyOutputs[mOutputFromBody[i]];
}
for (auto& p : mUpdateForCond) {
condInputs[p.first] = bodyOutputs[p.second];
}
for (auto& p : mUpdateForBody) {
bodyInputs[p.first] = bodyOutputs[p.second];
}
for (auto& p : mCondUpdateForCond) {
condInputs[p.first] = res;
}
for (auto& p : mCondUpdateForBody) {
bodyInputs[p.first] = res;
}
}
return outputs;
}
Module* WhileModule::clone(CloneContext* ctx) const {
WhileModule* module(new WhileModule);
module->mCondInputNumber = mCondInputNumber;
module->mBodyInputNumber = mBodyInputNumber;
module->mInputForCond = mInputForCond;
module->mInputForBody = mInputForBody;
module->mOutputFromBody = mOutputFromBody;
module->mUpdateForCond = mUpdateForCond;
module->mUpdateForBody = mUpdateForBody;
module->mCondUpdateForCond = mCondUpdateForCond;
module->mCondUpdateForBody = mCondUpdateForBody;
module->mCond.reset(mCond->clone(ctx));
module->mBody.reset(mBody->clone(ctx));
return this->cloneBaseTo(ctx, module);
}
};
};

View File

@ -0,0 +1,46 @@
//
// WhileModule.hpp
// MNN
//
// Created by MNN on b'2020/09/10'.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef WhileModule_hpp
#define WhileModule_hpp
#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class WhileModule : public Module {
public:
virtual ~WhileModule() {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
private:
WhileModule(){}
Module* clone(CloneContext* ctx) const override;
int mCondInputNumber;
int mBodyInputNumber;
// First mCondInputs' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForCond;
// First mBodyInputs' index, Second: inputs's index
std::vector<std::pair<int, int>> mInputForBody;
std::vector<int> mOutputFromBody;
std::vector<std::pair<int, int>> mUpdateForCond;
std::vector<std::pair<int, int>> mUpdateForBody;
std::vector<std::pair<int, int>> mCondUpdateForCond;
std::vector<std::pair<int, int>> mCondUpdateForBody;
std::shared_ptr<Module> mCond;
std::shared_ptr<Module> mBody;
};
}
}
#endif

View File

@ -11,6 +11,7 @@
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <MNN/ErrorCode.hpp>
#include <MNN/MNNForwardType.h>
@ -67,6 +68,7 @@ class Session;
struct Content;
class Tensor;
class Backend;
class Runtime;
class MNN_PUBLIC OperatorInfo {
struct Info;
@ -89,6 +91,7 @@ protected:
typedef std::function<bool(const std::vector<Tensor*>&, const std::string& /*opName*/)> TensorCallBack;
typedef std::function<bool(const std::vector<Tensor*>&, const OperatorInfo*)> TensorCallBackWithInfo;
typedef std::pair<std::map<MNNForwardType, std::shared_ptr<Runtime>>, std::shared_ptr<Runtime>> RuntimeInfo;
/** net data holder. multiple sessions could share same net. */
class MNN_PUBLIC Interpreter {
@ -108,7 +111,43 @@ public:
static Interpreter* createFromBuffer(const void* buffer, size_t size);
~Interpreter();
enum SessionMode {
/** About CallBack, Default Session_Debug*/
/** runSessionWithCallBack is allowed and can get internal op info*/
Session_Debug = 0,
/** runSessionWithCallBack is not valid and can't get any info of op in session*/
Session_Release = 1,
/** About input tensor, Default Session_Input_Inside*/
/** The input tensor is allocated by the session; set input data after the session is resized*/
Session_Input_Inside = 2,
/** The input tensor is allocated by the user; set input data before the session resize*/
Session_Input_User = 3,
};
/**
* @brief This API should be called before creating a session.
* @param mode session mode
* @return void
*/
void setSessionMode(SessionMode mode);
/**
* @brief This API should be called before creating a session.
* If the cache file exists, try to load the cache from it.
* After createSession, try to save the cache to the file.
* @param cacheFile cache file name
* @param keySize the first `keySize` bytes used as the key to check if the `cacheFile` exists.
* @return void
*/
void setCacheFile(const char* cacheFile, size_t keySize = 128);
public:
/**
* @brief create runtimeInfo separately with schedule config.
* @param config session schedule configs.
*/
static RuntimeInfo createRuntime(const std::vector<ScheduleConfig>& configs);
/**
* @brief create session with schedule config. created session will be managed in net.
* @param config session schedule config.
@ -116,6 +155,13 @@ public:
*/
Session* createSession(const ScheduleConfig& config);
/**
* @brief create session with schedule config and user-specified runtime.
* @param config session schedule config, runtime runtimeInfo used by the created session.
* @return created session if success, NULL otherwise.
*/
Session* createSession(const ScheduleConfig& config, const RuntimeInfo& runtime);
/**
* @brief create multi-path session with schedule configs. created session will be managed in net.
* @param configs session schedule configs.
@ -123,6 +169,14 @@ public:
*/
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs);
/**
* @brief create multi-path session with schedule configs and user-specified runtime.
created session will be managed in net.
* @param configs session schedule configs.
* @return created session if success, NULL otherwise.
*/
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime);
/**
* @brief release session.
* @param session given session.
@ -204,17 +258,39 @@ public:
*/
Tensor* getSessionOutput(const Session* session, const char* name);
enum SessionInfoCode {
/** memory session used in MB, float* */
MEMORY = 0,
/** float operation needed in session in M, float* */
FLOPS = 1,
/** Backends in session in M, int*, length >= the configs when create session */
BACKENDS = 2,
ALL
};
/**
- * @brief get all input tensors.
* @brief get session info
* @param session given session.
- * @return all input tensors mapped with name.
* @param code given info code.
* @param void* given info ptr, see SessionInfoCode for detail
* @return true if support the code, false otherwise.
*/
-const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
bool getSesionInfo(const Session* session, SessionInfoCode code, void* ptr);
/**
* @brief get all output tensors.
* @param session given session.
* @return all output tensors mapped with name.
*/
const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
/**
* @brief get all input tensors.
* @param session given session.
* @return all input tensors mapped with name.
*/
const std::map<std::string, Tensor*>& getSessionInputAll(const Session* session) const;
public:

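// Illustrative usage sketch of the runtime/session APIs added above. "model.mnn" and
// "model.cache" are placeholder paths; error handling is omitted and the CPU forward
// type with 4 threads is an assumption.
#include <memory>
#include <MNN/Interpreter.hpp>

void exampleSharedRuntime() {
    std::shared_ptr<MNN::Interpreter> net(MNN::Interpreter::createFromFile("model.mnn"));
    net->setSessionMode(MNN::Interpreter::Session_Input_User); // inputs allocated by the user
    net->setCacheFile("model.cache");                          // load/save kernel cache around createSession
    MNN::ScheduleConfig config;
    config.type      = MNN_FORWARD_CPU;
    config.numThread = 4;
    // Create the runtime once; it can be shared by several sessions/interpreters.
    MNN::RuntimeInfo runtime = MNN::Interpreter::createRuntime({config});
    MNN::Session* session = net->createSession(config, runtime);
    float memoryMB = 0.0f;
    net->getSesionInfo(session, MNN::Interpreter::MEMORY, &memoryMB); // spelled as declared above
    (void)memoryMB;
}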
View File

@ -38,13 +38,7 @@
} \
}
#else
-#define MNN_ASSERT(x) \
-{ \
-int res = (x); \
-if (!res) { \
-MNN_ERROR("Error for %d\n", __LINE__); \
-} \
-}
#define MNN_ASSERT(x)
#endif
#define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__);

View File

@ -23,8 +23,8 @@ typedef enum {
/*Hand write metal*/
MNN_FORWARD_METAL = 1,
-/*Use IOS's MPS instead of hand-write metal, Not Support yet*/
-MNN_FORWARD_MPS = 2,
/*NVIDIA GPU API*/
MNN_FORWARD_CUDA = 2,
/*Android / Common Device GPU API*/
MNN_FORWARD_OPENCL = 3,
@ -41,13 +41,13 @@
MNN_FORWARD_USER_3 = 11,
MNN_FORWARD_ALL,
/* Apply arm extension instruction set to accelerate some Ops, this forward type
is only used in MNN internal, and will be active automatically when user set forward type
to be MNN_FORWARD_CPU and extension instruction set is valid on hardware.
*/
MNN_FORWARD_CPU_EXTENSION
} MNNForwardType;
#ifdef __cplusplus
namespace MNN {

View File

@ -12,6 +12,7 @@
#include <vector>
#include <MNN/HalideRuntime.h>
#include <MNN/MNNDefine.h>
#define MNN_MAX_TENSOR_DIM 6
namespace MNN {

View File

@ -10,6 +10,7 @@
#include <MNN/ErrorCode.hpp>
#include <MNN/expr/Expr.hpp>
#include <MNN/Tensor.hpp>
#include <MNN/Interpreter.hpp>
#include <vector>
#include <mutex>
#include <set>
@ -17,41 +18,19 @@
namespace MNN {
class Backend;
class Execution;
class Runtime;
struct Op;
namespace Express {
class MNN_PUBLIC Executor {
public:
-class ComputeCache {
-public:
-void setShapeDirty(int offset, Variable::Info* info);
-void setContentDirty();
-void setContentReady();
-void syncInput(int offset, const Variable::Info* info);
-void syncOutput(int offset, Variable::Info* info);
-struct TensorContent {
-std::shared_ptr<Tensor> tensor;
-int refCount = 0;
-void reset();
-bool aliveOutside = false;
-};
-struct Unit;
-virtual ~ComputeCache() {}
-ComputeCache() {}
-virtual ErrorCode compute() = 0;
-virtual ErrorCode resize() = 0;
-protected:
-// Get the index tensor with the need of needBackend
-// If the Tensor don't belong to the backend, need use needBackend to alloc it and return
-virtual Tensor* getTensor(int index, bool host) = 0;
-void _setShapeDirty();
-friend class Executor;
-bool mContentDirty = true;
-bool mShapeDirty = true;
-};
class ComputeCache;
struct Unit;
static void setShapeDirty(ComputeCache* cache);
static void setContentDirty(ComputeCache* cache);
static void* mapOutput(ComputeCache* cache, int offset, Tensor* dest);
struct Requirement {
std::vector<bool> contentNeedContent;
std::vector<bool> shapeNeedContent;
-std::vector<bool> supportError;
};
~Executor();
Requirement getRequirement(Expr* expr) const;
@ -65,25 +44,27 @@ public:
};
void gc(GCFlag flag = FULL);
static std::shared_ptr<Executor> getGlobalExecutor();
static std::shared_ptr<Executor> newExecutor(MNNForwardType type,
const BackendConfig& config,
int numberThread);
void resetProfile();
void dumpProfile();
void addOpCostTime(int op, float costTime);
void addOpCostTime(const std::string& type, float costTime);
void addOpFlops(const std::string& type, float flops);
class Profiler;
static RuntimeInfo getRuntime();
private:
-void _createSingle(EXPRP expr);
void _makeCache(const std::vector<EXPRP>& outputs, bool forceCPU);
-void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::vector<ComputeCache::TensorContent>&& tensors, bool forceCPU);
void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::set<std::shared_ptr<Expr::Inside>>&& inputNode, bool forceCPU);
-void _addToCache(const std::vector<std::shared_ptr<ComputeCache>>& caches);
-void _resetCache();
-void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::vector<ComputeCache::TensorContent>& tensors);
void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::set<std::shared_ptr<Expr::Inside>>& inputNode);
-Executor(std::shared_ptr<Backend> backend);
Executor(std::shared_ptr<Runtime> backend, MNNForwardType type);
-std::shared_ptr<Backend> mBackend;
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mRuntime;
-std::shared_ptr<Backend> mBackupBackend;
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mBackupRuntime;
std::mutex mMutex;
-std::vector<std::shared_ptr<Tensor>> mStack;
-std::vector<Tensor*> mStackInputs;
-std::vector<Tensor*> mStackOutputs;
std::shared_ptr<Profiler> mProfiler;
};
} // namespace Express

View File

@ -0,0 +1,33 @@
//
// ExecutorScope.hpp
// MNN
//
// Created by MNN on 2020/10/26.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MNN_EXPR_EXECUTOR_SCOPE_HPP_
#define MNN_EXPR_EXECUTOR_SCOPE_HPP_
#include <MNN/expr/Executor.hpp>
namespace MNN {
namespace Express {
struct ExecutorScope final {
public:
ExecutorScope() = delete;
explicit ExecutorScope(const ExecutorScope&) = delete;
explicit ExecutorScope(const std::shared_ptr<Executor>& current);
explicit ExecutorScope(const std::string& scope_name,
const std::shared_ptr<Executor>& current);
virtual ~ExecutorScope();
static const std::shared_ptr<Executor> Current();
};
} // namespace Express
} // namespace MNN
#endif // MNN_EXPR_EXECUTOR_SCOPE_HPP_

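// Illustrative usage sketch: run expression evaluation on a dedicated executor instead
// of the global one. The CPU forward type and 4 threads are assumptions for the example.
#include <memory>
#include <MNN/MNNForwardType.h>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExecutorScope.hpp>

void exampleScopedExecutor() {
    MNN::BackendConfig config;
    std::shared_ptr<MNN::Express::Executor> exe =
        MNN::Express::Executor::newExecutor(MNN_FORWARD_CPU, config, 4);
    MNN::Express::ExecutorScope scope(exe);
    // Expressions built and evaluated here use `exe`; the previous executor is restored
    // when `scope` is destroyed.
}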
View File

@ -87,6 +87,7 @@ public:
};
bool fix(InputType type) const;
private:
friend class Variable;
std::shared_ptr<Variable> mContent;
};
inline bool operator==(Variable* src, VARP dst) {
@ -107,7 +108,6 @@ public:
INTS dim;
halide_type_t type;
int size;
-void* ptr = nullptr;
void syncSize();
};
const std::string& name() const;
@ -173,7 +173,7 @@
class MNN_PUBLIC Expr {
public:
struct Inside;
-static EXPRP create(Variable::Info&& info);
static EXPRP create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy = true);
static EXPRP create(const OpT* op, std::vector<VARP> inputs, int outputSize = 1);
static EXPRP create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize = 1);
static EXPRP create(std::unique_ptr<OpT>&& op, std::vector<VARP> inputs, int outputSize = 1) {
@ -188,7 +188,7 @@ public:
return mInputs;
}
int outputSize() const {
-return mOutputNames.size();
return (int)mOutputNames.size();
}
static void replace(EXPRP oldExpr, EXPRP newExpr);
bool requireInfo();

View File

@ -8,9 +8,14 @@
#ifndef MNN_Train_Module_hpp
#define MNN_Train_Module_hpp
#include <vector>
#include <unordered_map>
#include <MNN/expr/Expr.hpp>
namespace MNN {
-namespace Train {
namespace Express {
class MNN_PUBLIC Module {
public:
Module() = default;
@ -21,9 +26,6 @@ public:
bool loadParameters(const std::vector<Express::VARP>& parameters);
void setIsTraining(const bool isTraining);
bool getIsTraining();
-static std::shared_ptr<Module> transform(const std::vector<Express::VARP>& inputs,
-const std::vector<Express::VARP>& outputs);
void clearCache();
const std::string& name() const {
@ -38,12 +40,45 @@ public:
void setType(std::string type) {
mType = std::move(type);
}
// Return the parameter index
int addParameter(Express::VARP parameter);
void setParameter(Express::VARP parameter, int index);
static Module* createEmpty(const std::vector<Express::VARP>& parameters);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic = false);
static Module* clone(const Module* module, const bool shareParams = false);
class CloneContext {
public:
CloneContext() = default;
explicit CloneContext(const bool shareParams)
: mShareParams(shareParams) {}
virtual ~CloneContext() = default;
const bool shareParams() const { return mShareParams; }
EXPRP getOrClone(const EXPRP expr);
VARP getOrClone(const VARP var);
private:
bool mShareParams = false;
std::unordered_map<const Expr*, EXPRP> mExprMap;
std::unordered_map<const Variable*, VARP> mVarMap;
};
virtual Module* clone(CloneContext* ctx) const {
return nullptr;
}
protected:
void registerModel(const std::vector<std::shared_ptr<Module>>& children);
-void addParameter(Express::VARP parameter);
virtual void onClearCache() {
}
Module* cloneBaseTo(CloneContext* ctx, Module* module) const;
private:
void _collectParameters(std::vector<Express::VARP>& result) const;
std::vector<std::shared_ptr<Module>> mChildren;
@ -52,6 +87,13 @@ private:
std::string mName;
std::string mType;
};
struct SubGraph {
std::vector<std::string> inputs;
std::vector<std::string> outputs;
std::shared_ptr<Module> m;
};
-} // namespace Train
} // namespace Express
} // namespace MNN

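// Illustrative usage sketch of the Module loading/cloning API declared above.
// "input"/"prob" and "model.mnn" are placeholder names.
#include <memory>
#include <MNN/expr/Module.hpp>

void exampleLoadAndClone() {
    std::shared_ptr<MNN::Express::Module> net(
        MNN::Express::Module::load({"input"}, {"prob"}, "model.mnn"));
    // clone replicates the structure; with shareParams == true the trainable
    // parameters are shared with the original module.
    std::shared_ptr<MNN::Express::Module> replica(
        MNN::Express::Module::clone(net.get(), /*shareParams*/ true));
    (void)replica;
}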
View File

@ -9,11 +9,10 @@
#ifndef MNN_Train_NN_hpp
#define MNN_Train_NN_hpp
#include <MNN/expr/ExprCreator.hpp>
-#include "Distributions.hpp"
#include <MNN/expr/Module.hpp>
-#include "Module.hpp"
#include <vector>
namespace MNN {
-namespace Train {
namespace Express {
class Initializer;
class MNN_PUBLIC NN {
@ -29,7 +28,7 @@ public:
};
enum FeatureScaleStatMethod {
PerTensor = 0,
-PerChannel = 1
PerChannel = 1 // Deprecated
};
/* Unlike enum in class, class in class need be dllimport or dllexport explicitly.
Compiling in other systems will not be affected.
@ -86,7 +85,7 @@ public:
static ConvParameters ExtractConvolution(Express::EXPRP expr);
// Extract BatchNormal and Dropout
-static Module* ExtractNotRunableOp(Express::EXPRP expr);
static Module* ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs);
};
};

View File

@ -31,25 +31,30 @@ MNN_PUBLIC VARP _Const(const void* ptr, INTS shape = {}, Dimensionformat format
MNN_PUBLIC VARP _TrainableParam(float value, INTS dims, Dimensionformat format); MNN_PUBLIC VARP _TrainableParam(float value, INTS dims, Dimensionformat format);
MNN_PUBLIC VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format, MNN_PUBLIC VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format,
halide_type_t type = halide_type_of<float>()); halide_type_t type = halide_type_of<float>());
MNN_PUBLIC VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape);
MNN_PUBLIC VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1}, MNN_PUBLIC VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}); INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
MNN_PUBLIC VARP _Conv(float weight, float bias, VARP x, INTS channel, INTS kernelSize, PaddingMode pad = VALID, MNN_PUBLIC VARP _Conv(float weight, float bias, VARP x, INTS channel, INTS kernelSize, PaddingMode pad = VALID,
INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1); INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1);
MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize, MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
-                      PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
+                      PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false, int nbits = 8);
 MNN_PUBLIC VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
                       PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
 MNN_PUBLIC VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
                         INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
+MNN_PUBLIC VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
+                        PaddingMode pad, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
 MNN_PUBLIC VARP _MaxPool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
 MNN_PUBLIC VARP _AvePool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
-MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NHWC);
+MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NCHW);
 MNN_PUBLIC VARP _Reshape(VARP x, VARP shape);
 MNN_PUBLIC VARP _Scale(VARP x, int channels, std::vector<float>&& scales, std::vector<float>&& bias);
 MNN_PUBLIC VARP _Relu(VARP x, float slope = 0.0f);
-MNN_PUBLIC VARP _Relu6(VARP x);
+MNN_PUBLIC VARP _Relu6(VARP x, float minValue = 0.0f, float maxValue = 6.0f);
 MNN_PUBLIC VARP _PRelu(VARP x, std::vector<float> &&slopes);
 MNN_PUBLIC VARP _Softmax(VARP logits, int axis = -1);
 MNN_PUBLIC VARP _Softplus(VARP features);
@ -76,7 +81,7 @@ MNN_PUBLIC VARP _Pad(VARP x, VARP paddings, PadValueMode mode = CONSTANT);
 MNN_PUBLIC VARP _ExpandDims(VARP input, int axis);
 MNN_PUBLIC VARP _ExpandDims(VARP input, VARP axis);
-MNN_PUBLIC VARP _Shape(VARP input);
+MNN_PUBLIC VARP _Shape(VARP input, bool nchw = false);
 MNN_PUBLIC VARP _Stack(VARPS values, int axis=0);
 enum InterpolationMethod {BILINEAR, NEAREST};
 MNN_PUBLIC VARP _CropAndResize(VARP image, VARP boxes, VARP box_ind, VARP crop_size,
@ -92,6 +97,7 @@ MNN_PUBLIC VARP _GatherND(VARP params, VARP indices);
 MNN_PUBLIC VARP _Selu(VARP features, float scale, float alpha);
 MNN_PUBLIC VARP _Size(VARP input);
 MNN_PUBLIC VARP _Elu(VARP features, float alpha=1.0);
+MNN_PUBLIC VARP _Threshold(VARP features, float alpha=1.0);
 MNN_PUBLIC VARP _MatrixBandPart(VARP input, VARP num_lower, VARP num_upper);
 MNN_PUBLIC std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims);
 MNN_PUBLIC VARP _SetDiff1D(VARP x, VARP y);
@ -123,7 +129,8 @@ MNN_PUBLIC VARP _ZeroGrad(VARP x);
 // Int8 Inference
 MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
-                      PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu);
+                      PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits = 8);
+MNN_PUBLIC VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim);
 MNN_PUBLIC VARP _FloatToInt8(VARP x, VARP scale, char minValue, char maxValue);
 MNN_PUBLIC VARP _Int8ToFloat(VARP x, VARP scale);
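Below is a hypothetical C++ sketch (not part of this commit) that exercises a few of the revised declarations above; the input shape, channel counts, and weight values are made-up illustration values.

// Sketch only: builds a tiny Express graph with the updated op signatures.
#include <vector>
#include <MNN/expr/Expr.hpp>
#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

int main() {
    auto x = _Input({1, 3, 32, 32}, NCHW);                 // 1x3x32x32 float input (illustrative shape)
    std::vector<float> weight(8 * 3 * 3 * 3, 0.01f);       // 8 output channels, 3 input channels, 3x3 kernel
    std::vector<float> bias(8, 0.0f);
    auto y = _Conv(std::move(weight), std::move(bias), x,
                   {3, 8}, {3, 3}, SAME);                   // float-weight overload; remaining args keep defaults
    y = _Relu6(y, 0.0f, 6.0f);                              // clamp range is now explicit; defaults keep the old 0..6 behaviour
    auto shape = _Shape(y, true);                           // new flag: report the shape in NCHW order
    auto flat = _Reshape(y, {1, -1});                       // note: default original_format changed from NHWC to NCHW
    return 0;
}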

102
include/MNN/expr/Scope.hpp Normal file
View File

@ -0,0 +1,102 @@
//
//  RuntimeScope.hpp
//  MNN
//
//  Created by MNN on 2020/10/26.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef MNN_EXPR_SCOPE_HPP_
#define MNN_EXPR_SCOPE_HPP_

#include <cstdio>
#include <vector>
#include <string>
#include <mutex>

#include <MNN/Interpreter.hpp>

namespace MNN {
namespace Express {

template <typename T>
class Scope {
public:
    Scope();
    virtual ~Scope() = default;

    struct ScopedContent {
        std::string scope_name;
        T content;
    };
    void EnterScope(const ScopedContent& current);
    void EnterScope(const T& current);
    void EnterScope(const std::string& scope_name, const T& current);

    void ExitScope();

    const ScopedContent& Current() const;
    int ScopedLevel() const { return scoped_level_; }

private:
    std::string MakeScopeName(const std::string& prefix, int level) const;

    mutable std::mutex mutex_;
    int scoped_level_ = 0;
    std::vector<ScopedContent> scoped_contents_;
};

template <typename T>
Scope<T>::Scope() : scoped_level_(0) {
}

template <typename T>
void Scope<T>::EnterScope(const ScopedContent& current) {
    std::lock_guard<std::mutex> lock(mutex_);
    ++scoped_level_;
    scoped_contents_.push_back(current);
}

template <typename T>
void Scope<T>::EnterScope(const T& current) {
    EnterScope("scope", current);
}

template <typename T>
void Scope<T>::EnterScope(const std::string& scope_name,
                          const T& current) {
    std::lock_guard<std::mutex> lock(mutex_);
    int scoped_level = ScopedLevel();
    std::string name = MakeScopeName(scope_name, scoped_level++);
    ScopedContent content{name, current};
    ++scoped_level_;
    scoped_contents_.push_back(content);
}

template <typename T>
void Scope<T>::ExitScope() {
    std::lock_guard<std::mutex> lock(mutex_);
    --scoped_level_;
    scoped_contents_.resize(scoped_level_);
}

template <typename T>
const typename Scope<T>::ScopedContent& Scope<T>::Current() const {
    std::lock_guard<std::mutex> lock(mutex_);
    MNN_CHECK(scoped_contents_.size() > 0, "Scope level should not be 0.");
    return scoped_contents_.back();
}

template <typename T>
std::string Scope<T>::MakeScopeName(const std::string& prefix,
                                    int level) const {
    char s[16];
    snprintf(s, 16, "%d", level);
    return prefix + "/" + std::string(s);
}

}  // namespace Express
}  // namespace MNN

#endif  // MNN_EXPR_SCOPE_HPP_
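A minimal usage sketch for the Scope<T> template above (hypothetical driver, not part of the commit; it assumes only the header as shown).

// Sketch only: exercises Scope<T> with an int payload.
#include <iostream>
#include <MNN/expr/Scope.hpp>

int main() {
    MNN::Express::Scope<int> scope;
    scope.EnterScope("runtime", 42);             // named scope -> "runtime/0", level becomes 1
    scope.EnterScope(7);                         // unnamed scope -> "scope/1", level becomes 2
    std::cout << scope.Current().scope_name      // prints "scope/1"
              << " = " << scope.Current().content << std::endl;
    scope.ExitScope();                           // pops back to "runtime/0"
    std::cout << scope.ScopedLevel() << std::endl;  // prints 1
    scope.ExitScope();
    return 0;
}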

View File

@ -1,12 +1,14 @@
-# MNN_Windows
-# |------- MNN_Windows_lib
-# |---------- Dynamic_Library
-# |---------- Static_Library
-# |------- MNN_Windows_tools
+# MNN
+# |-- Debug
+# |    |--- MD
+# |    |--- MT
+# |-- Release
+#      |--- MD
+#      |--- MT
 $erroractionpreference = "stop"
-Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN_Windows"
+Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN"
 #clear and create package directory
 powershell ./schema/generate.ps1
@ -14,32 +16,50 @@ Set-Variable -Name WINDOWS_PACKAGE_PATH -Value "$(pwd)\$WINDOWS_PACKAGE_NAME"
 Remove-Item $WINDOWS_PACKAGE_PATH -Recurse -ErrorAction Ignore
 mkdir $WINDOWS_PACKAGE_PATH\
 cd $WINDOWS_PACKAGE_PATH
-mkdir -p MNN_Windows_lib\Dynamic_Library
-mkdir -p MNN_Windows_lib\Static_Library
-mkdir MNN_Windows_tools
+mkdir -p Debug\MD
+mkdir -p Debug\MT
+mkdir -p Release\MD
+mkdir -p Release\MT
 cd ..
 Remove-Item build -Recurse -ErrorAction Ignore
 mkdir build
-cd build
+pushd build
 # tools without dependency, static library without sep_build
-cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
-ninja
-pushd $WINDOWS_PACKAGE_PATH
-cp ..\build\*.exe MNN_Windows_tools
-cp ..\build\*.pdb MNN_Windows_tools
-cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
-popd
-#dynamic library without sep_build
-rm .\CMakeCache.txt
-cmake -G "Ninja" -DMNN_SEP_BUILD=OFF ..
+#cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
+#ninja
+#pushd $WINDOWS_PACKAGE_PATH
+#cp ..\build\*.exe MNN_Windows_tools
+#cp ..\build\*.pdb MNN_Windows_tools
+#cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
+#popd
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
 ninja
-cd $WINDOWS_PACKAGE_PATH
-cp ..\build\MNN.lib MNN_Windows_lib\Dynamic_Library
-cp ..\build\MNN.dll MNN_Windows_lib\Dynamic_Library
-cp ..\build\MNN.pdb MNN_Windows_lib\Dynamic_Library
-# Compress MNN_Windows_lib and MNN_Windows_tools
-Compress-Archive -Path MNN_Windows_lib -DestinationPath MNN_Windows_lib.zip -Update -CompressionLevel Optimal
-Compress-Archive -Path MNN_Windows_tools -DestinationPath MNN_Windows_tools.zip -Update -CompressionLevel Optimal
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MT
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MT
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MT
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
+ninja
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MD
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MD
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MD
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
+ninja
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MT
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MT
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MT
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
+ninja
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MD
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MD
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MD
+popd

View File

@ -8,15 +8,14 @@ set_target_properties(
         ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN.so
         )
-add_library( MNN_Arm82 SHARED IMPORTED GLOBAL)
-set_target_properties(
-        MNN_Arm82
-        PROPERTIES IMPORTED_LOCATION
-        ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Arm82.so
-        )
 add_library( MNN_CL SHARED IMPORTED GLOBAL )
 set_target_properties( MNN_CL
         PROPERTIES IMPORTED_LOCATION
         ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_CL.so
         )
+add_library( MNN_Express SHARED IMPORTED GLOBAL )
+set_target_properties( MNN_Express
+        PROPERTIES IMPORTED_LOCATION
+        ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Express.so
+        )

View File

@ -5,7 +5,6 @@ adb push ./libMNN_CL.so /data/local/tmp/MNN/libMNN_CL.so
 adb push ./libMNN_Vulkan.so /data/local/tmp/MNN/libMNN_Vulkan.so
 adb push ./libMNN_GL.so /data/local/tmp/MNN/libMNN_GL.so
 adb push ./libMNN_Express.so /data/local/tmp/MNN/libMNN_Express.so
-adb push ./libMNN_Arm82.so /data/local/tmp/MNN/libMNN_Arm82.so
 adb push ./MNNV2Basic.out /data/local/tmp/MNN/MNNV2Basic.out
 adb shell "cd /data/local/tmp/MNN && rm -r output"
 adb shell "cd /data/local/tmp/MNN && mkdir output"
@ -18,3 +17,4 @@ adb push ./timeProfile.out /data/local/tmp/MNN/timeProfile.out
 adb push ./train.out /data/local/tmp/MNN/train.out
 adb push ./benchmark.out /data/local/tmp/MNN/benchmark.out
 adb push ./benchmarkExprModels.out /data/local/tmp/MNN/benchmarkExprModels.out
+adb push ./run_test.out /data/local/tmp/MNN/run_test.out

File diff suppressed because it is too large

View File

@ -4,6 +4,8 @@
 <dict>
     <key>CFBundleDevelopmentRegion</key>
     <string>$(DEVELOPMENT_LANGUAGE)</string>
+    <key>CFBundleIdentifier</key>
+    <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
     <key>CFBundleInfoDictionaryVersion</key>
     <string>6.0</string>
     <key>CFBundleName</key>

View File

@ -1,57 +0,0 @@
#!bin/sh
echo "Register Op Begin"

function read_dir(){
    str1=`grep -e $2 $1/*.$4|sed s/[[:space:]]//g`
    array=(${str1//\;/ })
    for var in ${array[@]}; do
        `echo $var|awk -F $3 '{
            a="___";
            b="__();";
            c="extern void ";
            print(c""a""$3"__"$4""b) >> "extern";
            print (a""$3"__"$4""b) >> "call"
        }'`
    done
}

start=$(date +%s)

SEP='[:(,)]'
FILE_EXTERN_CPP='cpp'
FILE_EXTERN_MM='mm'
SHELL_FOLDER=$(dirname $0)'/../../..'

# handle CPU
CPUFILE=$SHELL_FOLDER/source/backend/cpu/CPUOPRegister.cpp
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $CPUFILE
echo "Start Register CPU"
CPU=$SHELL_FOLDER/source/backend/cpu
CPU_KEY='REGISTER_CPU_OP_CREATOR'
read_dir $CPU $CPU_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $CPUFILE
rm extern
echo '\nvoid registerCPUOps() {' >> $CPUFILE
cat call >> $CPUFILE
echo '}\n#endif\n}' >> $CPUFILE
rm call

# handle Shape
echo "Start Register Shape"
SHAPEFILE=$SHELL_FOLDER/source/shape/ShapeRegister.cpp
SHAPE=$SHELL_FOLDER/source/shape
SHAPE_KEY="REGISTER_SHAPE"
echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $SHAPEFILE
read_dir $SHAPE $SHAPE_KEY $SEP $FILE_EXTERN_CPP
cat extern >> $SHAPEFILE
rm extern
echo '\nvoid registerShapeOps() {' >> $SHAPEFILE
cat call >> $SHAPEFILE
echo '}\n#endif\n}' >> $SHAPEFILE
rm call

echo "Register Op End"

dur=$(echo "$(date +%s) - $start" | bc)
printf "Execution time: %.6f seconds" $dur

View File

@ -8,10 +8,14 @@
 #import "AppDelegate.h"
 #import "MNNTestSuite.h"
+#import <MNN/expr/Executor.hpp>
 @implementation AppDelegate
 - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
+    MNN::BackendConfig config;
+    // If want to test metal, change MNN_FORWARD_CPU to MNN_FORWARD_METAL
+    MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1);
     MNNTestSuite::runAll();
     return YES;
 }
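The same global-executor setup can be done from a plain C++ entry point; a minimal sketch, assuming MNN is linked and using only the calls shown above plus the standard BackendConfig fields.

// Sketch only: configures the global Express executor before any graph work.
#include <MNN/Interpreter.hpp>       // BackendConfig, MNN_FORWARD_CPU
#include <MNN/expr/Executor.hpp>     // MNN::Express::Executor

int main() {
    MNN::BackendConfig config;
    config.precision = MNN::BackendConfig::Precision_Low;  // optional: trade accuracy for speed
    // Route subsequent Express computation to the CPU backend with 4 threads.
    MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 4);
    // ... build and evaluate Express graphs here ...
    return 0;
}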

View File

@ -8,6 +8,9 @@ import cv2
 def inference():
     """ inference mobilenet_v1 using a specific picture """
     interpreter = MNN.Interpreter("mobilenet_v1.mnn")
+    interpreter.setCacheFile('.tempcache')
+    config = {}
+    config['precision'] = 'low'
     session = interpreter.createSession()
     input_tensor = interpreter.getSessionInput(session)
     image = cv2.imread('ILSVRC2012_val_00049999.JPEG')
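For reference, a hedged C++ counterpart of the cache-file and low-precision setup above; it assumes the C++ Interpreter exposes setCacheFile and BackendConfig precision in the same way as the Python binding, and the model path and cache name are placeholders.

// Sketch only: session-API equivalent of the Python snippet above.
#include <memory>
#include <MNN/Interpreter.hpp>

int main() {
    std::shared_ptr<MNN::Interpreter> net(MNN::Interpreter::createFromFile("mobilenet_v1.mnn"));
    net->setCacheFile(".tempcache");                              // persist backend tuning cache between runs
    MNN::ScheduleConfig config;
    MNN::BackendConfig backendConfig;
    backendConfig.precision = MNN::BackendConfig::Precision_Low;  // 'low' precision, as in the Python demo
    config.backendConfig = &backendConfig;
    auto session = net->createSession(config);
    // ... fill inputs, net->runSession(session), read outputs ...
    return 0;
}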

View File

@ -96,8 +96,7 @@ def demo():
     train_dataloader = MNN.data.DataLoader(train_dataset, batch_size = 64, shuffle = True)
     test_dataloader = MNN.data.DataLoader(test_dataset, batch_size = 100, shuffle = False)
-    opt = MNN.optim.SGD(0.01, 0.9, 0.0005)
-    opt.append(model.parameters)
+    opt = MNN.optim.SGD(model, 0.01, 0.9, 0.0005)
     F.set_thread_number(4)

View File

@ -125,8 +125,7 @@ def demo():
     net = Net(feature_extractor, num_classes)
-    opt = MNN.optim.SGD(1e-3, 0.9, 0.00004)
-    opt.append(net.parameters)
+    opt = MNN.optim.SGD(net, 1e-3, 0.9, 0.00004)
     for epoch in range(10):
         train_func(net, train_dataloader, opt, num_classes)

View File

@ -0,0 +1,15 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr
v0 = F.const([0.3,0.1, -0.3,0.4], [4])
v2 = F.const([0.3,0.1, -0.3,0.4], [4])
v1 = v0 * v0
outputDiff = F.const([0.05, 0.03, 0.02, 0.01], [4])
v0Grad = nn.grad(v1, [v0, v2], [outputDiff], "")
print(v0Grad)
print(v0Grad[0].read())
F.save(v0Grad, "temp.grad")

View File

@ -0,0 +1,36 @@
import numpy as np
import MNN
nn = MNN.nn
F = MNN.expr

class Net(nn.Module):
    """construct a lenet 5 model"""
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.conv(1, 20, [5, 5])
        self.conv2 = nn.conv(20, 50, [5, 5])
        self.fc1 = nn.linear(800, 500)
        self.fc2 = nn.linear(500, 10)
        self.step = F.const([10], [], F.NCHW, F.int)
        self.lr = F.const([0.0004], [], F.NCHW, F.float)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool(x, [2, 2], [2, 2])
        x = F.relu(self.conv2(x))
        x = F.max_pool(x, [2, 2], [2, 2])
        x = F.reshape(x, [0, -1])
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x, 1)
        return x

model = Net()
F.save(model.parameters, 'mnist.snapshot')

model2 = Net()
model2.load_parameters(F.load_as_list('mnist.snapshot'))
print(model2.lr.read())
print(model2.step.read())

Some files were not shown because too many files have changed in this diff