Mirror of https://github.com/alibaba/MNN.git

Commit d6795ad031 (parent 939a80dba8): "Github release 1.1.0"
@@ -330,7 +330,6 @@ project/android/.idea/caches/build_file_checksums.ser
 # FIXME(haijing): Xcode pre-build stage breaks compilation of flatbuffers by setting envs that do cmake cross-compilation for iOS
 # schema/current
 schema/private
-schema/current
 tools/converter/source/IR
 benchmark/benchmark.txt
 
@@ -345,18 +344,13 @@ pymnn/android/.idea/modules.xml
 pymnn/android/.idea/runConfigurations.xml
 pymnn/android/.idea/vcs.xml
 pymnn/android/.idea/caches/build_file_checksums.ser
+pymnn/src/pybind_private/
 
 buildios
 build*/
 include/MNN/VCS.h
-source/backend/opencl/execution/cl/codegen/opencl_program.cc
-source/backend/opencl/execution/cl/opencl_program.cc
-# FIXME(haijing): MTL issues.....
-# source/backend/metal/MetalOPRegister.mm
 source/backend/opengl/AllShader.cpp
 include/MNN/backend/opengl/shaders/AllShader.h
-source/backend/vulkan/compiler/AllShader.cpp
-include/MNN/backend/vulkan/shaders/AllShader.h
 .idea
 project/ios/ios_64
 project/ios/ios_32
CMakeLists.txt (124 changed lines)
@@ -49,6 +49,7 @@ include(FindPythonInterp REQUIRED)
 option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen" OFF)
 option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF)
 option(MNN_BUILD_SHARED_LIBS "MNN build shared or static lib" ON)
+option(MNN_WIN_RUNTIME_MT "MNN use /MT on Windows dll" OFF)
 option(MNN_FORBID_MULTI_THREAD "Disable Multi Thread" OFF)
 option(MNN_OPENMP "Use OpenMP's thread pool implementation. Does not work on iOS or Mac OS" OFF)
 option(MNN_USE_THREAD_POOL "Use MNN's own thread pool implementation" ON)
@@ -62,14 +63,14 @@ option(MNN_SUPPORT_TFLITE_QUAN "Enable MNN's tflite quantized op" ON)
 option(MNN_DEBUG_MEMORY "MNN Debug Memory Access" OFF)
 option(MNN_DEBUG_TENSOR_SIZE "Enable Tensor Size" OFF)
 option(MNN_GPU_TRACE "Enable MNN Gpu Debug" OFF)
-option(MNN_OPENCL_LWS_TUNE "Enable MNN OpenCL Lws Tuning" ON)
 option(MNN_PORTABLE_BUILD "Link the static version of third party libraries where possible to improve the portability of built executables" OFF)
 option(MNN_SEP_BUILD "Build MNN Backends and expression seperately. Only works with MNN_BUILD_SHARED_LIBS=ON" ON)
 option(NATIVE_LIBRARY_OUTPUT "Native Library Path" OFF)
 option(NATIVE_INCLUDE_OUTPUT "Native Include Path" OFF)
 option(MNN_AAPL_FMWK "Build MNN.framework instead of traditional .a/.dylib" OFF)
-option(MNN_FMA_ENABLE "x86 routine use fma extension" OFF)
 option(MNN_WITH_PLUGIN "Build with plugin op support." OFF)
+option(MNN_BUILD_MINI "Build MNN-MINI that just supports fixed shape models." OFF)
+option(MNN_USE_SSE "Use SSE optimization for x86 if possiable" ON)
 
 IF(NOT MNN_BUILD_SHARED_LIBS)
 message(WARNING "Close MNN_SEP_BUILD for static library")
@@ -79,13 +80,14 @@ IF(APPLE AND MNN_AAPL_FMWK AND MNN_SEP_BUILD)
 message(WARNING "MNN_SEP_BUILD AND MNN_AAPL_FMWK can't coexist. Turning off MNN_SEP_BUILD")
 SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
 ENDIF()
-IF(MSVC OR WIN32)
+IF(WIN32)
 IF(MNN_SEP_BUILD)
 message(WARNING "MNN_SEP_BUILD IS TROUBLESOME ON Windows. Forcing OFF...")
 SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
 ENDIF()
-SET(MNN_USE_SYSTEM_LIB ON CACHE BOOL "<docstring>" FORCE)
+add_definitions(-D_CRT_SECURE_NO_WARNINGS)
 
+IF(MSVC)
 # generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946
 SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
 SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
@@ -94,12 +96,13 @@ IF(MSVC OR WIN32)
 
 SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
+ENDIF()
 ENDIF()
 
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/macros.cmake)
 
 IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT MNN_BUILD_SHARED_LIBS AND NOT (MSVC OR WIN32))
-SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
+SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
 SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
 IF(MNN_BUILD_CONVERTER)
 SET(MNN_PORTABLE_BUILD ON CACHE BOOL "<docstring>" FORCE)
@@ -117,6 +120,9 @@ endif()
 if(MNN_SUPPORT_TFLITE_QUAN)
 add_definitions(-DMNN_SUPPORT_TFLITE_QUAN)
 endif()
+if(MNN_BUILD_MINI)
+add_definitions(-DMNN_BUILD_MINI)
+endif()
 
 # debug options
 if(MNN_DEBUG_MEMORY)
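A brief aside on the MNN_BUILD_MINI toggle added above: add_definitions(-DMNN_BUILD_MINI) only defines a preprocessor symbol. The sketch below is not part of this commit; beyond the macro name, everything in it is illustrative, and it simply shows the usual way such a compile definition is consumed on the C++ side.

// Illustrative only: code gated on the MNN_BUILD_MINI definition
// added by add_definitions(-DMNN_BUILD_MINI) in the hunk above.
#include <cstdio>

void printBuildVariant() {
#ifdef MNN_BUILD_MINI
    // Mini build: the option text above says only fixed-shape models are supported,
    // and the CMake below only links the Transform objects when MNN_BUILD_MINI is OFF.
    std::printf("MNN mini build: fixed shape models only\n");
#else
    std::printf("MNN full build\n");
#endif
}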
@@ -128,9 +134,6 @@ endif()
 if(MNN_GPU_TRACE)
 add_definitions(-DMNN_GPU_FORCE_FINISH)
 endif()
-if(MNN_OPENCL_LWS_TUNE)
-add_definitions(-DMNN_OPENCL_LWS_TUNE)
-endif()
 
 # backend options
 option(MNN_METAL "Enable Metal" OFF)
@@ -138,11 +141,8 @@ option(MNN_OPENCL "Enable OpenCL" OFF)
 option(MNN_OPENGL "Enable OpenGL" OFF)
 option(MNN_VULKAN "Enable Vulkan" OFF)
 option(MNN_ARM82 "Enable ARM82" OFF)
-# codegen register ops
-if (MNN_METAL)
-add_definitions(-DMNN_CODEGEN_REGISTER)
-endif()
+option(MNN_CUDA "Enable CUDA" OFF)
+option(MNN_TENSORRT "Enable TensorRT" OFF)
 
 # target options
 option(MNN_BUILD_BENCHMARK "Build benchmark or not" OFF)
@@ -165,11 +165,13 @@ message(STATUS "\tOpenCL: ${MNN_OPENCL}")
 message(STATUS "\tOpenGL: ${MNN_OPENGL}")
 message(STATUS "\tVulkan: ${MNN_VULKAN}")
 message(STATUS "\tARM82: ${MNN_ARM82}")
+message(STATUS "\tTensorRT: ${MNN_TENSORRT}")
+message(STATUS "\tCUDA: ${MNN_CUDA}")
 message(STATUS "\tOpenMP: ${MNN_OPENMP}")
 message(STATUS "\tHidden: ${MNN_HIDDEN}")
 message(STATUS "\tBuild Path: ${CMAKE_CURRENT_BINARY_DIR}")
 
-if(WIN32)
+if(MSVC)
 if(${CMAKE_VERSION} VERSION_LESS "3.14.0")
 message(FATAL_ERROR "MNN requires CMake 3.14+ to build on Windows!")
 endif()
@@ -178,14 +180,14 @@ if(WIN32)
 CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
 CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
 CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
-if (MNN_BUILD_SHARED_LIBS)
-if(${flag_var} MATCHES "/MT")
-string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
-endif()
-else ()
+if (MNN_WIN_RUNTIME_MT)
 if(${flag_var} MATCHES "/MD")
 string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
 endif()
+else ()
+if(${flag_var} MATCHES "/MT")
+string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
+endif()
 endif ()
 endforeach()
 elseif(CMAKE_SYSTEM_NAME MATCHES "^Android" OR CMAKE_SYSTEM_NAME MATCHES "^Linux")
@@ -270,6 +272,8 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "^Linux")
 endif()
 include_directories(${CMAKE_CURRENT_LIST_DIR}/include/
 ${CMAKE_CURRENT_LIST_DIR}/source/
+${CMAKE_CURRENT_LIST_DIR}/express/
+${CMAKE_CURRENT_LIST_DIR}/tools/
 ${CMAKE_CURRENT_LIST_DIR}/schema/current/
 ${CMAKE_CURRENT_LIST_DIR}/3rd_party/
 ${CMAKE_CURRENT_LIST_DIR}/3rd_party/flatbuffers/include
@@ -293,13 +297,13 @@ FILE(GLOB MNN_CV_SRC ${CMAKE_CURRENT_LIST_DIR}/source/cv/*)
 add_library(MNNCV OBJECT ${MNN_CV_SRC})
 list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCV>)
 list(APPEND MNN_TARGETS MNNCV)
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
-if(WIN32 OR MSVC)
-target_compile_options(MNNCV PRIVATE /arch:AVX)
-else()
+if (MNN_USE_SSE)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)")
+if (NOT MSVC)
 target_compile_options(MNNCV PRIVATE -msse3)
 target_compile_options(MNNCV PRIVATE -mavx)
 endif()
+endif()
 endif()
 
 # Math
@@ -308,11 +312,19 @@ add_library(MNNMath OBJECT ${MNN_Math_SRC})
 list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMath>)
 list(APPEND MNN_TARGETS MNNMath)
 
-# Shape
-FILE(GLOB MNN_Shape_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/*)
-add_library(MNNShape OBJECT ${MNN_Shape_SRC})
-list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNShape>)
-list(APPEND MNN_TARGETS MNNShape)
+# Transform
+FILE(GLOB MNN_Transform_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/* ${CMAKE_CURRENT_LIST_DIR}/source/geometry/*)
+add_library(MNNTransform OBJECT ${MNN_Transform_SRC})
+IF (NOT MNN_BUILD_MINI)
+list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTransform>)
+ENDIF()
+list(APPEND MNN_TARGETS MNNTransform)
 
+# Utils
+FILE(GLOB MNN_Utils_SRC ${CMAKE_CURRENT_LIST_DIR}/source/utils/*)
+add_library(MNNUtils OBJECT ${MNN_Utils_SRC})
+list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNUtils>)
+list(APPEND MNN_TARGETS MNNUtils)
 
 # Compute
 FILE(GLOB MNN_Compute_SRC ${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/compute/*)
@@ -327,7 +339,9 @@ list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCPU>)
 list(APPEND MNN_TARGETS MNNCPU)
 
 # X86_64 AVX/SSE
+if (MNN_USE_SSE)
 include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/x86_x64/CMakeLists.txt)
+endif()
 
 # AArch32/64 Assemblies
 include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/arm/CMakeLists.txt)
@@ -377,7 +391,7 @@ if (NOT APPLE)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
-if (WIN32)
+if (MSVC)
 set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}")
 set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}")
 endif()
@@ -387,20 +401,22 @@ endif()
 
 set(CMAKE_CXX_FLAGS_ORIGIN ${CMAKE_CXX_FLAGS})
 set(CMAKE_C_FLAGS_ORIGIN ${CMAKE_C_FLAGS})
-if ((NOT (MSVC OR WIN32)) AND MNN_HIDDEN)
+if ((NOT MSVC) AND MNN_HIDDEN)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden -fvisibility=hidden")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
-if (NOT APPLE)
+# Omit frame pointer may cause difficult debug
+if ((NOT APPLE) AND (NOT WIN32))
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer")
 endif()
 endif()
-if (NOT (MSVC OR WIN32))
+if (NOT MSVC)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions ")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
 endif()
 
 # Metal
-include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
+set(MNN_DEPS "")
+set(MNN_EXTRA_DEPENDS "")
 list(APPEND MNN_DEPS MNN)
 
 # Plugin
@@ -409,6 +425,14 @@ if(MNN_WITH_PLUGIN)
 include(${CMAKE_CURRENT_LIST_DIR}/source/plugin/CMakeLists.txt)
 endif()
 
+# Metal
+if(MNN_METAL AND APPLE)
+add_definitions(-DMNN_METAL_ENABLED=1)
+include(${CMAKE_CURRENT_LIST_DIR}/source/backend/metal/CMakeLists.txt)
+list(APPEND MNN_TARGETS MNNMetal)
+list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMetal>)
+endif()
+
 # Vulkan
 IF(MNN_VULKAN)
 add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/vulkan/)
@@ -446,22 +470,34 @@ IF(MNN_OPENGL)
 ENDIF()
 ENDIF()
 
+# CUDA
+IF(MNN_CUDA)
+add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/cuda/)
+list(APPEND MNN_TARGETS MNN_CUDA)
+list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_CUDA>)
+list(APPEND MNN_EXTRA_DEPENDS ${MNN_CUDA_LIBS})
+ENDIF()
+
 IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR IOS_ARCH STREQUAL "arm64")
 # ARM82 Assemblies
 IF(MNN_ARM82)
 add_definitions(-DENABLE_ARMV82)
 add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/arm82/)
-IF(MNN_SEP_BUILD)
-list(APPEND MNN_DEPS MNN_Arm82)
-ELSE()
 list(APPEND MNN_TARGETS MNN_Arm82)
 list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_Arm82>)
 ENDIF()
-ENDIF()
 ENDIF()
 # Express
 add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/express/)
 
+# TensorRT
+IF(MNN_TENSORRT)
+add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/tensorrt/)
+list(APPEND MNN_TARGETS MNN_TRT)
+list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_TRT>)
+list(APPEND MNN_EXTRA_DEPENDS ${MNN_TRT_LIBS})
+ENDIF()
+
 IF(MNN_SEP_BUILD)
 add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
 target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
@@ -471,7 +507,7 @@ ELSE()
 list(APPEND MNN_TARGETS MNNExpress)
 IF(MNN_BUILD_SHARED_LIBS)
 add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS})
-if (MSVC OR WIN32)
+if (WIN32)
 foreach(TARGET ${MNN_TARGETS})
 target_compile_definitions(${TARGET} PRIVATE "-DBUILDING_MNN_DLL")
 target_compile_definitions(${TARGET} INTERFACE "-DUSING_MNN_DLL")
@@ -484,7 +520,7 @@ ELSE()
 ENDIF()
 target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS})
 ENDIF()
-if (MSVC OR WIN32)
+if (MSVC)
 target_link_options(MNN PRIVATE "/IGNORE:4049,4217")
 endif()
 
@@ -504,9 +540,11 @@ if(APPLE)
 target_link_libraries(MNN PUBLIC ${FOUNDATION})
 find_library(METAL Metal REQUIRED)
 target_link_libraries(MNN PUBLIC ${METAL})
+find_library(GRAPHIC CoreGraphics)
+target_link_libraries(MNN PUBLIC ${GRAPHIC})
 ENDIF()
 endif()
-add_dependencies(MNN MNNCore MNNCV MNNShape MNNMath MNNCompute MNNCPU GenVCSHDR)
+add_dependencies(MNN MNNCore MNNCV MNNTransform MNNMath MNNCompute MNNCPU GenVCSHDR)
 add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/converter)
 
 if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
@@ -532,12 +570,6 @@ if (NOT MNN_BUILD_SHARED_LIBS)
 endif()
 endif()
 list(APPEND MNN_TARGETS MNN)
-FOREACH(TARGET ${MNN_TARGETS})
-IF((NOT MSVC) AND (NOT WIN32))
-else()
-target_compile_definitions(${TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS)
-endif()
-ENDFOREACH()
 list(REMOVE_ITEM MNN_TARGETS MNN)
 IF(MNN_BUILD_DEMO)
 include(${CMAKE_CURRENT_LIST_DIR}/demo/exec/CMakeLists.txt)
@@ -46,6 +46,7 @@ Pod::Spec.new do |s|
 'schema/current/*.{h}',\
 '3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
 'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
+'source/geometry/**/*.{h,c,m,mm,cc,hpp,cpp}',\
 'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
 'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
 'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
@@ -66,7 +66,7 @@ Interpreter由Engine和Backends构成。前者负责模型的加载、计算图
 
 三群：
 
-<img src="doc/DingTalkQR3.png" height="256"/>
+<img src="doc/DingTalkQR23.png" height="256"/>
 
 ## License
 Apache 2.0
@@ -0,0 +1,89 @@
//
// CPUBatchMatMul.cpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUBatchMatMul.hpp"
#include "backend/cpu/CPUBackend.hpp"
#include "math/Matrix.hpp"

namespace MNN {

CPUBatchMatMul::CPUBatchMatMul(Backend* backend, bool adjX, bool adjY) : Execution(backend) {
    mMatMul.reset(new CPUMatMul(backend, adjX, adjY, true));
}

ErrorCode CPUBatchMatMul::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input0 = inputs[0];
    auto input1 = inputs[1];
    auto output = outputs[0];
    // Fill output by zero if one of inputs is empty.
    if (input0->elementSize() == 0 || input1->elementSize() == 0) {
        return NO_ERROR;
    }
    auto dimensions = input0->dimensions();
    mMatrixA.reset(Tensor::createDevice<float>({input0->length(input0->dimensions()-2), input0->length(input0->dimensions()-1)}));
    mMatrixB.reset(Tensor::createDevice<float>({input1->length(input1->dimensions()-2), input1->length(input0->dimensions()-1)}));
    mMatrixC.reset(Tensor::createDevice<float>({output->length(output->dimensions()-2), output->length(output->dimensions()-1)}));
    mTempInputs = {mMatrixA.get(), mMatrixB.get()};
    mTempOutputs = {mMatrixC.get()};
    auto res = backend()->onAcquireBuffer(mMatrixA.get(), Backend::DYNAMIC);
    res = res && backend()->onAcquireBuffer(mMatrixB.get(), Backend::DYNAMIC);
    res = res && backend()->onAcquireBuffer(mMatrixC.get(), Backend::DYNAMIC);

    if (!res) {
        return OUT_OF_MEMORY;
    }
    int batch = 1;
    for (int i = 0; i < dimensions - 2; ++i) {
        batch *= input0->length(i);
    }
    mBatch = batch;
    auto code = mMatMul->onResize(mTempInputs, mTempOutputs);
    backend()->onReleaseBuffer(mMatrixA.get(), Backend::DYNAMIC);
    backend()->onReleaseBuffer(mMatrixB.get(), Backend::DYNAMIC);
    backend()->onReleaseBuffer(mMatrixC.get(), Backend::DYNAMIC);
    return code;
}

ErrorCode CPUBatchMatMul::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input0 = inputs[0];
    auto input1 = inputs[1];
    auto output = outputs[0];
    // Fill output by zero if one of inputs is empty.
    if (input0->elementSize() == 0 || input1->elementSize() == 0) {
        ::memset(output->host<float>(), 0, output->size());
        return NO_ERROR;
    }
    const int dimensions = input0->dimensions();
    MNN_ASSERT(dimensions >= 3);
    const int input0Stride = input0->length(dimensions - 1) * input0->length(dimensions - 2);
    const int input1Stride = input1->length(dimensions - 1) * input1->length(dimensions - 2);
    const int outputStride = output->length(dimensions - 1) * output->length(dimensions - 2);
    const auto input0Ptr = input0->host<float>();
    const auto input1Ptr = input1->host<float>();
    float* const outputPtr = output->host<float>();

    for (int i = 0; i < mBatch; ++i) {
        ::memcpy(mMatrixA->host<float>(), input0Ptr + i * input0Stride, input0Stride * sizeof(float));
        ::memcpy(mMatrixB->host<float>(), input1Ptr + i * input1Stride, input1Stride * sizeof(float));
        mMatMul->onExecute(mTempInputs, mTempOutputs);
        ::memcpy(outputPtr + i * outputStride, mMatrixC->host<float>(), outputStride * sizeof(float));
    }
    return NO_ERROR;
}

class CPUBatchMatMulCreator : public CPUBackend::Creator {
public:
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        return new CPUBatchMatMul(backend, op->main_as_BatchMatMulParam()->adjX(), op->main_as_BatchMatMulParam()->adjY());
    }
};

REGISTER_CPU_OP_CREATOR(CPUBatchMatMulCreator, OpType_BatchMatMul);

} // namespace MNN
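The new CPUBatchMatMul above copies each batch slice into 2-D scratch tensors and reuses a single CPUMatMul for every slice. As a plain-C++ sketch of that decomposition (illustrative names only, no MNN types, row-major layout assumed):

#include <cstddef>

// Computes C[b] = A[b] * B[b] for `batch` independent slices, mirroring the
// copy-then-2D-matmul structure of CPUBatchMatMul::onExecute.
// A is batch x m x k, B is batch x k x n, C is batch x m x n, row-major.
void batchMatMulReference(const float* A, const float* B, float* C,
                          int batch, int m, int k, int n) {
    const size_t strideA = (size_t)m * k;
    const size_t strideB = (size_t)k * n;
    const size_t strideC = (size_t)m * n;
    for (int b = 0; b < batch; ++b) {
        const float* a = A + b * strideA;
        const float* bMat = B + b * strideB;
        float* c = C + b * strideC;
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < n; ++j) {
                float acc = 0.0f;
                for (int p = 0; p < k; ++p) {
                    acc += a[i * k + p] * bMat[p * n + j];
                }
                c[i * n + j] = acc;
            }
        }
    }
}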
@@ -0,0 +1,35 @@
//
// CPUBatchMatMul.hpp
// MNN
//
// Created by MNN on 2019/03/25.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef CPUBatchMatMul_hpp
#define CPUBatchMatMul_hpp

#include "backend/cpu/CPUMatMul.hpp"

namespace MNN {

class CPUBatchMatMul : public Execution {
public:
    CPUBatchMatMul(Backend *backend, bool adjX, bool adjY);
    virtual ~CPUBatchMatMul() = default;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
    int mBatch;
    std::shared_ptr<Execution> mMatMul;
    std::vector<Tensor*> mTempInputs;
    std::vector<Tensor*> mTempOutputs;
    std::shared_ptr<Tensor> mMatrixA;
    std::shared_ptr<Tensor> mMatrixB;
    std::shared_ptr<Tensor> mMatrixC;
};

} // namespace MNN

#endif /* CPUBatchMatMul_hpp */
@@ -18,7 +18,6 @@
 #include "backend/cpu/compute/ConvOpt.h"
 #include "backend/cpu/CPUBackend.hpp"
 #include "backend/cpu/compute/ConvolutionFloatFactory.h"
-#include "math/Vec4.hpp"
 
 #define MIN_CON_PLANESIZE 256
 
@@ -10,7 +10,9 @@
 #include <math.h>
 #include "backend/cpu/CPUBackend.hpp"
 #include "core/Macro.h"
-#include "math/Vec4.hpp"
+#include "math/Vec.hpp"
 
+using Vec4 = MNN::Math::Vec<float, 4>;
+
 namespace MNN {
 
@@ -39,12 +41,12 @@ ErrorCode CPUCosineSimilarity::onExecute(const std::vector<Tensor*>& inputs, con
 const auto x1ChannelPtr = x1DataBatchPtr + j;
 const auto x2ChannelPtr = x2DataBatchPtr + j;
 
-Math::Vec4 innerProduct(.0f);
-Math::Vec4 x1Square(.0f);
-Math::Vec4 x2Square(.0f);
+Vec4 innerProduct(.0f);
+Vec4 x1Square(.0f);
+Vec4 x2Square(.0f);
 for (int c = 0; c < channel; ++c) {
-Math::Vec4 x1Data = Math::Vec4::load(x1ChannelPtr + c * channleStride);
-Math::Vec4 x2Data = Math::Vec4::load(x2ChannelPtr + c * channleStride);
+Vec4 x1Data = Vec4::load(x1ChannelPtr + c * channleStride);
+Vec4 x2Data = Vec4::load(x2ChannelPtr + c * channleStride);
 auto x1Xx2 = x1Data * x2Data;
 innerProduct = innerProduct + x1Xx2;
 x1Square = x1Square + x1Data * x1Data;
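For reference, the per-position quantity this loop accumulates with 4-lane vectors (the dot product plus the two squared norms) is the usual cosine similarity; in LaTeX:

\cos(x_1, x_2) = \frac{\sum_{c} x_{1,c}\, x_{2,c}}{\sqrt{\sum_{c} x_{1,c}^2}\,\sqrt{\sum_{c} x_{2,c}^2}}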
@@ -12,8 +12,8 @@
 #include "core/Concurrency.h"
 #include "core/Macro.h"
 
-#include "math/Vec4.hpp"
-using MNN::Math::Vec4;
+#include "math/Vec.hpp"
+using Vec4 = MNN::Math::Vec<float, 4>;
 
 namespace MNN {
 
@@ -21,7 +21,7 @@ public:
 auto parameter = op->main_as_InnerProduct();
 int outputCount = parameter->outputCount();
 int srcCount = parameter->weight()->size() / outputCount;
-mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4));
+mWeight.reset(CPUConvolution::reorderWeightSize(srcCount, outputCount, 1, 4, 4));
 if (mWeight.get() == nullptr) {
 mValid = false;
 return;
@@ -180,6 +180,14 @@ ErrorCode CPULSTM::onResize(const std::vector<Tensor *> &inputs, const std::vect
 ::memcpy(mBiasC->host<float>(), mLSTM->bias()->float32s()->data(), mBiasC->size());
 ::memcpy(mWeightH->host<float>(), mLSTM->weightH()->float32s()->data(), mWeightH->size());
 }
+if (mGateHaveBias) {
+// Merge bias
+auto biasPtr = mBiasC->host<float>();
+auto biasPtr2 = biasPtr + 4 * numUnits;
+for (int i=0; i<4*numUnits; ++i) {
+biasPtr[i] = biasPtr[i] + biasPtr2[i];
+}
+}
 }
 
 if (inputs.size() > 1) {
@@ -260,16 +268,8 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
 MNN_CONCURRENCY_END();
 
 float* biasStartPtr = mBiasC->host<float>();
-if(!mGateHaveBias){
-biasStartPtr = nullptr;
-}
 mRetriveOutputFunction(mGates.host<float>(), biasStartPtr);
 
-float* recurrenceBiasStartPtr = mBiasC->host<float>();
-if(mGateHaveBias){
-recurrenceBiasStartPtr += 4 * numUnits;
-}
-
 // tranform
 const float *contData = nullptr;
 if (inputs.size() > 1) {
@@ -330,14 +330,11 @@ ErrorCode CPULSTM::onExecute(const std::vector<Tensor *> &inputs, const std::vec
 }
 
 // add bias
-auto biasPtr = recurrenceBiasStartPtr + oc;
-I = sigmoid(*biasPtr + I);
-biasPtr = biasPtr + numUnits;
-F = sigmoid(*biasPtr + F);
-biasPtr = biasPtr + numUnits;
-O = sigmoid(*biasPtr + O);
-biasPtr = biasPtr + numUnits;
-G = tanhf(*biasPtr + G);
+//MNN_PRINT("%f, %f, %f, %f\n", I, O, F, G);
+I = sigmoid(I);
+F = sigmoid(F);
+O = sigmoid(O);
+G = tanhf(G);
 
 auto newCell = F * cellData[oc] + I * G;
 cellData[oc] = newCell;
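With the bias merge now done once in onResize, onExecute applies plain activations. The step the code above implements is the standard LSTM cell update, where the pre-activation gate sums already include the merged bias; in LaTeX:

i = \sigma(\hat{i}), \quad f = \sigma(\hat{f}), \quad o = \sigma(\hat{o}), \quad g = \tanh(\hat{g}), \qquad c_t = f \odot c_{t-1} + i \odot g

Here \hat{i}, \hat{f}, \hat{o}, \hat{g} are the gate pre-activations (biases folded in beforehand), matching I, F, O, G in the hunk above and newCell = F * cellData[oc] + I * G.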
@ -0,0 +1,311 @@
|
||||||
|
//
|
||||||
|
// CPUSoftmax.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2018/07/16.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "backend/cpu/CPUSoftmax.hpp"
|
||||||
|
#include <math.h>
|
||||||
|
#include "backend/cpu/CPUBackend.hpp"
|
||||||
|
#include "backend/cpu/compute/CommonOptFunction.h"
|
||||||
|
#include "core/Concurrency.h"
|
||||||
|
#include "core/Macro.h"
|
||||||
|
#include "core/TensorUtils.hpp"
|
||||||
|
#ifdef MNN_USE_NEON
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace MNN {
|
||||||
|
|
||||||
|
int CPUSoftmax::_softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum) {
|
||||||
|
// Max and sub
|
||||||
|
MNN_CONCURRENCY_BEGIN(tId, threadNum)
|
||||||
|
{
|
||||||
|
const float *srcY = srcData + tId * channel;
|
||||||
|
float *dstY = dstData + tId * channel;
|
||||||
|
for (int y = (int)tId; y < outside; y += threadNum, srcY += channel * threadNum, dstY += channel * threadNum) {
|
||||||
|
float maxValue = srcY[0];
|
||||||
|
{
|
||||||
|
int c = 1;
|
||||||
|
#ifdef MNN_USE_NEON
|
||||||
|
#if !(defined(__ARM_FEATURE_FMA) && defined(__aarch64__))
|
||||||
|
#define vmaxvq_f32(v) \
|
||||||
|
({ \
|
||||||
|
float __m = v[0]; \
|
||||||
|
for (int i = 1; i < 4; i++) { \
|
||||||
|
if (v[i] > __m) \
|
||||||
|
__m = v[i]; \
|
||||||
|
} \
|
||||||
|
__m; \
|
||||||
|
})
|
||||||
|
#endif
|
||||||
|
if (c + 3 < channel) {
|
||||||
|
float32x4_t maxx4 = vld1q_f32(srcY + c);
|
||||||
|
c += 4;
|
||||||
|
for (; c + 3 < channel; c += 4) {
|
||||||
|
maxx4 = vmaxq_f32(maxx4, vld1q_f32(srcY + c));
|
||||||
|
}
|
||||||
|
float value = vmaxvq_f32(maxx4);
|
||||||
|
if (value > maxValue)
|
||||||
|
maxValue = value;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; c < channel; ++c) {
|
||||||
|
float value = srcY[c];
|
||||||
|
if (value > maxValue)
|
||||||
|
maxValue = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int c = 0; c < channel; ++c) {
|
||||||
|
dstY[c] = -srcY[c] + maxValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MNN_CONCURRENCY_END();
|
||||||
|
|
||||||
|
//Exp
|
||||||
|
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(channel * outside);
|
||||||
|
int sizeDivide = schedule.first;
|
||||||
|
int scheduleNumber = schedule.second;
|
||||||
|
|
||||||
|
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
|
||||||
|
int start = sizeDivide * (int)tId;
|
||||||
|
int realSize = sizeDivide;
|
||||||
|
if (tId == scheduleNumber -1 ) {
|
||||||
|
realSize = channel * outside - start;
|
||||||
|
}
|
||||||
|
if (realSize > 0) {
|
||||||
|
MNNExp(dstData + start, dstData + start, realSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MNN_CONCURRENCY_END();
|
||||||
|
|
||||||
|
// Sum and div
|
||||||
|
MNN_CONCURRENCY_BEGIN(tId, threadNum);
|
||||||
|
{
|
||||||
|
float *dstY = dstData + tId * channel;
|
||||||
|
for (int y = (int)tId; y < outside; y += threadNum, dstY += channel * threadNum) {
|
||||||
|
// sum
|
||||||
|
float sumValue = 0;
|
||||||
|
|
||||||
|
for (int c = 0; c < channel; ++c) {
|
||||||
|
sumValue += dstY[c];
|
||||||
|
}
|
||||||
|
|
||||||
|
// div
|
||||||
|
{
|
||||||
|
int c = 0;
|
||||||
|
#ifdef MNN_USE_NEON
|
||||||
|
float div = 1.f / sumValue;
|
||||||
|
for (; c + 3 < channel; c += 4) {
|
||||||
|
vst1q_f32(dstY + c, vmulq_n_f32(vld1q_f32(dstY + c), div));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; c < channel; ++c) {
|
||||||
|
dstY[c] /= sumValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MNN_CONCURRENCY_END();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int CPUSoftmax::_softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel,
|
||||||
|
float *maxValue, float *sumValue, int threadNum) {
|
||||||
|
if (inside == 1)
|
||||||
|
return _softmax1(srcData, dstData, outside, channel, threadNum);
|
||||||
|
|
||||||
|
const int stepY = inside * channel;
|
||||||
|
MNN_CONCURRENCY_BEGIN(tId, threadNum);
|
||||||
|
{
|
||||||
|
const float *srcY = srcData + tId * stepY;
|
||||||
|
float *dstY = dstData + tId * stepY;
|
||||||
|
float *maxValueSub = maxValue + tId * inside;
|
||||||
|
|
||||||
|
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
|
||||||
|
memcpy(maxValueSub, srcY, sizeof(float) * inside);
|
||||||
|
const float *src = srcY + inside;
|
||||||
|
for (int c = 1; c < channel; ++c, src += inside) {
|
||||||
|
for (int x = 0; x < inside; ++x) {
|
||||||
|
if (src[x] > maxValueSub[x])
|
||||||
|
maxValueSub[x] = src[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
src = srcY;
|
||||||
|
float *dst = dstY;
|
||||||
|
for (int c = 0; c < channel; ++c, src += inside, dst += inside) {
|
||||||
|
for (int x = 0; x < inside; ++x) {
|
||||||
|
dst[x] = -src[x] + maxValueSub[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MNN_CONCURRENCY_END();
|
||||||
|
|
||||||
|
auto totalSize = channel * inside * outside;
|
||||||
|
//Exp
|
||||||
|
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(totalSize);
|
||||||
|
int sizeDivide = schedule.first;
|
||||||
|
int scheduleNumber = schedule.second;
|
||||||
|
|
||||||
|
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
|
||||||
|
int start = sizeDivide * (int)tId;
|
||||||
|
int realSize = sizeDivide;
|
||||||
|
if (tId == scheduleNumber -1 ) {
|
||||||
|
realSize = totalSize - start;
|
||||||
|
}
|
||||||
|
if (realSize > 0) {
|
||||||
|
MNNExp(dstData + start, dstData + start, realSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MNN_CONCURRENCY_END();
|
||||||
|
|
||||||
|
MNN_CONCURRENCY_BEGIN(tId, threadNum);
|
||||||
|
{
|
||||||
|
const float *srcY = srcData + tId * stepY;
|
||||||
|
float *dstY = dstData + tId * stepY;
|
||||||
|
float *sumValueSub = sumValue + tId * inside;
|
||||||
|
for (int y = (int)tId; y < outside; y += threadNum, srcY += stepY * threadNum, dstY += stepY * threadNum) {
|
||||||
|
memset(sumValueSub, 0, sizeof(float) * inside);
|
||||||
|
float *dst = dstY;
|
||||||
|
for (int c = 0; c < channel; ++c, dst += inside) {
|
||||||
|
for (int x = 0; x < inside; ++x) {
|
||||||
|
sumValueSub[x] += dst[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst = dstY;
|
||||||
|
for (int c = 0; c < channel; ++c, dst += inside) {
|
||||||
|
for (int x = 0; x < inside; ++x) {
|
||||||
|
dst[x] /= sumValueSub[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MNN_CONCURRENCY_END();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ErrorCode CPUSoftmax::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
|
||||||
|
auto input = inputs[0];
|
||||||
|
const int dimensions = input->buffer().dimensions;
|
||||||
|
|
||||||
|
const auto layout = TensorUtils::getDescribe(input)->dimensionFormat;
|
||||||
|
mNeedUnpackC4 = layout == MNN_DATA_FORMAT_NC4HW4;
|
||||||
|
|
||||||
|
if (mNeedUnpackC4) {
|
||||||
|
int totalSize = 1;
|
||||||
|
for (int i = 1; i < dimensions; ++i) {
|
||||||
|
totalSize *= input->length(i);
|
||||||
|
}
|
||||||
|
mStorage.buffer().dim[0].extent = input->length(0);
|
||||||
|
mStorage.buffer().dim[1].extent = totalSize;
|
||||||
|
TensorUtils::getDescribe(&mStorage)->dimensionFormat = MNN_DATA_FORMAT_NHWC;
|
||||||
|
mStorage.buffer().dimensions = 2;
|
||||||
|
mStorage.buffer().type = input->getType();
|
||||||
|
backend()->onAcquireBuffer(&mStorage, Backend::DYNAMIC);
|
||||||
|
}
|
||||||
|
|
||||||
|
int inside = 1;
|
||||||
|
int dims = input->buffer().dimensions;
|
||||||
|
for (int i = mAxis + 1; i < dims; ++i) {
|
||||||
|
inside *= input->length(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inside != 1) { // not run _softmax1, we need maxValue Tensor and sumValue Tensor.
|
||||||
|
int threadNum = ((CPUBackend *)backend())->threadNumber();
|
||||||
|
|
||||||
|
mMaxValue.buffer().dim[0].extent = inside * threadNum;
|
||||||
|
mMaxValue.buffer().dimensions = 1;
|
||||||
|
mMaxValue.setType(DataType_DT_FLOAT);
|
||||||
|
backend()->onAcquireBuffer(&mMaxValue, Backend::DYNAMIC);
|
||||||
|
|
||||||
|
mSumValue.buffer().dim[0].extent = inside * threadNum;
|
||||||
|
mSumValue.buffer().dimensions = 1;
|
||||||
|
mSumValue.setType(DataType_DT_FLOAT);
|
||||||
|
backend()->onAcquireBuffer(&mSumValue, Backend::DYNAMIC);
|
||||||
|
|
||||||
|
backend()->onReleaseBuffer(&mMaxValue, Backend::DYNAMIC);
|
||||||
|
backend()->onReleaseBuffer(&mSumValue, Backend::DYNAMIC);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mNeedUnpackC4) {
|
||||||
|
backend()->onReleaseBuffer(&mStorage, Backend::DYNAMIC);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NO_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
ErrorCode CPUSoftmax::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
|
||||||
|
MNN_ASSERT(1 == inputs.size());
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto inputTensor = inputs[0];
|
||||||
|
auto outputTensor = outputs[0];
|
||||||
|
const auto inputDataPtr = inputTensor->host<float>();
|
||||||
|
auto outputDataPtr = outputTensor->host<float>();
|
||||||
|
const int batch = inputTensor->batch();
|
||||||
|
const auto dims = inputTensor->buffer().dimensions;
|
||||||
|
|
||||||
|
float *tempData = nullptr;
|
||||||
|
if (mNeedUnpackC4) {
|
||||||
|
tempData = mStorage.host<float>();
|
||||||
|
}
|
||||||
|
|
||||||
|
int areaInput = 1;
|
||||||
|
for (int i = 2; i < dims; ++i) {
|
||||||
|
areaInput *= inputTensor->length(i);
|
||||||
|
}
|
||||||
|
int inside = 1;
|
||||||
|
int outside = 1;
|
||||||
|
int channel = 1;
|
||||||
|
for (int i = 0; i < mAxis; ++i) {
|
||||||
|
outside *= inputTensor->length(i);
|
||||||
|
}
|
||||||
|
channel = inputTensor->length(mAxis);
|
||||||
|
for (int i = mAxis + 1; i < dims; ++i) {
|
||||||
|
inside *= inputTensor->length(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int threadNum = ((CPUBackend *)backend())->threadNumber();
|
||||||
|
if (!mNeedUnpackC4) {
|
||||||
|
_softmaxCommon(inputDataPtr, outputDataPtr, inside, outside, channel, mMaxValue.host<float>(),
|
||||||
|
mSumValue.host<float>(), threadNum);
|
||||||
|
return NO_ERROR;
|
||||||
|
}
|
||||||
|
auto outputSize = outputTensor->elementSize();
|
||||||
|
int batchSize = outputSize / batch;
|
||||||
|
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
|
||||||
|
auto inputData = inputDataPtr + batchIndex * batchSize;
|
||||||
|
MNNUnpackC4(outputDataPtr + batchIndex * mStorage.length(1), inputData, areaInput, inputTensor->channel());
|
||||||
|
}
|
||||||
|
_softmaxCommon(outputDataPtr, tempData, inside, outside, channel, mMaxValue.host<float>(), mSumValue.host<float>(), threadNum);
|
||||||
|
for (int batchIndex = 0; batchIndex < batch; ++batchIndex) {
|
||||||
|
auto outputData = outputDataPtr + batchIndex * batchSize;
|
||||||
|
auto tempPtr = tempData + batchIndex * mStorage.length(1);
|
||||||
|
MNNPackC4(outputData, tempPtr, areaInput, outputTensor->channel());
|
||||||
|
}
|
||||||
|
return NO_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
CPUSoftmax::CPUSoftmax(Backend *b, int axis) : MNN::Execution(b), mAxis(axis), mStorage(2), mNeedUnpackC4(false) {
|
||||||
|
// nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
|
class CPUSoftmaxCreator : public CPUBackend::Creator {
|
||||||
|
public:
|
||||||
|
virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
|
||||||
|
const MNN::Op *op, Backend *backend) const override {
|
||||||
|
auto axis = op->main_as_Axis()->axis();
|
||||||
|
if (axis < 0) {
|
||||||
|
axis = inputs[0]->dimensions() + axis;
|
||||||
|
}
|
||||||
|
return new CPUSoftmax(backend, axis);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
REGISTER_CPU_OP_CREATOR(CPUSoftmaxCreator, OpType_Softmax);
|
||||||
|
|
||||||
|
} // namespace MNN
|
|
@@ -0,0 +1,35 @@
//
// CPUSoftmax.hpp
// MNN
//
// Created by MNN on 2018/07/16.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef CPUSoftmax_hpp
#define CPUSoftmax_hpp

#include "core/Execution.hpp"

namespace MNN {
class CPUSoftmax : public Execution {
public:
    CPUSoftmax(Backend *b, int axis);
    virtual ~CPUSoftmax() = default;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
    int _softmaxCommon(const float *srcData, float *dstData, int inside, int outside, int channel, float *maxValue,
                       float *sumValue, int threadNum);
    int _softmax1(const float *srcData, float *dstData, int outside, int channel, int threadNum);

    int mAxis;
    Tensor mStorage;
    Tensor mMaxValue;
    Tensor mSumValue;
    bool mNeedUnpackC4;
};
} // namespace MNN

#endif /* CPUSoftmax_hpp */
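The header above declares _softmaxCommon over an (inside, outside, channel) decomposition of the input. The sketch below is a single-threaded reference of that computation assuming a dense [outside][channel][inside] layout; it is illustrative only and deliberately omits the NC4HW4 repacking and the threading the real CPUSoftmax.cpp handles.

#include <algorithm>
#include <cmath>

// Reference softmax along the channel axis for data laid out as
// [outside][channel][inside]. Subtracting the per-position maximum
// keeps std::exp numerically stable.
void softmaxReference(const float* src, float* dst,
                      int inside, int outside, int channel) {
    for (int o = 0; o < outside; ++o) {
        for (int i = 0; i < inside; ++i) {
            const float* s = src + o * channel * inside + i;
            float* d = dst + o * channel * inside + i;
            float maxValue = s[0];
            for (int c = 1; c < channel; ++c) {
                maxValue = std::max(maxValue, s[c * inside]);
            }
            float sum = 0.0f;
            for (int c = 0; c < channel; ++c) {
                d[c * inside] = std::exp(s[c * inside] - maxValue);
                sum += d[c * inside];
            }
            for (int c = 0; c < channel; ++c) {
                d[c * inside] /= sum;
            }
        }
    }
}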
@@ -13,10 +13,8 @@
 #include "backend/cpu/compute/ConvOpt.h"
 #include "core/Macro.h"
 #include "core/TensorUtils.hpp"
-#include "math/Vec4.hpp"
-using namespace MNN::Math;
+#include "math/Vec.hpp"
+using Vec4 = MNN::Math::Vec<float, 4>;
 
-typedef Vec4 float4;
-
 #define SOURCE_BLOCK 64
 #define WEIGHT_BLOCK 256
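Several hunks in this commit swap the concrete math/Vec4.hpp type for the templated math/Vec.hpp and restore the old name with using Vec4 = MNN::Math::Vec<float, 4>. The stand-in below is not MNN's implementation; it only sketches the Vec<T, N> + alias pattern and a call site in the style of the cosine-similarity hunk above.

// Minimal stand-in for the templated vector type this commit migrates to.
// MNN's real template lives in math/Vec.hpp; this scalar version only
// illustrates the interface the updated kernels rely on.
template <typename T, int N>
struct Vec {
    T value[N];
    explicit Vec(T v) { for (int i = 0; i < N; ++i) value[i] = v; }
    static Vec load(const T* addr) {
        Vec r(T(0));
        for (int i = 0; i < N; ++i) r.value[i] = addr[i];
        return r;
    }
    static void save(T* addr, const Vec& v) {
        for (int i = 0; i < N; ++i) addr[i] = v.value[i];
    }
    Vec operator+(const Vec& o) const {
        Vec r(*this);
        for (int i = 0; i < N; ++i) r.value[i] += o.value[i];
        return r;
    }
    Vec operator*(const Vec& o) const {
        Vec r(*this);
        for (int i = 0; i < N; ++i) r.value[i] *= o.value[i];
        return r;
    }
};

using Vec4 = Vec<float, 4>;  // same alias the updated files introduce

// Example call site: accumulate a 4-lane dot-product partial sum.
inline Vec4 dotAccumulate(Vec4 acc, const float* a, const float* b) {
    return acc + Vec4::load(a) * Vec4::load(b);
}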
@ -0,0 +1,128 @@
|
||||||
|
//
|
||||||
|
// GeometryCropAndResize.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2020/08/5.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "geometry/GeometryComputer.hpp"
|
||||||
|
#include "core/OpCommonUtils.hpp"
|
||||||
|
#include "geometry/GeometryComputerUtils.hpp"
|
||||||
|
#include "ConvertUtils.hpp"
|
||||||
|
|
||||||
|
namespace MNN {
|
||||||
|
class GeometryCropAndResize : public GeometryComputer {
|
||||||
|
public:
|
||||||
|
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
|
||||||
|
MNN_ASSERT(4 == inputs.size());
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto img = inputs[0];
|
||||||
|
auto boxes = inputs[1];
|
||||||
|
auto box_ind = inputs[2];
|
||||||
|
auto crop_size = inputs[3];
|
||||||
|
auto output = outputs[0];
|
||||||
|
auto extrapolation = op->main_as_CropAndResize()->extrapolationValue();
|
||||||
|
auto method = op->main_as_CropAndResize()->method();
|
||||||
|
// resizeType of Interp : 1-NEAREST, 2-BILINEAR
|
||||||
|
const int resizeType = method == CropAndResizeMethod_BILINEAR ? 2 : 1;
|
||||||
|
|
||||||
|
int batch = img->length(0), ih = img->length(1), iw = img->length(2),
|
||||||
|
depth = img->length(3), boxNum = boxes->length(0);
|
||||||
|
const int cropHeight = crop_size->host<uint32_t>()[0],
|
||||||
|
cropWidth = crop_size->host<uint32_t>()[1];
|
||||||
|
|
||||||
|
auto des = TensorUtils::getDescribe(output);
|
||||||
|
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
|
||||||
|
des->regions.clear();
|
||||||
|
des->regions.reserve(boxNum);
|
||||||
|
for (int i = 0; i < boxNum; i++) {
|
||||||
|
const float y1 = boxes->host<float>()[i*4];
|
||||||
|
const float x1 = boxes->host<float>()[i*4+1];
|
||||||
|
const float y2 = boxes->host<float>()[i*4+2];
|
||||||
|
const float x2 = boxes->host<float>()[i*4+3];
|
||||||
|
const int ind = box_ind->host<uint32_t>()[i];
|
||||||
|
const float ch = (y2 - y1) * (ih - 1), cw = (x2 - x1) * (iw - 1);
|
||||||
|
const float yScale = ch / static_cast<float>(cropHeight - 1);
|
||||||
|
const float xScale = cw / static_cast<float>(cropWidth - 1);
|
||||||
|
const float yOffset = y1 * (ih - 1), xOffset = x1 * (iw - 1);
|
||||||
|
// select croped image from images, convert it's format from NHWC to NC4HW4
|
||||||
|
std::shared_ptr<Tensor> cropValue(new Tensor);
|
||||||
|
{
|
||||||
|
cropValue->buffer().type = halide_type_of<float>();
|
||||||
|
cropValue->buffer().dimensions = 4;
|
||||||
|
cropValue->setLength(0, 1);
|
||||||
|
cropValue->setLength(1, depth);
|
||||||
|
cropValue->setLength(2, ih);
|
||||||
|
cropValue->setLength(3, iw);
|
||||||
|
auto des = TensorUtils::getDescribe(cropValue.get());
|
||||||
|
des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
|
||||||
|
des->regions.clear();
|
||||||
|
Tensor::InsideDescribe::Region region;
|
||||||
|
region.origin = img;
|
||||||
|
region.size[1] = depth;
|
||||||
|
region.size[2] = ih * iw;
|
||||||
|
region.src.offset = ind * ih * iw * depth;
|
||||||
|
region.dst.offset = 0;
|
||||||
|
region.src.stride[1] = 1;
|
||||||
|
region.src.stride[2] = depth;
|
||||||
|
region.dst.stride[1] = ih * iw;
|
||||||
|
region.dst.stride[2] = 1;
|
||||||
|
des->regions.emplace_back(std::move(region));
|
||||||
|
res.extras.emplace_back(cropValue);
|
||||||
|
}
|
||||||
|
// using Interp Op deal with crop and resize for selected image
|
||||||
|
std::shared_ptr<Tensor> resizeValue;
|
||||||
|
{
|
||||||
|
resizeValue.reset(Tensor::createDevice<float>({1, depth, cropHeight, cropWidth}));
|
||||||
|
auto des = TensorUtils::getDescribe(resizeValue.get());
|
||||||
|
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
|
||||||
|
std::unique_ptr<OpT> interp(new OpT);
|
||||||
|
interp->type = OpType_Interp;
|
||||||
|
interp->main.type = OpParameter_Interp;
|
||||||
|
interp->main.value = new InterpT;
|
||||||
|
interp->main.AsInterp()->widthScale = xScale;
|
||||||
|
interp->main.AsInterp()->heightScale = yScale;
|
||||||
|
interp->main.AsInterp()->widthOffset = xOffset;
|
||||||
|
interp->main.AsInterp()->heightOffset = yOffset;
|
||||||
|
interp->main.AsInterp()->alignCorners = false;
|
||||||
|
interp->main.AsInterp()->resizeType = resizeType;
|
||||||
|
auto cmd = GeometryComputerUtils::makeCommand(interp.get(), {cropValue.get()}, {resizeValue.get()});
|
||||||
|
res.extras.emplace_back(resizeValue);
|
||||||
|
res.command.emplace_back(cmd);
|
||||||
|
}
|
||||||
|
// convert resize image's format from NC4HW4 to NHWC, add it to output's batch
|
||||||
|
{
|
||||||
|
Tensor::InsideDescribe::Region region;
|
||||||
|
region.origin = resizeValue.get();
|
||||||
|
region.size[1] = cropHeight * cropWidth;
|
||||||
|
region.size[2] = depth;
|
||||||
|
region.src.offset = 0;
|
||||||
|
region.dst.offset = i * cropHeight * cropWidth * depth;
|
||||||
|
region.src.stride[1] = 1;
|
||||||
|
region.src.stride[2] = cropHeight * cropWidth;
|
||||||
|
region.dst.stride[1] = depth;
|
||||||
|
region.dst.stride[2] = 1;
|
||||||
|
des->regions.emplace_back(std::move(region));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
|
||||||
|
const std::vector<Tensor*>& outputs) const override {
|
||||||
|
//return {false};
|
||||||
|
return {true};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void _create() {
|
||||||
|
std::shared_ptr<GeometryComputer> comp(new GeometryCropAndResize);
|
||||||
|
// GeometryComputer::registerGeometryComputer(comp, {OpType_CropAndResize});
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_GEOMETRY(GeometryCropAndResize, _create);
|
||||||
|
|
||||||
|
} // namespace MNN
|
|
@ -0,0 +1,304 @@
|
||||||
|
//
|
||||||
|
// GeometryGather.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2020/06/09.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "geometry/GeometryComputer.hpp"
|
||||||
|
#include "core/OpCommonUtils.hpp"
|
||||||
|
namespace MNN {
|
||||||
|
|
||||||
|
class GeometryGather : public DefaultGeometryComputer {
|
||||||
|
public:
|
||||||
|
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
|
||||||
|
const std::vector<Tensor*>& outputs) const override {
|
||||||
|
MNN_ASSERT(inputs.size() == 2);
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto embedding = inputs[0];
|
||||||
|
auto indices = inputs[1];
|
||||||
|
auto output = outputs[0];
|
||||||
|
|
||||||
|
const int firstDimStride = embedding->buffer().dim[0].stride;
|
||||||
|
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && firstDimStride != 0) {
|
||||||
|
std::vector<bool> res(outputs.size(), true);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
return std::vector<bool>(outputs.size(), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
|
||||||
|
Context& context, CommandBuffer& res) const override {
|
||||||
|
MNN_ASSERT(2 == inputs.size());
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto embedding = inputs[0];
|
||||||
|
auto indices = inputs[1];
|
||||||
|
auto output = outputs[0];
|
||||||
|
|
||||||
|
const int firstDimStride = embedding->buffer().dim[0].stride;
|
||||||
|
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || firstDimStride == 0) {
|
||||||
|
Command cmd;
|
||||||
|
cmd.op = op;
|
||||||
|
cmd.inputs = std::move(inputs);
|
||||||
|
cmd.outputs = std::move(outputs);
|
||||||
|
res.command.emplace_back(std::move(cmd));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto bytes = embedding->buffer().type.bytes();
|
||||||
|
|
||||||
|
const size_t indicesCount = indices->elementSize();
|
||||||
|
const auto limit = embedding->length(0);
|
||||||
|
const int* indicesData = indices->host<int32_t>();
|
||||||
|
|
||||||
|
auto outputDes = TensorUtils::getDescribe(output);
|
||||||
|
outputDes->regions.clear();
|
||||||
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
for (int i = 0; i < indicesCount; i++) {
|
||||||
|
if (indicesData[i] < 0 || indicesData[i] >= limit) {
MNN_PRINT("Gather index out of range\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Tensor::InsideDescribe::Region slice;
|
||||||
|
slice.origin = embedding;
|
||||||
|
slice.size[0] = 1;
|
||||||
|
slice.size[1] = 1;
|
||||||
|
slice.size[2] = firstDimStride;
|
||||||
|
slice.src.offset = firstDimStride * indicesData[i];
|
||||||
|
slice.dst.offset = i * firstDimStride;
|
||||||
|
slice.src.stride[0] = 1;
|
||||||
|
slice.src.stride[1] = 1;
|
||||||
|
slice.src.stride[2] = 1;
|
||||||
|
slice.dst.stride[0] = 1;
|
||||||
|
slice.dst.stride[1] = 1;
|
||||||
|
slice.dst.stride[2] = 1;
|
||||||
|
outputDes->regions.emplace_back(std::move(slice));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
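// GatherND with constant indices: each output slice i copies mSliceSize contiguous
// elements starting at the flat offset computed from the i-th index tuple, again
// expressed as one Region per slice instead of a runtime kernel.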
class GeometryGatherND : public DefaultGeometryComputer {
|
||||||
|
public:
|
||||||
|
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
|
||||||
|
const std::vector<Tensor*>& outputs) const override {
|
||||||
|
MNN_ASSERT(inputs.size() == 2);
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto params = inputs[0];
|
||||||
|
auto indices = inputs[1];
|
||||||
|
auto output = outputs[0];
|
||||||
|
|
||||||
|
int mSliceN = 1;
|
||||||
|
int mSliceSize = 1;
|
||||||
|
for (int i = 0; i < indices->dimensions() - 1; ++i) {
|
||||||
|
mSliceN *= indices->length(i);
|
||||||
|
}
|
||||||
|
auto indiceNd = indices->length(indices->dimensions() - 1);
|
||||||
|
std::vector<int> mDimsToCount;
|
||||||
|
mDimsToCount.resize(indiceNd);
|
||||||
|
for (int i = indiceNd; i < params->dimensions(); ++i) {
|
||||||
|
mSliceSize *= params->length(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && mSliceSize != 0) {
|
||||||
|
std::vector<bool> res(outputs.size(), true);
|
||||||
|
return res;
|
||||||
|
} else {
|
||||||
|
std::vector<bool> res(outputs.size(), false);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
|
||||||
|
Context& context, CommandBuffer& res) const override {
|
||||||
|
MNN_ASSERT(2 == inputs.size());
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto params = inputs[0];
|
||||||
|
auto indice = inputs[1];
|
||||||
|
auto output = outputs[0];
|
||||||
|
|
||||||
|
int mSliceN = 1;
|
||||||
|
int mSliceSize = 1;
|
||||||
|
for (int i = 0; i < indice->dimensions() - 1; ++i) {
|
||||||
|
mSliceN *= indice->length(i);
|
||||||
|
}
|
||||||
|
auto indiceNd = indice->length(indice->dimensions() - 1);
|
||||||
|
std::vector<int> mDimsToCount;
|
||||||
|
mDimsToCount.resize(indiceNd);
|
||||||
|
for (int i = indiceNd; i < params->dimensions(); ++i) {
|
||||||
|
mSliceSize *= params->length(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TensorUtils::getDescribe(indice)->usage != MNN::Tensor::InsideDescribe::CONSTANT || mSliceSize == 0) {
|
||||||
|
Command cmd;
|
||||||
|
cmd.op = op;
|
||||||
|
cmd.inputs = std::move(inputs);
|
||||||
|
cmd.outputs = std::move(outputs);
|
||||||
|
res.command.emplace_back(std::move(cmd));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto paramSize = params->elementSize();
|
||||||
|
for (int i = 0; i < indiceNd; ++i) {
|
||||||
|
mDimsToCount[i] = paramSize / params->length(i);
|
||||||
|
paramSize = mDimsToCount[i];
|
||||||
|
}
|
||||||
|
mDimsToCount.resize(indiceNd);
|
||||||
|
auto indiceData = indice->host<int32_t>();
|
||||||
|
|
||||||
|
auto outputDes = TensorUtils::getDescribe(output);
|
||||||
|
outputDes->regions.clear();
|
||||||
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
for (int i = 0; i < mSliceN; i++) {
|
||||||
|
int fromPos = 0;
|
||||||
|
for (int j = 0; j < indiceNd; ++j) {
|
||||||
|
fromPos += mDimsToCount[j] * indiceData[i * indiceNd + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
Tensor::InsideDescribe::Region slice;
|
||||||
|
slice.origin = params;
|
||||||
|
slice.size[0] = 1;
|
||||||
|
slice.size[1] = 1;
|
||||||
|
slice.size[2] = mSliceSize;
|
||||||
|
slice.src.offset = fromPos;
|
||||||
|
slice.dst.offset = i * mSliceSize;
|
||||||
|
slice.src.stride[0] = 1;
|
||||||
|
slice.src.stride[1] = 1;
|
||||||
|
slice.src.stride[2] = 1;
|
||||||
|
slice.dst.stride[0] = 1;
|
||||||
|
slice.dst.stride[1] = 1;
|
||||||
|
slice.dst.stride[2] = 1;
|
||||||
|
outputDes->regions.emplace_back(std::move(slice));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
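// GatherV2 gathers along an arbitrary axis: for every (outside, index) pair it emits
// a Region of `inside` contiguous elements when the indices are constant; indices
// that fall out of range are skipped.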
class GeometryGatherV2 : public DefaultGeometryComputer {
|
||||||
|
public:
|
||||||
|
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
|
||||||
|
const std::vector<Tensor*>& outputs) const override {
|
||||||
|
MNN_ASSERT(inputs.size() >= 2);
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto params = inputs[0];
|
||||||
|
auto indices = inputs[1];
|
||||||
|
auto output = outputs[0];
|
||||||
|
|
||||||
|
int axis = 0;
|
||||||
|
if (inputs.size() == 3) {
|
||||||
|
const Tensor* axisTensor = inputs[2];
|
||||||
|
axis = axisTensor->host<int32_t>()[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
|
||||||
|
|
||||||
|
if (axis < 0) {
|
||||||
|
axis = params->buffer().dimensions + axis;
|
||||||
|
}
|
||||||
|
const int gatherDimSize = params->buffer().dim[axis].extent;
|
||||||
|
const int N = indices->elementSize();
|
||||||
|
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
|
||||||
|
|
||||||
|
int inside = 1;
|
||||||
|
for (int i = axis + 1; i < params->dimensions(); ++i) {
|
||||||
|
inside *= params->length(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TensorUtils::getDescribe(indices)->usage == MNN::Tensor::InsideDescribe::CONSTANT && inside != 0) {
|
||||||
|
std::vector<bool> res(outputs.size(), true);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
return std::vector<bool>(outputs.size(), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
|
||||||
|
Context& context, CommandBuffer& res) const override {
|
||||||
|
MNN_ASSERT(inputs.size() >= 2);
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
auto params = inputs[0];
|
||||||
|
auto indices = inputs[1];
|
||||||
|
auto output = outputs[0];
|
||||||
|
|
||||||
|
int axis = 0;
|
||||||
|
if (inputs.size() == 3) {
|
||||||
|
const Tensor* axisTensor = inputs[2];
|
||||||
|
axis = axisTensor->host<int32_t>()[0];
|
||||||
|
}
|
||||||
|
MNN_ASSERT(axis > -params->buffer().dimensions && axis < params->buffer().dimensions);
|
||||||
|
|
||||||
|
if (axis < 0) {
|
||||||
|
axis = params->buffer().dimensions + axis;
|
||||||
|
}
|
||||||
|
const int gatherDimSize = params->buffer().dim[axis].extent;
|
||||||
|
const int N = indices->elementSize();
|
||||||
|
MNN_ASSERT(gatherDimSize <= std::numeric_limits<int32_t>::max());
|
||||||
|
|
||||||
|
int inside = 1;
|
||||||
|
int outside = 1;
|
||||||
|
for (int i = 0; i < axis; ++i) {
|
||||||
|
outside *= params->length(i);
|
||||||
|
}
|
||||||
|
for (int i = axis + 1; i < params->dimensions(); ++i) {
|
||||||
|
inside *= params->length(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TensorUtils::getDescribe(indices)->usage != MNN::Tensor::InsideDescribe::CONSTANT || inside == 0) {
|
||||||
|
Command cmd;
|
||||||
|
cmd.op = op;
|
||||||
|
cmd.inputs = std::move(inputs);
|
||||||
|
cmd.outputs = std::move(outputs);
|
||||||
|
res.command.emplace_back(std::move(cmd));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int limit = params->length(axis);
|
||||||
|
auto bytes = output->buffer().type.bytes();
|
||||||
|
const int insideStride = inside;
|
||||||
|
const int outputOutsideStride = inside * N;
|
||||||
|
const int inputOutsideStride = inside * inputs[0]->length(axis);
|
||||||
|
const int* indicesPtr = indices->host<int32_t>();
|
||||||
|
|
||||||
|
auto outputDes = TensorUtils::getDescribe(output);
|
||||||
|
outputDes->regions.clear();
|
||||||
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
for (int o = 0; o < outside; ++o) {
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
if (indicesPtr[i] < 0 || indicesPtr[i] >= limit) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Tensor::InsideDescribe::Region slice;
|
||||||
|
slice.origin = params;
|
||||||
|
slice.size[0] = 1;
|
||||||
|
slice.size[1] = 1;
|
||||||
|
slice.size[2] = insideStride;
|
||||||
|
slice.src.offset = inputOutsideStride * o + insideStride * indicesPtr[i];
|
||||||
|
slice.dst.offset = outputOutsideStride * o + i * insideStride;
|
||||||
|
slice.src.stride[0] = 1;
|
||||||
|
slice.src.stride[1] = 1;
|
||||||
|
slice.src.stride[2] = 1;
|
||||||
|
slice.dst.stride[0] = 1;
|
||||||
|
slice.dst.stride[1] = 1;
|
||||||
|
slice.dst.stride[2] = 1;
|
||||||
|
outputDes->regions.emplace_back(std::move(slice));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void _create() {
|
||||||
|
// std::shared_ptr<GeometryComputer> comp(new GeometryGather);
|
||||||
|
// GeometryComputer::registerGeometryComputer(comp, {OpType_Gather});
|
||||||
|
//
|
||||||
|
// std::shared_ptr<GeometryComputer> comp2(new GeometryGatherND);
|
||||||
|
// GeometryComputer::registerGeometryComputer(comp2, {OpType_GatherND});
|
||||||
|
//
|
||||||
|
// std::shared_ptr<GeometryComputer> comp3(new GeometryGatherV2);
|
||||||
|
// GeometryComputer::registerGeometryComputer(comp3, {OpType_GatherV2});
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_GEOMETRY(GeometryGather, _create);
|
||||||
|
|
||||||
|
} // namespace MNN
|
|
@ -0,0 +1,214 @@
|
||||||
|
//
|
||||||
|
// GeometrySoftmax.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2020/06/28.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "geometry/GeometryComputer.hpp"
|
||||||
|
#include "core/OpCommonUtils.hpp"
|
||||||
|
#include "geometry/GeometryComputerUtils.hpp"
|
||||||
|
|
||||||
|
namespace MNN {
|
||||||
|
class GeometrySoftmax : public GeometryComputer {
|
||||||
|
public:
|
||||||
|
virtual std::vector<bool> onGetOutputVirtual(const Op* op, const std::vector<Tensor*>& inputs,
|
||||||
|
const std::vector<Tensor*>& outputs) const override {
|
||||||
|
auto axis = op->main_as_Axis()->axis();
|
||||||
|
if (axis < 0) {
|
||||||
|
axis = inputs[0]->dimensions() + axis;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (axis == 1) {
|
||||||
|
return std::vector<bool>(outputs.size(), false);
|
||||||
|
}
|
||||||
|
return std::vector<bool>(outputs.size(), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs,
|
||||||
|
const std::vector<Tensor*>& outputs, Context& context, CommandBuffer& res) const override {
|
||||||
|
MNN_ASSERT(1 == inputs.size());
|
||||||
|
MNN_ASSERT(1 == outputs.size());
|
||||||
|
|
||||||
|
auto input = inputs[0];
|
||||||
|
auto output = outputs[0];
|
||||||
|
auto dims = input->buffer().dimensions;
|
||||||
|
|
||||||
|
auto axis = op->main_as_Axis()->axis();
|
||||||
|
if (axis < 0) {
|
||||||
|
axis = inputs[0]->dimensions() + axis;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (axis == 1) {
|
||||||
|
Command cmd;
|
||||||
|
cmd.op = op;
|
||||||
|
cmd.inputs = std::move(inputs);
|
||||||
|
cmd.outputs = std::move(outputs);
|
||||||
|
res.command.emplace_back(std::move(cmd));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int inside = 1;
|
||||||
|
int outside = 1;
|
||||||
|
int channel = 1;
|
||||||
|
for (int i = 0; i < axis; ++i) {
|
||||||
|
outside *= input->length(i);
|
||||||
|
}
|
||||||
|
channel = input->length(axis);
|
||||||
|
for (int i = axis + 1; i < dims; ++i) {
|
||||||
|
inside *= input->length(i);
|
||||||
|
}
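
// The commands below implement a numerically stable softmax along `axis`:
//     y = exp(x - max(x)) / sum(exp(x - max(x)))
// decomposed into: reshape to (outside, channel, inside), reduce-max over the
// channel dimension, broadcast, subtract, exp, reduce-sum, broadcast and divide.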
|
||||||
|
|
||||||
|
//input transform to NCHW format
|
||||||
|
std::shared_ptr<Tensor> tmpInput;
|
||||||
|
{
|
||||||
|
tmpInput.reset(Tensor::createDevice<float>({outside, channel, inside}));
|
||||||
|
auto outputDes = TensorUtils::getDescribe(tmpInput.get());
|
||||||
|
outputDes->regions.clear();
|
||||||
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
|
||||||
|
Tensor::InsideDescribe::Region desReg;
|
||||||
|
desReg.size[0] = outside;
|
||||||
|
desReg.size[1] = channel;
|
||||||
|
desReg.size[2] = inside;
|
||||||
|
desReg.dst.offset = 0;
|
||||||
|
desReg.dst.stride[0] = channel*inside;
|
||||||
|
desReg.dst.stride[1] = inside;
|
||||||
|
desReg.dst.stride[2] = 1;
|
||||||
|
desReg.src.offset = 0;
|
||||||
|
desReg.src.stride[0] = channel*inside;
|
||||||
|
desReg.src.stride[1] = inside;
|
||||||
|
desReg.src.stride[2] = 1;
|
||||||
|
desReg.origin = input;
|
||||||
|
outputDes->regions.emplace_back(std::move(desReg));
|
||||||
|
|
||||||
|
res.extras.emplace_back(tmpInput);
|
||||||
|
}
|
||||||
|
|
||||||
|
//reduction max, axis=1
|
||||||
|
std::shared_ptr<Tensor> maxValue;
|
||||||
|
{
|
||||||
|
maxValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
|
||||||
|
res.extras.emplace_back(maxValue);
|
||||||
|
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_MAXIMUM, tmpInput.get(), maxValue.get()));
|
||||||
|
}
|
||||||
|
|
||||||
|
//broadcast reduction axis dim
|
||||||
|
std::shared_ptr<Tensor> maxBroadValue;
|
||||||
|
{
|
||||||
|
maxBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
|
||||||
|
auto outputDes = TensorUtils::getDescribe(maxBroadValue.get());
|
||||||
|
outputDes->regions.clear();
|
||||||
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
|
||||||
|
Tensor::InsideDescribe::Region desReg;
|
||||||
|
desReg.size[0] = outside;
|
||||||
|
desReg.size[1] = channel;
|
||||||
|
desReg.size[2] = inside;
|
||||||
|
desReg.dst.offset = 0;
|
||||||
|
desReg.dst.stride[0] = channel*inside;
|
||||||
|
desReg.dst.stride[1] = inside;
|
||||||
|
desReg.dst.stride[2] = 1;
|
||||||
|
desReg.src.offset = 0;
|
||||||
|
desReg.src.stride[0] = inside;
|
||||||
|
desReg.src.stride[1] = 0;
|
||||||
|
desReg.src.stride[2] = 1;
|
||||||
|
desReg.origin = maxValue.get();
|
||||||
|
outputDes->regions.emplace_back(std::move(desReg));
|
||||||
|
|
||||||
|
res.extras.emplace_back(maxBroadValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
//sub
|
||||||
|
std::shared_ptr<Tensor> subMaxValue;
|
||||||
|
{
|
||||||
|
subMaxValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
|
||||||
|
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_SUB, tmpInput.get(), maxBroadValue.get(), subMaxValue.get());
|
||||||
|
res.extras.emplace_back(subMaxValue);
|
||||||
|
res.command.emplace_back(std::move(cmd));
|
||||||
|
}
|
||||||
|
//exp
|
||||||
|
std::shared_ptr<Tensor> expValue;
|
||||||
|
{
|
||||||
|
expValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
|
||||||
|
auto cmd = GeometryComputerUtils::makeUnary(UnaryOpOperation_EXP, subMaxValue.get(), expValue.get());
|
||||||
|
res.extras.emplace_back(expValue);
|
||||||
|
res.command.emplace_back(std::move(cmd));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//reduction sum, axis=2, only support NCHW
|
||||||
|
std::shared_ptr<Tensor> sumValue;
|
||||||
|
{
|
||||||
|
sumValue.reset(Tensor::createDevice<float>({outside, 1, inside}));
|
||||||
|
res.extras.emplace_back(sumValue);
|
||||||
|
res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_SUM, expValue.get(), sumValue.get()));
|
||||||
|
}
|
||||||
|
|
||||||
|
//broadcast reduction axis dim
|
||||||
|
std::shared_ptr<Tensor> sumBroadValue;
|
||||||
|
{
|
||||||
|
sumBroadValue.reset(Tensor::createDevice<float>({outside, channel, inside}));
|
||||||
|
auto outputDes = TensorUtils::getDescribe(sumBroadValue.get());
|
||||||
|
outputDes->regions.clear();
|
||||||
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
|
||||||
|
Tensor::InsideDescribe::Region desReg;
|
||||||
|
desReg.size[0] = outside;
|
||||||
|
desReg.size[1] = channel;
|
||||||
|
desReg.size[2] = inside;
|
||||||
|
desReg.dst.offset = 0;
|
||||||
|
desReg.dst.stride[0] = channel*inside;
|
||||||
|
desReg.dst.stride[1] = inside;
|
||||||
|
desReg.dst.stride[2] = 1;
|
||||||
|
desReg.src.offset = 0;
|
||||||
|
desReg.src.stride[0] = inside;
|
||||||
|
desReg.src.stride[1] = 0;
|
||||||
|
desReg.src.stride[2] = 1;
|
||||||
|
desReg.origin = sumValue.get();
|
||||||
|
outputDes->regions.emplace_back(std::move(desReg));
|
||||||
|
|
||||||
|
res.extras.emplace_back(sumBroadValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
//div
|
||||||
|
std::shared_ptr<Tensor> tmpOutput;
|
||||||
|
{
|
||||||
|
tmpOutput.reset(Tensor::createDevice<float>({outside, channel, inside}));
|
||||||
|
auto cmd = GeometryComputerUtils::makeBinary(BinaryOpOperation_REALDIV, expValue.get(), sumBroadValue.get(), tmpOutput.get());
|
||||||
|
res.extras.emplace_back(tmpOutput);
|
||||||
|
res.command.emplace_back(std::move(cmd));
|
||||||
|
}
|
||||||
|
|
||||||
|
//transform to output
|
||||||
|
{
|
||||||
|
auto outputDes = TensorUtils::getDescribe(output);
|
||||||
|
outputDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
Tensor::InsideDescribe::Region desReg;
|
||||||
|
desReg.size[0] = outside;
|
||||||
|
desReg.size[1] = channel;
|
||||||
|
desReg.size[2] = inside;
|
||||||
|
desReg.dst.offset = 0;
|
||||||
|
desReg.dst.stride[0] = channel*inside;
|
||||||
|
desReg.dst.stride[1] = inside;
|
||||||
|
desReg.dst.stride[2] = 1;
|
||||||
|
desReg.src.offset = 0;
|
||||||
|
desReg.src.stride[0] = channel*inside;
|
||||||
|
desReg.src.stride[1] = inside;
|
||||||
|
desReg.src.stride[2] = 1;
|
||||||
|
desReg.origin = tmpOutput.get();
|
||||||
|
outputDes->regions.emplace_back(std::move(desReg));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void _create() {
|
||||||
|
// std::shared_ptr<GeometryComputer> comp(new GeometrySoftmax);
|
||||||
|
// GeometryComputer::registerGeometryComputer(comp, {OpType_Softmax});
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_GEOMETRY(GeometrySoftmax, _create);
|
||||||
|
|
||||||
|
} // namespace MNN
|
|
@ -7,7 +7,7 @@ add_executable(benchmarkExprModels.out ${CMAKE_CURRENT_LIST_DIR}/benchmarkExprMo
target_include_directories(benchmarkExprModels.out PRIVATE "${CMAKE_CURRENT_LIST_DIR}/exprModels" ${CMAKE_CURRENT_SOURCE_DIR}/)
target_link_libraries(benchmarkExprModels.out ${MNN_DEPS})

if ((MSVC OR WIN32) AND NOT MNN_BUILD_SHARED_LIBS)
if (MSVC AND NOT MNN_BUILD_SHARED_LIBS)
foreach (DEPEND ${MNN_DEPS})
target_link_options(benchmark.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)
target_link_options(benchmarkExprModels.out PRIVATE /WHOLEARCHIVE:$<TARGET_FILE:${DEPEND}>)
@ -124,6 +124,7 @@ std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward
|
||||||
const auto bufferSize = revertor->getBufferSize();
|
const auto bufferSize = revertor->getBufferSize();
|
||||||
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
|
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
|
||||||
revertor.reset();
|
revertor.reset();
|
||||||
|
net->setSessionMode(MNN::Interpreter::Session_Release);
|
||||||
MNN::ScheduleConfig config;
|
MNN::ScheduleConfig config;
|
||||||
config.numThread = numberThread;
|
config.numThread = numberThread;
|
||||||
config.type = static_cast<MNNForwardType>(forward);
|
config.type = static_cast<MNNForwardType>(forward);
|
||||||
|
|
|
@ -90,6 +90,7 @@ static std::vector<float> runNet(VARP netOutput, const ScheduleConfig& config, i
|
||||||
const void* buf = builder.GetBufferPointer();
|
const void* buf = builder.GetBufferPointer();
|
||||||
size_t size = builder.GetSize();
|
size_t size = builder.GetSize();
|
||||||
std::unique_ptr<Interpreter> net(Interpreter::createFromBuffer(buf, size));
|
std::unique_ptr<Interpreter> net(Interpreter::createFromBuffer(buf, size));
|
||||||
|
net->setSessionMode(MNN::Interpreter::Session_Release);
|
||||||
auto session = net->createSession(config);
|
auto session = net->createSession(config);
|
||||||
net->releaseModel();
|
net->releaseModel();
|
||||||
auto inputTensor = net->getSessionInput(session, NULL);
|
auto inputTensor = net->getSessionInput(session, NULL);
|
||||||
|
|
|
@ -1,84 +0,0 @@
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
major_py_ver = sys.version_info.major
|
|
||||||
|
|
||||||
def convert_string_to_hex_list(code_str):
|
|
||||||
hex_list = []
|
|
||||||
for i in range(len(code_str)):
|
|
||||||
hex_ = hex(ord(code_str[i]))
|
|
||||||
hex_list.append(hex_)
|
|
||||||
return hex_list
|
|
||||||
|
|
||||||
def opencl_codegen():
|
|
||||||
cl_kernel_dir = sys.argv[1]
|
|
||||||
output_path = sys.argv[2]
|
|
||||||
print("Generating OpenCL Kernels in "+cl_kernel_dir+" to "+output_path)
|
|
||||||
if not os.path.exists(cl_kernel_dir):
|
|
||||||
print(cl_kernel_dir + " doesn't exist!")
|
|
||||||
|
|
||||||
#common.h
|
|
||||||
common_header_code = ""
|
|
||||||
#quantized_common.h
|
|
||||||
quantized_common_header_code = ""
|
|
||||||
#activation_common.h
|
|
||||||
activation_common_header_code = ""
|
|
||||||
for file_name in os.listdir(cl_kernel_dir):
|
|
||||||
file_path = os.path.join(cl_kernel_dir, file_name)
|
|
||||||
if file_path[-2:] == ".h" and file_name[:-2] == "quantized_common":
|
|
||||||
with open(file_path, "r") as f:
|
|
||||||
quantized_common_header_code += f.read()
|
|
||||||
elif file_path[-2:] == ".h" and file_name[:-2] == "activation_common":
|
|
||||||
with open(file_path, "r") as f:
|
|
||||||
activation_common_header_code += f.read()
|
|
||||||
|
|
||||||
opencl_code_maps = {}
|
|
||||||
for file_name in os.listdir(cl_kernel_dir):
|
|
||||||
file_path = os.path.join(cl_kernel_dir, file_name)
|
|
||||||
if file_path[-3:] == ".cl":
|
|
||||||
with open(file_path, "r") as f:
|
|
||||||
code_str = ""
|
|
||||||
for line in f.readlines():
|
|
||||||
if "#include <activation_common.h>" in line:
|
|
||||||
code_str += common_header_code
|
|
||||||
code_str += activation_common_header_code
|
|
||||||
elif "#include <quantized_common.h>" in line:
|
|
||||||
code_str += common_header_code
|
|
||||||
code_str += quantized_common_header_code
|
|
||||||
elif "#include <common.h>" in line:
|
|
||||||
code_str += common_header_code
|
|
||||||
else:
|
|
||||||
code_str += line
|
|
||||||
opencl_code_maps[file_name[:-3]] = convert_string_to_hex_list(code_str)
|
|
||||||
|
|
||||||
#source model
|
|
||||||
opencl_source_map = "#include <map> \n"
|
|
||||||
opencl_source_map += "#include <string> \n"
|
|
||||||
opencl_source_map += "#include <vector> \n"
|
|
||||||
opencl_source_map += "namespace MNN { \n"
|
|
||||||
opencl_source_map += "extern const std::map<std::string, std::vector<unsigned char>> OpenCLProgramMap = \n { \n"
|
|
||||||
|
|
||||||
if major_py_ver == 2:
|
|
||||||
items = opencl_code_maps.iteritems()
|
|
||||||
else:
|
|
||||||
items = opencl_code_maps.items()
|
|
||||||
for file_name, file_source in items:
|
|
||||||
opencl_source_map += "{\n \""
|
|
||||||
opencl_source_map += file_name
|
|
||||||
opencl_source_map += "\", \n"
|
|
||||||
opencl_source_map += " { "
|
|
||||||
for source_hex in file_source:
|
|
||||||
opencl_source_map += source_hex
|
|
||||||
opencl_source_map += ","
|
|
||||||
opencl_source_map += " } "
|
|
||||||
opencl_source_map += "\n }, \n"
|
|
||||||
|
|
||||||
opencl_source_map += " }; \n"
|
|
||||||
opencl_source_map += "} \n"
|
|
||||||
|
|
||||||
with open(output_path, "w") as w_file:
|
|
||||||
w_file.write(opencl_source_map)
|
|
||||||
|
|
||||||
print("Generate OpenCL Source done !!! \n")
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
opencl_codegen()
|
|
|
@ -0,0 +1,140 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
. ./parse_options.sh || exit 1;
|
||||||
|
|
||||||
|
CMAKE=cmake
|
||||||
|
MAKE=make
|
||||||
|
ANDROID_NDK=/home/android-ndk-r18b
|
||||||
|
|
||||||
|
BUILD_ROOT=`pwd`
|
||||||
|
|
||||||
|
# Clean the existing directory instead of removing it, to avoid the
# "Current working directory cannot be established" problem.
|
||||||
|
function make_or_clean_dir {
|
||||||
|
if [ -d $1 ]; then
|
||||||
|
rm -rf $1/*
|
||||||
|
else
|
||||||
|
mkdir $1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_arm_android_32 {
|
||||||
|
make_or_clean_dir build_arm_android_32 && cd build_arm_android_32
|
||||||
|
$CMAKE ../.. \
|
||||||
|
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DANDROID_ABI="armeabi-v7a" \
|
||||||
|
-DANDROID_STL=c++_static \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DANDROID_NATIVE_API_LEVEL=android-21 \
|
||||||
|
-DANDROID_TOOLCHAIN=clang \
|
||||||
|
-DMNN_USE_LOGCAT=true \
|
||||||
|
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
|
||||||
|
-DNATIVE_LIBRARY_OUTPUT=. \
|
||||||
|
-DNATIVE_INCLUDE_OUTPUT=. \
|
||||||
|
-DMNN_VULKAN=$USE_VULKAN \
|
||||||
|
-DMNN_OPENCL=$USE_OPENCL \
|
||||||
|
-DMNN_OPENGL=$USE_OPENGL \
|
||||||
|
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
|
||||||
|
$MAKE -j $build_threads || exit 1;
|
||||||
|
cd $BUILD_ROOT; true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_arm_android_64 {
|
||||||
|
make_or_clean_dir build_arm_android_64 && cd build_arm_android_64
|
||||||
|
$CMAKE ../.. \
|
||||||
|
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DANDROID_ABI="arm64-v8a" \
|
||||||
|
-DANDROID_STL=c++_static \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DANDROID_NATIVE_API_LEVEL=android-21 \
|
||||||
|
-DANDROID_TOOLCHAIN=clang \
|
||||||
|
-DMNN_USE_LOGCAT=true \
|
||||||
|
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
|
||||||
|
-DNATIVE_LIBRARY_OUTPUT=. \
|
||||||
|
-DNATIVE_INCLUDE_OUTPUT=. \
|
||||||
|
-DMNN_ARM82=ON \
|
||||||
|
-DMNN_VULKAN=$USE_VULKAN \
|
||||||
|
-DMNN_OPENCL=$USE_OPENCL \
|
||||||
|
-DMNN_OPENGL=$USE_OPENGL \
|
||||||
|
-DMNN_USE_THREAD_POOL=$USE_THREAD_POOL || exit 1;
|
||||||
|
$MAKE -j $build_threads || exit 1;
|
||||||
|
cd $BUILD_ROOT; true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_arm_linux_32 {
|
||||||
|
cd $BUILD_ROOT; true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_arm_linux_64 {
|
||||||
|
cd $BUILD_ROOT; true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_x86_linux {
|
||||||
|
make_or_clean_dir build_x86_linux && cd build_x86_linux
|
||||||
|
$CMAKE ../.. \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DMNN_BUILD_TRAIN=ON \
|
||||||
|
-DMNN_SEP_BUILD=OFF \
|
||||||
|
-DMNN_BUILD_DEMO=ON \
|
||||||
|
-DMNN_BUILD_QUANTOOLS=ON \
|
||||||
|
-DMNN_EVALUATION=ON \
|
||||||
|
-DMNN_BUILD_CONVERTER=ON \
|
||||||
|
-DMNN_SUPPORT_TFLITE_QUAN=ON \
|
||||||
|
-DMNN_BUILD_TEST=ON \
|
||||||
|
-DMNN_OPENCL=$USE_OPENCL \
|
||||||
|
-DMNN_VULKAN=$USE_VULKAN \
|
||||||
|
-DMNN_OPENMP=$USE_OPENMP \
|
||||||
|
-DMNN_USE_THREAD_POOL=OFF \
|
||||||
|
-DMNN_BUILD_BENCHMARK=ON || exit 1;
|
||||||
|
$MAKE -j $build_threads || exit 1;
|
||||||
|
cd $BUILD_ROOT; true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_all {
|
||||||
|
build_arm_android_32 || exit 1;
|
||||||
|
build_arm_android_64 || exit 1;
|
||||||
|
build_arm_linux_32 || exit 1;
|
||||||
|
build_arm_linux_64 || exit 1;
|
||||||
|
build_x86_linux || exit 1;
|
||||||
|
true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function clean {
|
||||||
|
rm -rf build_arm_android_32
|
||||||
|
rm -rf build_arm_android_64
|
||||||
|
rm -rf build_arm_linux_32
|
||||||
|
rm -rf build_arm_linux_64
|
||||||
|
rm -rf build_x86_linux
|
||||||
|
}
|
||||||
|
|
||||||
|
function build {
|
||||||
|
case $platform in
|
||||||
|
"arm_linux_32")
|
||||||
|
build_arm_linux_32 || exit 1;
|
||||||
|
;;
|
||||||
|
"arm_linux_64")
|
||||||
|
build_arm_linux_64 || exit 1;
|
||||||
|
;;
|
||||||
|
"x86_linux")
|
||||||
|
build_x86_linux || exit 1;
|
||||||
|
;;
|
||||||
|
"arm_android_32")
|
||||||
|
build_arm_android_32 || exit 1;
|
||||||
|
;;
|
||||||
|
"arm_android_64")
|
||||||
|
build_arm_android_64 || exit 1;
|
||||||
|
;;
|
||||||
|
"all")
|
||||||
|
build_all || exit 1;
|
||||||
|
;;
|
||||||
|
*) echo "Invalid platform: $platform" && exit 1;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ $clean == 1 ]; then
|
||||||
|
clean
|
||||||
|
else
|
||||||
|
build $@
|
||||||
|
fi
|
||||||
|
true;
|
|
@ -0,0 +1,113 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Valid platform:
|
||||||
|
# - arm_android_32
|
||||||
|
# - arm_android_64
|
||||||
|
# - arm_linux_32
|
||||||
|
# - arm_linux_64
|
||||||
|
# - x86_linux
|
||||||
|
platform="all"
|
||||||
|
|
||||||
|
# Option to build with opencl.
|
||||||
|
use_opencl=0
|
||||||
|
|
||||||
|
# Option to build with opengl.
|
||||||
|
use_opengl=0
|
||||||
|
|
||||||
|
# Option to build with vulkan.
|
||||||
|
use_vulkan=0
|
||||||
|
|
||||||
|
# Option to build with openmp multithreads library.
|
||||||
|
use_openmp=0
|
||||||
|
|
||||||
|
build_threads=1
|
||||||
|
|
||||||
|
# Option to clear the build history.
|
||||||
|
clean=0
|
||||||
|
|
||||||
|
USE_OPENCL=OFF
|
||||||
|
USE_VULKAN=OFF
|
||||||
|
USE_OPENGL=OFF
|
||||||
|
USE_OPENMP=OFF
|
||||||
|
USE_THREAD_POOL=ON
|
||||||
|
|
||||||
|
function print_usage {
|
||||||
|
echo -e "Usgae: ./build.sh"
|
||||||
|
echo -e " --platform=x: Specify build platform x. "
|
||||||
|
echo -e " All valid platforms are \"arm_android_32\", \"arm_android_64\",
|
||||||
|
\"arm_linux_32\", \"arm_linux_64\", \"x86_linux\", \"all\"."
|
||||||
|
echo -e " The default is \"all\"."
|
||||||
|
echo -e " --use_openmp=true|false: Build with openmp or not."
|
||||||
|
echo -e " The default is false."
|
||||||
|
echo -e " --use_opencl=true|false: Build with opencl or not."
|
||||||
|
echo -e " The default is false."
|
||||||
|
echo -e " --use_opengl=true|false: Build with opengl or not."
|
||||||
|
echo -e " The default is false."
|
||||||
|
echo -e " --use_vulkan=true|false: Build with vulkan or not."
|
||||||
|
echo -e " The default is false."
|
||||||
|
echo -e " --job=n: Build with n threads. Default is 1."
|
||||||
|
}
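
# Example invocation (illustrative; these flags are parsed by the loop below and
# consumed by build.sh, which sources this script):
#   ./build.sh --platform=arm_android_64 --use_vulkan=true --job=8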
|
||||||
|
|
||||||
|
function parse_platform {
|
||||||
|
platform=`echo "$1" | awk -F '=' '{print $2}'`
|
||||||
|
}
|
||||||
|
|
||||||
|
function parse_nthreads {
|
||||||
|
build_threads=`echo "$1" | awk -F '=' '{print $2}'`
|
||||||
|
}
|
||||||
|
|
||||||
|
function parse_bool {
|
||||||
|
val=`echo "$1" | awk -F '=' '{print $2}'`
|
||||||
|
if [ $val == "true" ] || [ $val == "1" ]; then
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
fi
|
||||||
|
}
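
# Note: parse_bool reports its result through the exit status, so "1" means true
# here (callers capture it with use_xxx=$?), which is the reverse of the usual
# shell success/failure convention.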
|
||||||
|
|
||||||
|
[ -z "${1:-}" ] && print_usage && exit 1;
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
[ -z "${1:-}" ] && break;
|
||||||
|
case "$1" in
|
||||||
|
--platform=*) parse_platform "$1"; shift 1;
|
||||||
|
;;
|
||||||
|
--use_openmp=*) parse_bool "$1"; use_openmp=$?; shift 1;
|
||||||
|
;;
|
||||||
|
--use_openmp) use_openmp=1; shift 1;
|
||||||
|
;;
|
||||||
|
--use_opencl=*) parse_bool "$1"; use_opencl=$?; shift 1;
|
||||||
|
;;
|
||||||
|
--use_opencl) use_opencl=1; shift 1;
|
||||||
|
;;
|
||||||
|
--use_opengl=*) parse_bool "$1"; use_opengl=$?; shift 1;
|
||||||
|
;;
|
||||||
|
--use_opengl) use_opengl=1; shift 1;
|
||||||
|
;;
|
||||||
|
--use_vulkan=*) parse_bool "$1"; use_vulkan=$?; shift 1;
|
||||||
|
;;
|
||||||
|
--use_vulkan) use_vulkan=1; shift 1;
|
||||||
|
;;
|
||||||
|
--job=*) parse_nthreads "$1"; shift 1;
|
||||||
|
;;
|
||||||
|
clean) clean=1; shift 1;
|
||||||
|
;;
|
||||||
|
*) break;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ $use_opencl == 1 ]; then
|
||||||
|
USE_OPENCL=ON
|
||||||
|
fi
|
||||||
|
if [ $use_opengl == 1 ]; then
|
||||||
|
USE_OPENGL=ON
|
||||||
|
fi
|
||||||
|
if [ $use_vulkan == 1 ]; then
|
||||||
|
USE_VULKAN=ON
|
||||||
|
fi
|
||||||
|
if [ $use_openmp == 1 ]; then
|
||||||
|
USE_OPENMP=ON
|
||||||
|
USE_THREAD_POOL=OFF
|
||||||
|
fi
|
||||||
|
|
||||||
|
true;
|
|
@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars64.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja

@ -0,0 +1,3 @@
call "C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvars32.bat"
cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release ..
ninja
@ -12,3 +12,9 @@ target_link_libraries(segment.out ${MNN_DEPS})

add_executable(expressDemo.out ${CMAKE_CURRENT_LIST_DIR}/expressDemo.cpp)
target_link_libraries(expressDemo.out ${MNN_DEPS})

add_executable(transformerDemo.out ${CMAKE_CURRENT_LIST_DIR}/transformerDemo.cpp)
target_link_libraries(transformerDemo.out ${MNN_DEPS})

add_executable(rasterDemo.out ${CMAKE_CURRENT_LIST_DIR}/rasterDemo.cpp)
target_link_libraries(rasterDemo.out ${MNN_DEPS})
@ -53,7 +53,6 @@ int main(int argc, const char* argv[]) {
|
||||||
MNN_ERROR("Output Not valid\n");
|
MNN_ERROR("Output Not valid\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
auto size = outputInfo->size;
|
|
||||||
//Test Speed
|
//Test Speed
|
||||||
if (testTime > 0){
|
if (testTime > 0){
|
||||||
//Let the frequence up
|
//Let the frequence up
|
||||||
|
@ -82,6 +81,7 @@ int main(int argc, const char* argv[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
auto size = outputInfo->size;
|
||||||
auto outputPtr = output->readMap<float>();
|
auto outputPtr = output->readMap<float>();
|
||||||
if (nullptr == outputPtr) {
|
if (nullptr == outputPtr) {
|
||||||
MNN_ERROR("Output Not valid read error\n");
|
MNN_ERROR("Output Not valid read error\n");
|
||||||
|
|
|
@ -0,0 +1,251 @@
|
||||||
|
//
|
||||||
|
// rasterDemo.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2020/10/14.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <chrono>
|
||||||
|
#include <MNN/MNNDefine.h>
|
||||||
|
#include <MNN/Tensor.hpp>
|
||||||
|
#include <MNN/Interpreter.hpp>
|
||||||
|
#include "MNN_generated.h"
|
||||||
|
#include "core/TensorUtils.hpp"
|
||||||
|
#include "core/Execution.hpp"
|
||||||
|
#include "core/Backend.hpp"
|
||||||
|
#include "rapidjson/document.h"
|
||||||
|
#include "rapidjson/stringbuffer.h"
|
||||||
|
#include "rapidjson/writer.h"
|
||||||
|
using namespace MNN;
|
||||||
|
/*
|
||||||
|
1. Raster performs the index mapping below:
|
||||||
|
|
||||||
|
for (region : regions)
|
||||||
|
src = region.src, dst = region.dst;
|
||||||
|
for (i = 0 -> size[0])
|
||||||
|
for (j = 0 -> size[1])
|
||||||
|
for (k = 0 -> size[2])
|
||||||
|
output[dst.offset + i * dst.stride[0] + j * dst.stride[1] + k * dst.stride[2]] =
|
||||||
|
region.origin[src.offset + i * src.stride[0] + j * src.stride[1] + k * src.stride[2]];
|
||||||
|
|
||||||
|
2. A Raster Op has one input and one output, but the input is not a real input tensor: it is a
middle tensor of VIRTUAL type whose regions point to the real input tensors, as below.
|
||||||
|
|
||||||
|
input_0 --> region_0 --\
|
||||||
|
\
|
||||||
|
input_1 --> region_1 ---- middle ----> output
|
||||||
|
/
|
||||||
|
input_2 --> region_2 --/
|
||||||
|
|
||||||
|
3. This example reads a JSON file, constructs the corresponding Raster executions and runs them.
The input JSON file format is as below:
|
||||||
|
{
|
||||||
|
"inputs" : [
|
||||||
|
{
|
||||||
|
"id" : int,
|
||||||
|
"type" : "type_name", // float or int
|
||||||
|
"dims" : [int],
|
||||||
|
"data" : [int/float] // if null, fill with random number
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs" : [
|
||||||
|
// same with inputs
|
||||||
|
],
|
||||||
|
"regions" : [
|
||||||
|
{
|
||||||
|
"id" : int, // points to outputs
|
||||||
|
"size" : [int],
|
||||||
|
"src" : {
|
||||||
|
"offset" : int,
|
||||||
|
"stride" : [int]
|
||||||
|
},
|
||||||
|
"dst" : { // same with src },
|
||||||
|
"origin" : int // point to inputs
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
*/
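//
// As an illustration only (not taken from the repository's test data), a minimal
// JSON in the format above could look like this; the single region transposes a
// 2x2 float input into the output via strides:
//
//   {
//     "inputs"  : [ { "id" : 0, "type" : "float", "dims" : [1, 1, 2, 2],
//                     "data" : [1.0, 2.0, 3.0, 4.0] } ],
//     "outputs" : [ { "id" : 0, "type" : "float", "dims" : [1, 1, 2, 2], "data" : [] } ],
//     "regions" : [ { "id" : 0, "size" : [1, 2, 2],
//                     "src" : { "offset" : 0, "stride" : [4, 1, 2] },
//                     "dst" : { "offset" : 0, "stride" : [4, 2, 1] },
//                     "origin" : 0 } ]
//   }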
|
||||||
|
|
||||||
|
static std::string runRaster(std::string jsonString, int runNum) {
|
||||||
|
srand(0);
|
||||||
|
rapidjson::Document document;
|
||||||
|
document.Parse(jsonString.c_str());
|
||||||
|
if (document.HasParseError()) {
|
||||||
|
MNN_ERROR("Invalid Json Format!\n");
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
// prepare CPU backend
|
||||||
|
ScheduleConfig config;
|
||||||
|
config.type = MNN_FORWARD_CPU;
|
||||||
|
BackendConfig backendConfig;
|
||||||
|
backendConfig.precision = BackendConfig::Precision_High;
|
||||||
|
config.backendConfig = &backendConfig;
|
||||||
|
Backend::Info compute;
|
||||||
|
compute.type = config.type;
|
||||||
|
compute.numThread = config.numThread;
|
||||||
|
compute.user = config.backendConfig;
|
||||||
|
const RuntimeCreator* runtimeCreator(MNNGetExtraRuntimeCreator(compute.type));
|
||||||
|
std::unique_ptr<Runtime> runtime(runtimeCreator->onCreate(compute));
|
||||||
|
std::unique_ptr<Backend> backend(runtime->onCreate());
|
||||||
|
|
||||||
|
// build Op
|
||||||
|
std::unique_ptr<OpT> opt(new OpT);
|
||||||
|
opt->type = OpType_Raster;
|
||||||
|
flatbuffers::FlatBufferBuilder builder(1024);
|
||||||
|
builder.ForceDefaults(true);
|
||||||
|
auto len = Op::Pack(builder, opt.get());
|
||||||
|
builder.Finish(len);
|
||||||
|
auto buffer = builder.GetBufferPointer();
|
||||||
|
const Op* op = flatbuffers::GetMutableRoot<Op>(buffer);
|
||||||
|
// build tensors (NCHW) from json
|
||||||
|
std::vector<std::unique_ptr<Tensor>> inputs;
|
||||||
|
std::vector<std::unique_ptr<Tensor>> outputs;
|
||||||
|
auto readTensors = [&document, &backend](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
|
||||||
|
if (document.HasMember(type)) {
|
||||||
|
auto info = document[type].GetArray();
|
||||||
|
tensors.resize(info.Size());
|
||||||
|
for (auto iter = info.begin(); iter != info.end(); iter++) {
|
||||||
|
auto obj = iter->GetObject();
|
||||||
|
int id = obj["id"].GetInt();
|
||||||
|
tensors[id].reset(new Tensor(4));
|
||||||
|
auto tensor = tensors[id].get();
|
||||||
|
auto dataType = obj["type"].GetString();
|
||||||
|
bool isFloat = !strcmp(dataType, "float");
|
||||||
|
tensor->setType(isFloat ? DataType_DT_FLOAT : DataType_DT_INT32);
|
||||||
|
auto dims = obj["dims"].GetArray();
|
||||||
|
for (auto d = dims.begin(); d != dims.end(); d++) {
|
||||||
|
tensor->setLength(d - dims.begin(), d->GetInt());
|
||||||
|
}
|
||||||
|
TensorUtils::setLinearLayout(tensor);
|
||||||
|
backend->onAcquireBuffer(tensor, Backend::STATIC);
|
||||||
|
TensorUtils::getDescribe(tensor)->backend = backend.get();
|
||||||
|
auto data = obj["data"].GetArray();
|
||||||
|
if (!strcmp(type, "inputs")) {
|
||||||
|
bool hasData = data.Size() == tensor->elementSize();
|
||||||
|
auto dataIter = data.begin();
|
||||||
|
for (int i = 0; i < tensor->elementSize(); i++, dataIter++) {
|
||||||
|
if (isFloat) {
|
||||||
|
tensor->host<float>()[i] = hasData ? dataIter->GetFloat() : rand() % 10 / 10.0;
|
||||||
|
} else {
|
||||||
|
tensor->host<int>()[i] = hasData ? dataIter->GetInt() : rand() % 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
readTensors(inputs, "inputs");
|
||||||
|
readTensors(outputs, "outputs");
|
||||||
|
|
||||||
|
// build middle tensors' region info from json
|
||||||
|
std::vector<std::unique_ptr<Tensor>> middles;
|
||||||
|
middles.resize(outputs.size());
|
||||||
|
if (document.HasMember("regions")) {
|
||||||
|
auto info = document["regions"].GetArray();
|
||||||
|
for (auto iter = info.begin(); iter != info.end(); iter++) {
|
||||||
|
auto obj = iter->GetObject();
|
||||||
|
int id = obj["id"].GetInt();
|
||||||
|
if (middles[id] == nullptr) {
|
||||||
|
middles[id].reset(new Tensor(4));
|
||||||
|
}
|
||||||
|
auto des = TensorUtils::getDescribe(middles[id].get());
|
||||||
|
des->memoryType = MNN::Tensor::InsideDescribe::MEMORY_VIRTUAL;
|
||||||
|
Tensor::InsideDescribe::Region region;
|
||||||
|
int origin = obj["origin"].GetInt();
|
||||||
|
region.origin = inputs[origin].get();
|
||||||
|
auto size = obj["size"].GetArray();
|
||||||
|
auto src = obj["src"].GetObject();
|
||||||
|
auto dst = obj["dst"].GetObject();
|
||||||
|
auto srcStride = src["stride"].GetArray();
|
||||||
|
auto dstStride = dst["stride"].GetArray();
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
region.size[i] = size[i].GetInt();
|
||||||
|
region.src.stride[i] = srcStride[i].GetInt();
|
||||||
|
region.dst.stride[i] = dstStride[i].GetInt();
|
||||||
|
}
|
||||||
|
region.src.offset = src["offset"].GetInt();
|
||||||
|
region.dst.offset = dst["offset"].GetInt();
|
||||||
|
des->regions.push_back(region);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// build execution of Raster and run them
|
||||||
|
for (int i = 0; i < outputs.size(); i++) {
|
||||||
|
std::vector<Tensor*> ins = {middles[i].get()}, outs = {outputs[i].get()};
|
||||||
|
std::unique_ptr<Execution> exe(backend->onCreate(ins, outs, op));
|
||||||
|
exe->onResize(ins, outs);
|
||||||
|
auto t1 = std::chrono::high_resolution_clock::now();
|
||||||
|
for (int j = 0; j < runNum; j++) {
|
||||||
|
exe->onExecute(ins, outs);
|
||||||
|
}
|
||||||
|
auto t2 = std::chrono::high_resolution_clock::now();
|
||||||
|
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
|
||||||
|
double time = time_span.count() * 1000.0 / runNum;
|
||||||
|
printf("For output_id = %d, run %d times, the average time is %f ms.\n", i, runNum, time);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto writeTensors = [&document](std::vector<std::unique_ptr<Tensor>>& tensors, const char* type) {
|
||||||
|
auto info = document[type].GetArray();
|
||||||
|
for (auto iter = info.begin(); iter != info.end(); iter++) {
|
||||||
|
auto obj = iter->GetObject();
|
||||||
|
int id = obj["id"].GetInt();
|
||||||
|
auto data = obj["data"].GetArray();
|
||||||
|
if (data.Size() == tensors[id]->elementSize()) {
|
||||||
|
// already has data, don't overwrite
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
bool isFloat = !strcmp(obj["type"].GetString(), "float");
|
||||||
|
data.Reserve(tensors[id]->elementSize(), document.GetAllocator());
|
||||||
|
for (int i = 0; i < tensors[id]->elementSize(); i++) {
|
||||||
|
if (isFloat) {
|
||||||
|
data.PushBack(tensors[id]->host<float>()[i], document.GetAllocator());
|
||||||
|
} else {
|
||||||
|
data.PushBack(tensors[id]->host<int>()[i], document.GetAllocator());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
writeTensors(inputs, "inputs");
|
||||||
|
writeTensors(outputs, "outputs");
|
||||||
|
rapidjson::StringBuffer stringBuffer;
|
||||||
|
rapidjson::Writer<rapidjson::StringBuffer> writer(stringBuffer);
|
||||||
|
document.Accept(writer);
|
||||||
|
return stringBuffer.GetString();
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, const char* argv[]) {
|
||||||
|
if (argc < 2) {
|
||||||
|
printf("Usage: ./rasterDemo.out input.json [output.json] [runNum]\ndefault output is input, and default runNum is 100.\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
const char* inputFile = argv[1];
|
||||||
|
const char* outputFile = argv[1];
|
||||||
|
int runNum = 100;
|
||||||
|
if (argc >= 3) {
|
||||||
|
outputFile = argv[2];
|
||||||
|
}
|
||||||
|
if (argc >= 4) {
|
||||||
|
runNum = ::atoi(argv[3]);
|
||||||
|
}
|
||||||
|
std::ifstream in(inputFile);
|
||||||
|
if (in.fail()) {
|
||||||
|
printf("Invalid input Json File!\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
std::ofstream out(outputFile);
|
||||||
|
if (out.fail()) {
|
||||||
|
printf("Invalid output Json File!\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << in.rdbuf();
|
||||||
|
out << runRaster(ss.str(), runNum);
|
||||||
|
out.close();
|
||||||
|
printf("Run Raster Done!\n");
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
#include <MNN/expr/Module.hpp>
|
||||||
|
#define MNN_OPEN_TIME_TRACE
|
||||||
|
#include <MNN/AutoTime.hpp>
|
||||||
|
#include <MNN/expr/ExprCreator.hpp>
|
||||||
|
#include <MNN/expr/Executor.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
using namespace MNN::Express;
|
||||||
|
using namespace MNN;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main(int argc, const char* argv[]) {
|
||||||
|
if (argc < 2) {
|
||||||
|
MNN_ERROR("Don't has model name\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
BackendConfig config;
|
||||||
|
//Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 4);
|
||||||
|
auto modelName = argv[1];
|
||||||
|
std::shared_ptr<Module> model;
|
||||||
|
model.reset(Module::load({"NmtModel/Placeholder", "NmtModel/Placeholder_1"}, {"NmtModel/transpose_2"}, modelName));
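// The input/output tensor names above are specific to the example NmtModel used by
// this demo; replace them when loading a different model.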
|
||||||
|
std::vector<int> input0 = {32,16,234,3215,61,135,29,10,24317,4661,4,0};
|
||||||
|
std::vector<int> input1 = {1,1,1,1,1,1,1,1,1,1,1,1};
|
||||||
|
auto first = _Input({1, (int)input0.size()}, NHWC, halide_type_of<int>());
|
||||||
|
::memcpy(first->writeMap<int>(), input0.data(), input0.size() * sizeof(int));
|
||||||
|
auto second = _Input({1, (int)input1.size()}, NHWC, halide_type_of<int>());
|
||||||
|
::memcpy(second->writeMap<int>(), input1.data(), input1.size() * sizeof(int));
|
||||||
|
std::vector<VARP> outputs;
|
||||||
|
for (int i = 0; i < 2; ++i) {
|
||||||
|
{
|
||||||
|
AUTOTIME;
|
||||||
|
Executor::getGlobalExecutor()->resetProfile();
|
||||||
|
outputs = model->onForward({first, second});
|
||||||
|
Executor::getGlobalExecutor()->dumpProfile();
|
||||||
|
}
|
||||||
|
std::ostringstream fileNameOs;
|
||||||
|
std::ostringstream dimInfo;
|
||||||
|
fileNameOs << i << "_output.txt";
|
||||||
|
auto info = outputs[0]->getInfo();
|
||||||
|
for (int d=0; d<info->dim.size(); ++d) {
|
||||||
|
dimInfo << info->dim[d] << "_";
|
||||||
|
}
|
||||||
|
auto fileName = fileNameOs.str();
|
||||||
|
MNN_PRINT("Output Name: %s, Dim: %s\n", fileName.c_str(), dimInfo.str().c_str());
|
||||||
|
auto ptr = outputs[0]->readMap<int>();
|
||||||
|
std::ofstream outputOs(fileName.c_str());
|
||||||
|
for (int i=0; i<info->size; ++i) {
|
||||||
|
outputOs << ptr[i] << "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
AUTOTIME;
|
||||||
|
outputs = model->onForward({first, second});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -53,27 +53,23 @@ static int CompareElements(const LabeledElement *a, const LabeledElement *b) {
|
||||||
if (!_net || !_session) {
|
if (!_net || !_session) {
|
||||||
return nil;
|
return nil;
|
||||||
}
|
}
|
||||||
|
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
|
||||||
|
MNN::Tensor copy(output);
|
||||||
|
auto input = _net->getSessionInput(_session, nullptr);
|
||||||
|
MNN::Tensor tensorCache(input);
|
||||||
|
input->copyToHostTensor(&tensorCache);
|
||||||
|
|
||||||
// run
|
// run
|
||||||
NSTimeInterval begin = NSDate.timeIntervalSinceReferenceDate;
|
NSTimeInterval begin = NSDate.timeIntervalSinceReferenceDate;
|
||||||
// you should set input data for each inference
|
// you should set input data for each inference
|
||||||
if (cycles == 1) {
|
|
||||||
_net->runSession(_session);
|
|
||||||
} else {
|
|
||||||
auto input = _net->getSessionInput(_session, nullptr);
|
|
||||||
MNN::Tensor tensorCache(input);
|
|
||||||
input->copyToHostTensor(&tensorCache);
|
|
||||||
for (int i = 0; i < cycles; i++) {
|
for (int i = 0; i < cycles; i++) {
|
||||||
input->copyFromHostTensor(&tensorCache);
|
input->copyFromHostTensor(&tensorCache);
|
||||||
_net->runSession(_session);
|
_net->runSession(_session);
|
||||||
}
|
output->copyToHostTensor(©);
|
||||||
}
|
}
|
||||||
NSTimeInterval cost = NSDate.timeIntervalSinceReferenceDate - begin;
|
NSTimeInterval cost = NSDate.timeIntervalSinceReferenceDate - begin;
|
||||||
|
|
||||||
// result
|
// result
|
||||||
MNN::Tensor *output = _net->getSessionOutput(_session, nullptr);
|
|
||||||
MNN::Tensor copy(output);
|
|
||||||
output->copyToHostTensor(©);
|
|
||||||
float *data = copy.host<float>();
|
float *data = copy.host<float>();
|
||||||
LabeledElement objects[1000];
|
LabeledElement objects[1000];
|
||||||
for (int i = 0; i < 1000; i++) {
|
for (int i = 0; i < 1000; i++) {
|
||||||
|
|
|
@ -1,14 +1,21 @@
|
||||||
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
|
file(GLOB_RECURSE MNN_EXPR_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.*")
|
||||||
option(MNN_EXPR_ENABLE_PROFILER "Support profile Expr's op cost" OFF)
|
option(MNN_EXPR_ENABLE_PROFILER "Support profile Expr's op cost" OFF)
|
||||||
|
option(MNN_EXPR_SHAPE_EAGER "Force compute Expr's shape directly cost" OFF)
|
||||||
IF (MNN_EXPR_ENABLE_PROFILER)
|
IF (MNN_EXPR_ENABLE_PROFILER)
|
||||||
add_definitions(-DMNN_EXPR_ENABLE_PROFILER)
|
add_definitions(-DMNN_EXPR_ENABLE_PROFILER)
|
||||||
ENDIF()
|
ENDIF()
|
||||||
|
IF (MNN_EXPR_SHAPE_EAGER)
|
||||||
|
add_definitions(-DMNN_EXPR_SHAPE_EAGER)
|
||||||
|
ENDIF()
|
||||||
IF(MNN_SEP_BUILD)
|
IF(MNN_SEP_BUILD)
|
||||||
if (MNN_BUILD_FOR_ANDROID_COMMAND)
|
if (MNN_BUILD_FOR_ANDROID_COMMAND)
|
||||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../")
|
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../")
|
||||||
endif()
|
endif()
|
||||||
add_library(MNN_Express SHARED ${MNN_EXPR_SRCS})
|
add_library(MNN_Express SHARED ${MNN_EXPR_SRCS})
|
||||||
target_link_libraries(MNN_Express MNN)
|
target_link_libraries(MNN_Express MNN)
|
||||||
|
if (MNN_BUILD_MINI)
|
||||||
|
target_link_libraries(MNN_Express $<TARGET_OBJECTS:MNNTransform>)
|
||||||
|
endif()
|
||||||
ELSE()
|
ELSE()
|
||||||
add_library(MNNExpress OBJECT ${MNN_EXPR_SRCS})
|
add_library(MNNExpress OBJECT ${MNN_EXPR_SRCS})
|
||||||
ENDIF()
|
ENDIF()
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
//
|
||||||
|
// Distributions.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2019/11/28.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "Distributions.hpp"
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
|
||||||
|
void Distributions::uniform(const int count, const float min, const float max, float *r, std::mt19937 gen) {
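// std::uniform_real_distribution samples from [a, b); nextafter(max, ...) makes the
// requested upper bound inclusive.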
|
||||||
|
std::uniform_real_distribution<float> dis(min, std::nextafter(max, std::numeric_limits<float>::max()));
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
r[i] = dis(gen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Distributions::gaussian(const int count, const float mu, const float sigma, float *r, std::mt19937 gen) {
|
||||||
|
std::normal_distribution<float> dis(mu, sigma);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
r[i] = dis(gen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Express
|
||||||
|
} // namespace MNN
|
|
@ -0,0 +1,27 @@
|
||||||
|
//
|
||||||
|
// Distributions.hpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2019/11/28.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef Distributions_hpp
|
||||||
|
#define Distributions_hpp
|
||||||
|
|
||||||
|
#include <MNN/MNNDefine.h>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
|
||||||
|
class Distributions {
|
||||||
|
public:
|
||||||
|
static void uniform(const int count, const float min, const float max, float* r, std::mt19937 gen);
|
||||||
|
static void gaussian(const int count, const float mu, const float sigma, float* r, std::mt19937 gen);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Express
|
||||||
|
} // namespace MNN
|
||||||
|
|
||||||
|
#endif // Distributions_hpp
|
File diff suppressed because it is too large
|
@ -0,0 +1,45 @@
|
||||||
|
//
|
||||||
|
// ExecutorScope.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2020/10/26.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
#include <MNN/expr/Executor.hpp>
|
||||||
|
#include <MNN/expr/Scope.hpp>
|
||||||
|
#include <MNN/expr/ExecutorScope.hpp>
|
||||||
|
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
|
||||||
|
typedef std::shared_ptr<Express::Executor> ExecutorRef;
|
||||||
|
#if !defined(__APPLE__)
|
||||||
|
thread_local static Scope<ExecutorRef> g_executor_scope;
|
||||||
|
#else
|
||||||
|
static Scope<ExecutorRef> g_executor_scope;
|
||||||
|
#endif
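
// The executor scope is kept in thread-local storage where available; on Apple
// platforms a single process-wide scope object is used instead.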
|
||||||
|
|
||||||
|
ExecutorScope::ExecutorScope(const std::shared_ptr<Executor>& current) {
|
||||||
|
g_executor_scope.EnterScope(current);
|
||||||
|
}
|
||||||
|
|
||||||
|
ExecutorScope::ExecutorScope(const std::string& scope_name,
|
||||||
|
const std::shared_ptr<Executor>& current) {
|
||||||
|
g_executor_scope.EnterScope(scope_name, current);
|
||||||
|
}
|
||||||
|
|
||||||
|
ExecutorScope::~ExecutorScope() {
|
||||||
|
g_executor_scope.ExitScope();
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::shared_ptr<Executor> ExecutorScope::Current() {
|
||||||
|
if (g_executor_scope.ScopedLevel() > 0) {
|
||||||
|
return g_executor_scope.Current().content;
|
||||||
|
}
|
||||||
|
return Executor::getGlobalExecutor();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Express
|
||||||
|
} // namespace MNN
|
261
express/Expr.cpp
|
@ -8,23 +8,33 @@
#define FLATBUFFERS_PREFER_PRINTF
#include <MNN/expr/Expr.hpp>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <map>
#include "core/MNNMemoryUtils.h"
#include "Utils.hpp"
#include <map>
#include "core/FileLoader.hpp"
#include <MNN/expr/Executor.hpp>
#include "core/TensorUtils.hpp"
#include "MNN_generated.h"
//#define MNN_OPEN_TIME_TRACE
#include "MNN/AutoTime.hpp"
#include "MNN/expr/ExecutorScope.hpp"

//#define MNN_EXPRESS_ERROR_REPORT
static inline std::string numberToString(int index) {
    char s[10];
    snprintf(s, 10, "%d", index);
    return std::string(s);
}

static bool HasUnknownDim(const std::vector<int>& dims) {
    for (const int& dim : dims) {
        if (dim < 0) {
            return true;
        }
    }
    return false;
}

namespace MNN {
namespace Express {
void Variable::Info::syncSize() {
@ -87,8 +97,7 @@ bool VARP::fix(VARP::InputType type) const {
}

Expr::Expr(int outputSize) {
    mInside.reset(new Inside);
    mInside.reset(new Inside(outputSize));
    mInside->mOutputInfos.resize(outputSize);
    mOutputNames.resize(outputSize);
}

@ -117,27 +126,46 @@ void Expr::_addLinkForInputs(EXPRP expr) {
        }
    }
}
EXPRP Expr::create(Variable::Info&& info) {
EXPRP Expr::create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy) {
    EXPRP expr(new Expr(1));
    expr->mOp = nullptr;
    auto originPtr = info.ptr;
    auto originPtr = ptr;
    expr->mInside->mOutputInfos[0] = std::move(info);
    auto& dstInfo = expr->mInside->mOutputInfos[0];
    dstInfo.syncSize();
    if (dstInfo.size > 0) {
        expr->mExtraBuffer.reset(new char[dstInfo.size * dstInfo.type.bytes()], std::default_delete<char[]>());
        expr->mInside->mOutputInfos[0].ptr = expr->mExtraBuffer.get();
        expr->mInside->mInfoDirty = false;
    dstInfo.syncSize();
    Utils::copyInfoToTensor(expr->mInside->mOutputTensors[0], expr->mInside->mOutputInfos.data());
    expr->mType = type;
    if (type == VARP::CONSTANT) {
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::CONSTANT;
    } else if (type == VARP::INPUT) {
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::INPUT;
    } else {
        expr->mInside->mOutputInfos[0].ptr = nullptr;
        // VARP::TRAINABLE
        expr->mInside->mInfoDirty = true;
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->usage = Tensor::InsideDescribe::TRAINABLE;
    }
    if (dstInfo.size > 0 && copy) {
        auto res = Utils::allocMemoryForHostTensor(expr->mInside->mOutputTensors[0]);
        if (!res) {
            MNN_ASSERT(false);
            return nullptr;
        }
    } else {
        expr->mInside->mOutputTensors[0]->buffer().host = nullptr;
    }
    if (nullptr == originPtr) {
        expr->mType = VARP::INPUT;
        if (type == VARP::INPUT && dstInfo.size > 0) {
            expr->mInside->mContentDirty = true;
        }
        return expr;
    }
    expr->mType = VARP::CONSTANT;
    expr->mInside->mContentDirty = false;
    ::memcpy(expr->mInside->mOutputInfos[0].ptr, originPtr, dstInfo.size * dstInfo.type.bytes());
    if (copy) {
        ::memcpy(expr->mInside->mOutputTensors[0]->buffer().host, originPtr, dstInfo.size * dstInfo.type.bytes());
    } else {
        TensorUtils::getDescribe(expr->mInside->mOutputTensors[0])->memoryType = Tensor::InsideDescribe::MEMORY_OUTSIDE;
        expr->mInside->mOutputTensors[0]->buffer().host = (uint8_t*)originPtr;
    }
    return expr;
}
EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize) {
@ -147,8 +175,7 @@ EXPRP Expr::create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP
    expr->mOp = flatbuffers::GetMutableRoot<Op>(extra.first.get());
    expr->mOpBufferSize = extra.second;
    expr->mInputs = std::move(inputs);
    expr->mInside->mInputInfos.resize(expr->mInputs.size());
    expr->mInside->mReq = ExecutorScope::Current()->getRequirement(expr.get());
    expr->mInside->mReq = Executor::getGlobalExecutor()->getRequirement(expr.get());
    _addLinkForInputs(expr);
    return expr;
}
@ -161,34 +188,34 @@ EXPRP Expr::create(const OpT* op, std::vector<VARP> inputs, int outputSize) {
            info.dim[0] = 1;
        }
        info.order = Utils::revertFormat(op->main.AsInput()->dformat);
        info.ptr = nullptr;
        info.type = Utils::revertDataType(op->main.AsInput()->dtype);
        return create(std::move(info));
        return create(std::move(info), nullptr, VARP::INPUT);
    }
    if (OpType_Const == op->type || OpType_TrainableParam == op->type) {
        Variable::Info info;
        info.dim = op->main.AsBlob()->dims;
        info.order = Utils::revertFormat(op->main.AsBlob()->dataFormat);
        info.ptr = nullptr;
        void* ptr = nullptr;
        info.type = Utils::revertDataType(op->main.AsBlob()->dataType);
        switch (op->main.AsBlob()->dataType) {
            case DataType_DT_INT8:
                info.ptr = (void*)op->main.AsBlob()->int8s.data();
                ptr = (void*)op->main.AsBlob()->int8s.data();
                break;
            case DataType_DT_INT32:
                info.ptr = (void*)op->main.AsBlob()->int32s.data();
                ptr = (void*)op->main.AsBlob()->int32s.data();
                break;
            case DataType_DT_UINT8:
                info.ptr = (void*)op->main.AsBlob()->uint8s.data();
                ptr = (void*)op->main.AsBlob()->uint8s.data();
                break;
            case DataType_DT_FLOAT:
                info.ptr = (void*)op->main.AsBlob()->float32s.data();
                ptr = (void*)op->main.AsBlob()->float32s.data();
                break;
            default:
                break;
        }
        auto expr = create(std::move(info));
        //MNN_ASSERT(nullptr != ptr);
        if (OpType_TrainableParam == op->type) {
        auto expr = create(std::move(info), ptr, VARP::CONSTANT);
        if (OpType_TrainableParam == op->type && nullptr != ptr) {
            expr->mType = VARP::TRAINABLE;
        }
        return expr;
@ -213,7 +240,7 @@ bool Expr::requireInfo() {
        return false;
    }
    if (nullptr == mOp) {
        return mInside->mOutputInfos[0].size > 0;
        return !HasUnknownDim(mInside->mOutputInfos[0].dim);
    }
    bool ready = true;
    for (int i = 0; i < mInputs.size(); ++i) {
@ -221,8 +248,8 @@ bool Expr::requireInfo() {
            // The Variable is set nullptr by api
            return false;
        }
        mInside->mInputInfos[i] = mInputs[i]->getInfo();
        auto inputInfo = mInputs[i]->getInfo();
        if (nullptr == mInside->mInputInfos[i] && (!mInside->mReq.supportError[i])) {
        if (nullptr == inputInfo) {
#ifdef MNN_EXPRESS_ERROR_REPORT
            MNN_ERROR("%s, %d input not ready\n", mName.c_str(), i);
#endif
@ -233,15 +260,19 @@ bool Expr::requireInfo() {
    for (int i = 0; i < mInputs.size(); ++i) {
        auto& v = mInputs[i];
        if (mInside->mReq.shapeNeedContent[i]) {
            // `readInternal` maybe return nullptr if element count is 0.
            // For shape need content, the content must not be nullptr
            v->readInternal(true);
            auto ptr = v->readInternal(true);
            if (nullptr == ptr) {
                ready = false;
                break;
            }
        }
    }
    if (!ready) {
        return false;
    }
    //MNN_PRINT("Info %s, %p Start\n", mName.c_str(), this);
    auto res = Executor::getGlobalExecutor()->computeInfo(this);
    auto res = ExecutorScope::Current()->computeInfo(this);
    //MNN_PRINT("Info Compute %s\n", mName.c_str());

    if (NO_ERROR == res) {
@ -261,6 +292,14 @@ const std::vector<WeakEXPRP>& Variable::toExprs() const {

VARP Variable::create(EXPRP expr, int index) {
    VARP res(new Variable(expr, index));
#ifdef MNN_EXPR_SHAPE_EAGER
    auto info = expr->requireInfo();
    if (!info) {
#ifdef MNN_EXPRESS_ERROR_REPORT
        MNN_ERROR("Can't compute shape\n");
#endif
    }
#endif
    return res;
}
void Expr::replace(EXPRP old, EXPRP from) {
@ -307,16 +346,22 @@ void Expr::replace(EXPRP old, EXPRP from) {
    old->mValid = from->mValid;
    old->mInside = from->mInside;
    old->mInputs = from->mInputs;
    std::vector<Expr*> visited;
    old->visitOutputs([&](EXPRP expr, int index) {
        if (expr->mInside->mInfoDirty && expr->mValid && !expr->mInside->mLinkCache) {
        if (expr->visited()) {
            return false;
        }
        visited.emplace_back(expr.get());
        expr->setVisited(true);
        expr->mInside->mCache.reset();
        expr->mInside->mCacheOffset = 0;
        expr->mValid = true;
        expr->mInside->mInfoDirty = true;
        return true;
    });
    for (auto e : visited) {
        e->setVisited(false);
    }
}

void Variable::setName(const std::string& name) {
@ -351,7 +396,7 @@ bool Variable::input(VARP src) {
        info = tempInfo.get();
    }
    auto dstInfo = getInfo();
    bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size();
    bool needChange = nullptr == dstInfo || info->order != dstInfo->order || info->dim.size() != dstInfo->dim.size() || info->type != dstInfo->type;
    if (!needChange) {
        for (int i=0; i<info->dim.size(); ++i) {
            if (dstInfo->dim[i] != info->dim[i]) {
@ -362,22 +407,19 @@ bool Variable::input(VARP src) {
    }

    if (!mFrom->mInside->mCache) {
        Executor::getGlobalExecutor()->makeCache({mFrom}, false);
        ExecutorScope::Current()->makeCache({mFrom}, false);
    }
    if (needChange) {
        bool needAlloc = info->size * info->type.bytes() > mFrom->mInside->mOutputInfos[0].size * mFrom->mInside->mOutputInfos[0].type.bytes();
        mFrom->mInside->mOutputInfos[0] = *info;
        if (needAlloc) {
        Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
        mFrom->mExtraBuffer.reset(new char[info->size * info->type.bytes()], std::default_delete<char[]>());
        Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
        }
        Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
        mFrom->mInside->mOutputInfos[0].ptr = mFrom->mExtraBuffer.get();
        mFrom->mInside->mCache->setShapeDirty(0, mFrom->outputInfo(0));
    }
    if (info->size) {
        auto dstPtr = writeInternal(false);
        auto srcPtr = src->readMap<void>();
        if (nullptr == dstPtr || nullptr == srcPtr) {
            MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
            //MNN_ERROR("Alloc memory error or compute src error in Variable::Input\n");
            return false;
        }
        ::memcpy(dstPtr, srcPtr, info->size * info->type.bytes());
@ -387,7 +429,7 @@ bool Variable::input(VARP src) {
    } else {
        informDirty();
    }
    mFrom->mInside->mCache->setContentReady();
    mFrom->mInside->mContentDirty = false;
    return true;
}

@ -396,23 +438,44 @@ void Variable::replace(VARP dst, VARP src) {
        dst->setExpr(nullptr, 0);
        return;
    }
    if (nullptr == dst) {
        dst.mContent = src.mContent;
        return;
    }
    if (src->mFrom.get() == dst->mFrom.get()) {
        dst->mFromIndex = src->mFromIndex;
        return;
    }
    if (src->mFrom->outputSize() != dst->mFrom->outputSize()) {
        // Can't replace Expr, Just replace VARP
        dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
        std::vector<Expr*> visited;
            src->mFrom->mTo.emplace_back(expr);
        dst->mFrom->visitOutputs([src, dst, &visited](EXPRP expr, int index) {
            if (expr->visited()) {
                return false;
            }
        });
            }
        dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
            expr->setVisited(true);
            visited.emplace_back(expr.get());
            expr->mInside->mCache.reset();
            expr->mInside->mCacheOffset = 0;
            expr->mValid = true;
            expr->mInside->mInfoDirty = true;
            expr->mInside->mContentDirty = true;
            return true;
        });
        for (auto v : visited) {
            v->setVisited(false);
        }
        dst->mFrom->visitOutputs([src, dst](EXPRP expr, int index) {
            for (int i =0; i< expr->inputs().size(); ++i) {
                auto input = expr->inputs()[i];
                if (input == dst) {
                    expr->mInputs[i] = src;
                }
            }
            src->mFrom->mTo.emplace_back(expr);
            return false;
        });

        dst->mFrom = src->mFrom;
        dst->mFromIndex = src->mFromIndex;
        return;
@ -452,15 +515,19 @@ bool Variable::resize(INTS dims) {
    }
    info.dim = dims;
    info.syncSize();
    mFrom->mExtraBuffer.reset(new char[info.size * info.type.bytes()], std::default_delete<char[]>());
    Utils::copyInfoToTensor(mFrom->inside()->mOutputTensors[0], mFrom->inside()->mOutputInfos.data());
    info.ptr = mFrom->mExtraBuffer.get();
    Utils::releaseMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
    if (0 >= info.size) {
        return false;
    }
    bool res = Utils::allocMemoryForHostTensor(mFrom->inside()->mOutputTensors[0]);
    if (!res) {
        return false;
    }

    mFrom->mValid = true;
    mFrom->mInside->mInputInfos.clear();
    mFrom->inside()->mInfoDirty = false;
    auto cache = mFrom->mInside->mCache;
    mFrom->inside()->mContentDirty = true;
    if (nullptr != cache) {
        cache->setShapeDirty(0, mFrom->outputInfo(0));
    }
    mFrom->visitOutputs([](EXPRP expr, int index) { return expr->setInfoDirty(); });
    return true;
}
@ -478,11 +545,12 @@ void Expr::visit(EXPRP expr, const std::function<bool(EXPRP)>& before, const std
void* Variable::readInternal(bool forShape) {
    if (nullptr == mFrom->get()) {
        if (VARP::INPUT == mFrom->mType) {
            if (nullptr == mFrom->mInside->mCache) {
            if (mFrom->mInside->mContentDirty) {
                return nullptr;
            }
        }
        return mFrom->outputInfo(mFromIndex)->ptr;
        //MNN_ASSERT(nullptr != mFrom->inside()->mOutputTensors[0]->buffer().host);
        return mFrom->inside()->mOutputTensors[0]->buffer().host;
    }
    auto res = mFrom->requireInfo();
    if (false == res) {
@ -490,21 +558,26 @@ void* Variable::readInternal(bool forShape) {
    }
    auto cache = mFrom->inside()->mCache;
    if (nullptr == cache) {
        Executor::getGlobalExecutor()->makeCache({mFrom}, forShape);
        ExecutorScope::Current()->makeCache({mFrom}, forShape);
        cache = mFrom->inside()->mCache;
    }
    if (nullptr == cache) {
        return nullptr;
    }
    if (NO_ERROR != Executor::getGlobalExecutor()->runCache(cache)) {
    if (NO_ERROR != ExecutorScope::Current()->runCache(cache)) {
        return nullptr;
    }
    cache->syncOutput(mFrom->mInside->mCacheOffset + mFromIndex, mFrom->outputInfo(mFromIndex));
    return Executor::mapOutput(cache.get(), mFrom->mInside->mCacheOffset + mFromIndex, mFrom->mInside->mOutputTensors[mFromIndex]);
    return mFrom->outputInfo(mFromIndex)->ptr;
}

void Variable::informDirty() {
    mFrom->visitOutputs([](EXPRP expr, int index) {
    std::vector<Expr*> visited;
    mFrom->visitOutputs([&visited](EXPRP expr, int index) {
        if (expr->visited()) {
            return false;
        }
        visited.emplace_back(expr.get());
        expr->setVisited(true);
        if (expr->inside()->mReq.shapeNeedContent.empty()) {
            // Not init
            return false;
@ -514,28 +587,32 @@ void Variable::informDirty() {
            expr->visitOutputs([](EXPRP e, int index) { return e->setInfoDirty(); });
            return false;
        }
        if (expr->inside()->mContentDirty) {
            return false;
        }
        expr->inside()->mContentDirty = true;
        if (expr->inside()->mReq.contentNeedContent[index]) {
            if (expr->inside()->mCache != nullptr) {
                expr->inside()->mCache->setContentDirty();
                Executor::setContentDirty(expr->inside()->mCache.get());
            }
            return true;
        }
        return false;
    });
    for (auto e : visited) {
        e->setVisited(false);
    }
}
void Variable::prepareCompute(const std::vector<VARP>& vars, bool forceCpu) {
    std::vector<EXPRP> exprs;
    for (auto v : vars) {
        if (v->expr().first->inside()->mCache == nullptr) {
        if (!v->expr().first->visited()) {
            v->expr().first->inside()->mCache = nullptr;
            v->expr().first->requireInfo();
            v->expr().first->setVisited(true);
            exprs.emplace_back(v->expr().first);
        }
    }
    Executor::getGlobalExecutor()->makeCache(std::move(exprs), forceCpu);
    for (auto v : vars) {
        v->expr().first->setVisited(false);
    }
    ExecutorScope::Current()->makeCache(std::move(exprs), forceCpu);
}

void* Variable::writeInternal(bool inform) {
@ -545,16 +622,8 @@ void* Variable::writeInternal(bool inform) {
    if (inform) {
        informDirty();
    }
    auto cache = mFrom->mInside->mCache;
    mFrom->mInside->mContentDirty = false;
    if (nullptr == cache) {
    return mFrom->inside()->mOutputTensors[0]->host<void>();
        Executor::getGlobalExecutor()->makeCache({mFrom});
        cache = mFrom->mInside->mCache;
    }
    if (nullptr == cache) {
        return nullptr;
    }
    mFrom->mInside->mCache->setContentReady();
    return mFrom->mInside->mOutputInfos[0].ptr;
}

void Variable::unMap() {
@ -591,12 +660,17 @@ bool Expr::setInfoDirty() {
    mInside->mContentDirty = true;
    mValid = true;
    if (mInside->mCache != nullptr) {
        mInside->mCache->setShapeDirty(0, nullptr);
        Executor::setShapeDirty(mInside->mCache.get());
    }
    for (auto o : mInside->mOutputTensors) {
        Utils::releaseMemoryForHostTensor(o);
    }
    return true;
}

std::vector<VARP> Variable::load(const char* fileName) {
    AutoStorage<uint8_t> buffer;
    {
        FileLoader loader(fileName);
        if (!loader.valid()) {
            MNN_ERROR("Error for open %s\n", fileName);
@ -606,11 +680,11 @@ std::vector<VARP> Variable::load(const char* fileName) {
        if (!loader.valid()) {
            return {};
        }
        AutoStorage<uint8_t> buffer;
        loader.merge(buffer);
        if (buffer.get() == nullptr) {
            return {};
        }
    }
    return load(buffer.get(), buffer.size());
}
std::vector<VARP> Variable::load(const uint8_t* buffer, size_t length) {
@ -722,6 +796,7 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
        } else {
            MNN_ASSERT(1 == expr->outputSize());
            auto& info = expr->mInside->mOutputInfos[0];
            auto ptr = expr->mInside->mOutputTensors[0]->host<void>();
            op.reset(new OpT);
            if (expr->mType != VARP::INPUT) {
                auto blob = new BlobT;
@ -730,16 +805,20 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
                if (info.type.code == halide_type_float) {
                    blob->dataType = DataType_DT_FLOAT;
                    blob->float32s.resize(info.size);
                    ::memcpy(blob->float32s.data(), info.ptr, info.size * sizeof(float));
                    ::memcpy(blob->float32s.data(), ptr, info.size * sizeof(float));
                } else if (info.type.code == halide_type_int) {
                } else if (info.type.code == halide_type_int && info.type.bits == 32) {
                    blob->dataType = DataType_DT_INT32;
                    blob->int32s.resize(info.size);
                    ::memcpy(blob->int32s.data(), info.ptr, info.size * sizeof(int));
                    ::memcpy(blob->int32s.data(), ptr, info.size * sizeof(int));
                }
                } else if (info.type.code == halide_type_int && info.type.bits == 8) {
                else if (info.type.code == halide_type_uint && info.type.bits == 8) {
                    blob->dataType = DataType_DT_INT8;
                    blob->int8s.resize(info.size);
                    auto pptr = (int8_t *)ptr;
                    ::memcpy(blob->int8s.data(), ptr, info.size * sizeof(int8_t));
                } else if (info.type.code == halide_type_uint && info.type.bits == 8) {
                    blob->dataType = DataType_DT_UINT8;
                    blob->uint8s.resize(info.size);
                    ::memcpy(blob->uint8s.data(), info.ptr, info.size * sizeof(uint8_t));
                    ::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(uint8_t));
                }
                op->type = OpType_Const;
                if (expr->mType == VARP::TRAINABLE) {
@ -781,12 +860,12 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
        auto op = dest->oplists[index].get();
        auto tensorIndexOffset = varIndexInfo[expr];
        for (int v=0; v<expr->outputSize(); ++v) {
            auto const tensorIndex = tensorIndexOffset + v;
            auto subindex = tensorIndexOffset + v;
            if (dest->tensorName[tensorIndex].empty()) {
            if (dest->tensorName[subindex].empty()) {
                if (v == 0) {
                    dest->tensorName[tensorIndex] = op->name;
                    dest->tensorName[subindex] = op->name;
                } else {
                    dest->tensorName[tensorIndex] = op->name + numberToString(v);
                    dest->tensorName[subindex] = op->name + numberToString(v);
                }
            }
        }
@ -0,0 +1,210 @@
//
// Initializer.cpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//

#include "Initializer.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <cmath>
#include <vector>
#include "Distributions.hpp"
#include "RandomGenerator.hpp"

namespace MNN {
namespace Express {

Express::VARP Initializer::createConstVar(Express::INTS dim, Express::Dimensionformat format) {
    auto res = Express::_Input(dim, format, halide_type_of<float>());
    this->onExecute(res);
    res.fix(Express::VARP::CONSTANT);
    return res;
}

class ConstantInitializer : public Initializer {
public:
    ConstantInitializer(float value) : mConstant(value) {
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        auto ptr = p->writeMap<float>();
        for (int i = 0; i < count; i++) {
            ptr[i] = mConstant;
        }
    }

private:
    float mConstant;
};
Initializer* Initializer::constValue(float value) {
    return new ConstantInitializer(value);
}

class UniformInitializer : public Initializer {
public:
    UniformInitializer(float min = 0, float max = 1) {
        mMin = min;
        mMax = max;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        Distributions::uniform(count, mMin, mMax, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    float mMin;
    float mMax;
};

Initializer* Initializer::uniform(float minValue, float maxValue) {
    return new UniformInitializer(minValue, maxValue);
}

class XavierInitializer : public Initializer {
public:
    XavierInitializer(VarianceNorm norm = FANIN) {
        mNorm = norm;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        // referenced from Caffe
        // https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
        int fanIn = count / dims[0];
        int fanOut = dims.size() > 1 ? count / dims[1] : count;
        float n = fanIn; // default: FANIN
        if (mNorm == VarianceNorm::AVERAGE) {
            n = (fanIn + fanOut) / 2.0f;
        } else if (mNorm == VarianceNorm::FANOUT) {
            n = fanOut;
        }
        float scale = sqrtf(3.0f / n);

        Distributions::uniform(count, -scale, scale, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    VarianceNorm mNorm;
};
Initializer* Initializer::xavier(VarianceNorm norm) {
    return new XavierInitializer(norm);
}

class GaussianInitializer : public Initializer {
public:
    GaussianInitializer(float mean = 0, float std = 1) {
        mMean = mean;
        mStd = std;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        Distributions::gaussian(count, mMean, mStd, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    float mMean;
    float mStd;
};
Initializer* Initializer::gauss(float mean, float std) {
    return new GaussianInitializer(mean, std);
}

class MSRAInitializer : public Initializer {
public:
    MSRAInitializer(VarianceNorm norm = FANIN) {
        mNorm = norm;
    }

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        // referenced from Caffe
        // https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
        int fanIn = count / dims[0];
        int fanOut = dims.size() > 1 ? count / dims[1] : count;
        float n = fanIn; // default: FANIN
        if (mNorm == VarianceNorm::AVERAGE) {
            n = (fanIn + fanOut) / 2.0f;
        } else if (mNorm == VarianceNorm::FANOUT) {
            n = fanOut;
        }
        float std = sqrtf(2.0f / n);

        Distributions::gaussian(count, 0.0f, std, p->writeMap<float>(), RandomGenerator::generator());
    }

private:
    VarianceNorm mNorm;
};
Initializer* Initializer::MSRA(VarianceNorm norm) {
    return new MSRAInitializer(norm);
}

class BilinearInitializer : public Initializer {
public:
    BilinearInitializer() = default;

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        MNN_ASSERT(dims.size() == 4);
        MNN_ASSERT(dims[2] == dims[3]); // NCHW, H == W
        // referenced from Caffe
        // https://github.com/BVLC/caffe/blob/master/include/caffe/filler.hpp
        int f = ceilf(dims[3] / 2.0f);
        float c = (dims[3] - 1) / (2.0f * f);
        auto ptr = p->writeMap<float>();

        for (int i = 0; i < count; i++) {
            float x = i % dims[3];
            float y = (i / dims[3]) % dims[2];
            ptr[i] = (1 - std::fabs(x / f - c)) * (1 - std::fabs(y / f - c));
        }
    }
};
Initializer* Initializer::bilinear() {
    return new BilinearInitializer();
}

class PositiveUnitball : public Initializer {
public:
    PositiveUnitball() = default;

    virtual void onExecute(Express::VARP p) override {
        const int count = p->getInfo()->size;
        MNN_ASSERT(count > 0);
        const std::vector<int> dims = p->getInfo()->dim;
        auto ptr = p->writeMap<float>();

        Distributions::uniform(count, 0, 1, ptr, RandomGenerator::generator());

        int dim = count / dims[0];
        for (int i = 0; i < dims[0]; i++) {
            float sum = 0;
            for (int j = 0; j < dim; j++) {
                sum += ptr[i * dim + j];
            }
            for (int j = 0; j < dim; j++) {
                ptr[i * dim + j] = ptr[i * dim + j] / sum;
            }
        }
    }
};
Initializer* Initializer::positiveUnitball() {
    return new PositiveUnitball();
}

} // namespace Express
} // namespace MNN
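A quick numeric check of the fan computation implemented above, using an assumed 64x32x3x3 NCHW convolution weight (the shape is illustrative, not from the commit):

#include <cmath>
#include <cstdio>

int main() {
    const int dims[4] = {64, 32, 3, 3};
    const int count   = 64 * 32 * 3 * 3;     // 18432 elements
    const int fanIn   = count / dims[0];     // 288  = 32 * 3 * 3
    const int fanOut  = count / dims[1];     // 576  = 64 * 3 * 3
    // xavier(FANIN): uniform in [-scale, scale]; MSRA(FANIN): gaussian(0, std)
    std::printf("xavier FANIN scale = %f\n", std::sqrt(3.0f / fanIn));
    std::printf("MSRA   FANIN std   = %f\n", std::sqrt(2.0f / fanIn));
    (void)fanOut;
    return 0;
}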
@ -0,0 +1,43 @@
//
// Initializer.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef Initializer_hpp
#define Initializer_hpp

#include <MNN/expr/Expr.hpp>

namespace MNN {
namespace Express {
class RandomGenerator;
class MNN_PUBLIC Initializer {
public:
    Initializer() = default;
    virtual ~Initializer() = default;
    Express::VARP createConstVar(Express::INTS dim, Express::Dimensionformat format = Express::NCHW);
    virtual void onExecute(Express::VARP p) = 0;

    static Initializer* constValue(float value);
    static Initializer* uniform(float minValue = 0.0f, float maxValue = 1.0f);

    enum VarianceNorm {
        FANIN,
        FANOUT,
        AVERAGE,
    };

    static Initializer* xavier(VarianceNorm norm = FANIN);
    static Initializer* gauss(float mean = 0.0f, float std = 1.0f);
    static Initializer* MSRA(VarianceNorm norm = FANIN);
    static Initializer* bilinear();
    static Initializer* positiveUnitball();
};

} // namespace Express
} // namespace MNN

#endif // Initializer_hpp
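For orientation, a minimal sketch of the factory API declared above: the factories return a raw Initializer*, and createConstVar builds, fills, and fixes a variable as CONSTANT. The unique_ptr wrapper and the 64x32x3x3 shape are this sketch's own assumptions, not part of the commit.

#include <memory>
#include "Initializer.hpp"

using namespace MNN::Express;

VARP makeXavierWeight() {
    // Wrap the raw pointer so the sketch cannot leak it.
    std::unique_ptr<Initializer> init(Initializer::xavier(Initializer::FANIN));
    // createConstVar() calls onExecute() and then fixes the variable as CONSTANT.
    return init->createConstVar({64, 32, 3, 3}, NCHW);
}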
@ -30,7 +30,18 @@ static DataType _convertDataType(halide_type_t type) {
    }
    return DataType_DT_INVALID;
}
static VARP _checkNC4HW4(VARP x) {
#ifdef MNN_EXPR_SHAPE_EAGER
    auto info = x->getInfo();
    if (nullptr != info && info->order == NC4HW4) {
        return _Convert(x, NCHW);
    }
#endif
    return x;
}
static VARP _Binary(VARP x, VARP y, BinaryOpOperation operation) {
    x = _checkNC4HW4(x);
    y = _checkNC4HW4(y);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_BinaryOp;
    op->type = OpType_BinaryOp;
@ -49,6 +60,7 @@ static VARP _Unary(VARP x, UnaryOpOperation operation) {
    return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
    x = _checkNC4HW4(x);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ReductionParam;
    op->type = OpType_Reduction;
@ -60,6 +72,7 @@ static VARP _Reduce(VARP x, INTS dim, ReductionType type, bool keepDim) {
    return (Variable::create(Expr::create(op.get(), {x})));
}
static VARP _ReduceMutable(VARP x, VARP dim, ReductionType type, bool keepDim) {
    x = _checkNC4HW4(x);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ReductionParam;
    op->type = OpType_Reduction;
@ -955,6 +968,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMax(VARP input, int axis) {
    input = _checkNC4HW4(input);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ArgMax;
    op->type = OpType_ArgMax;
@ -976,6 +990,7 @@ Returns:
A variable of type int.
*/
VARP _ArgMin(VARP input, int axis) {
    input = _checkNC4HW4(input);
    std::unique_ptr<OpT> op(new OpT);
    op->main.type = OpParameter_ArgMax;
    op->type = OpType_ArgMin;
@ -5,6 +5,7 @@
// Created by MNN on 2019/08/20.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef MergeOptimizer_hpp
#define MergeOptimizer_hpp

@ -54,16 +54,14 @@ VARP _Input(INTS shape, Dimensionformat data_format, halide_type_t dtype) {
    info.dim = std::move(shape);
    info.order = data_format;
    info.type = dtype;
    info.ptr = nullptr;
    return (Variable::create(Expr::create(std::move(info), nullptr, VARP::INPUT)));
    return (Variable::create(Expr::create(std::move(info))));
}
VARP _Scalar(const void* ptr, halide_type_t type) {
    Variable::Info info;
    info.dim = {};
    info.order = NHWC;
    info.type = type;
    info.ptr = (void*)ptr;
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
    return (Variable::create(Expr::create(std::move(info))));
}
/*create a constant variable.
Args:
@ -79,8 +77,7 @@ VARP _Const(const void* ptr, INTS shape, Dimensionformat format, halide_type_t t
    info.dim = std::move(shape);
    info.order = format;
    info.type = type;
    info.ptr = (void*)ptr;
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
    return (Variable::create(Expr::create(std::move(info))));
}

VARP _Const(float value, INTS shape, Dimensionformat format) {
@ -93,8 +90,8 @@ VARP _Const(float value, INTS shape, Dimensionformat format) {
    for (int i = 0; i < info.size; ++i) {
        values[i] = value;
    }
    info.ptr = (void*)values.data();
    auto ptr = (void*)values.data();
    return (Variable::create(Expr::create(std::move(info))));
    return (Variable::create(Expr::create(std::move(info), ptr, VARP::CONSTANT)));
}

VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format, halide_type_t type) {
@ -107,6 +104,23 @@ VARP _TrainableParam(float value, INTS dims, Dimensionformat format) {
    v.fix(VARP::TRAINABLE);
    return v;
}
VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape) {
    std::unique_ptr<OpT> ipOp(new OpT);
    ipOp->type = OpType_InnerProduct;
    ipOp->main.type = OpParameter_InnerProduct;
    ipOp->main.value = new InnerProductT;
    auto ipParam = ipOp->main.AsInnerProduct();

    ipParam->outputCount = outputShape[1];
    if(!bias.empty()) {
        ipParam->biasTerm = 1;
    }
    ipParam->weightSize = weight.size();

    ipParam->weight = std::move(weight);
    ipParam->bias = std::move(bias);
    return (Variable::create(Expr::create(ipOp.get(), {x})));
}

VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads) {
    std::unique_ptr<OpT> convOp(new OpT);
@ -183,7 +197,7 @@ VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS
    return (Variable::create(Expr::create(convOp.get(), {x})));
}
VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6, int nbits) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_Convolution;
    if (channel[0] == channel[1] && channel[0] == group) {
@ -285,6 +299,42 @@ VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad, INTS stride, INTS
    return (Variable::create(Expr::create(std::move(convOp), {x, weight})));
}

VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
             PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, bool relu6) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_Deconvolution;
    if (channel[0] == channel[1] && channel[0] == group) {
        convOp->type = OpType_DeconvolutionDepthwise;
    }
    convOp->main.type = OpParameter_Convolution2D;
    convOp->main.value = new Convolution2DT;
    auto conv2D = convOp->main.AsConvolution2D();
    conv2D->common.reset(new Convolution2DCommonT);
    conv2D->common->padMode = _convertPadMode(pad);
    if (pads.size() == 2) {
        conv2D->common->padX = pads[0];
        conv2D->common->padY = pads[1];
    } else {
        conv2D->common->pads = std::move(pads);
    }
    conv2D->common->strideX = stride[0];
    conv2D->common->strideY = stride[1];
    conv2D->common->group = group;
    conv2D->common->outputCount = channel[1];
    conv2D->common->inputCount = channel[0];
    conv2D->common->dilateX = dilate[0];
    conv2D->common->dilateY = dilate[1];
    conv2D->common->kernelX = kernelSize[0];
    conv2D->common->kernelY = kernelSize[1];
    conv2D->common->relu6 = relu6;
    conv2D->common->relu = relu;
    MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]);
    conv2D->weight = std::move(weight);
    MNN_ASSERT(bias.size() == channel[1]);
    conv2D->bias = std::move(bias);
    return (Variable::create(Expr::create(convOp.get(), {x})));
}

static VARP _Pool(VARP x, INTS kernel, INTS stride, PoolType type, PaddingMode pad, INTS pads) {
    std::unique_ptr<OpT> pool(new OpT);
    pool->type = OpType_Pooling;
@ -381,9 +431,13 @@ x: A variable.
Returns:
output: A variable with the same type as `x`.
*/
VARP _Relu6(VARP x) {
VARP _Relu6(VARP x, float minValue, float maxValue) {
    std::unique_ptr<OpT> relu(new OpT);
    relu->type = OpType_ReLU6;
    relu->main.value = new Relu6T;
    relu->main.type = OpParameter_Relu6;
    relu->main.AsRelu6()->maxValue = maxValue;
    relu->main.AsRelu6()->minValue = minValue;
    return (Variable::create(Expr::create(relu.get(), {x})));
}
/*Given an input value x, it computes the output as x if x > 0 and slopes * x if x <= 0.
@ -746,9 +800,12 @@ input: A variable.
Returns:
A variable of Halide_Type_Int.
*/
VARP _Shape(VARP input) {
VARP _Shape(VARP input, bool nchw) {
    std::unique_ptr<OpT> shape(new OpT);
    shape->type = OpType_Shape;
    if (nchw) {
        shape->defaultDimentionFormat = MNN_DATA_FORMAT_NCHW;
    }
    return (Variable::create(Expr::create(std::move(shape), {input})));
}
/*Stacks a list of rank-R variables into one rank-(R+1) variable.
@ -906,6 +963,21 @@ VARP _Elu(VARP features, float alpha) {
    op->main.value = eluParam;
    return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Given an input value x, it computes the output as 1.0 if x > threshold and 0.0 if x <= threshold.
features: A variable of type Halide_Type_Float
threshold: threshold value
Returns:
A variable. Has the same type as features.
*/
VARP _Threshold(VARP features, float threshold) {
    std::unique_ptr<OpT> op(new OpT);
    op->type = OpType_Threshold;
    auto eluParam = new ELUT;
    op->main.type = OpParameter_ELU;
    eluParam->alpha = threshold;
    op->main.value = eluParam;
    return (Variable::create(Expr::create(std::move(op), {features})));
}
/*Computes the size of the variable
Args:
input: A variable of type Halide_Type_Float or Halide_Type_Int
@ -1049,7 +1121,6 @@ std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims) {
    op->main.type = OpParameter_MomentsParam;
    momentsParam->dim = axis;
    momentsParam->keepDims = keepDims;
    momentsParam->dType = (MNN::DataType)Utils::convertDataType(x->getInfo()->type);
    op->main.value = momentsParam;
    EXPRP expr = Expr::create(std::move(op), {x}, 2);
    std::vector<VARP> res;
@ -1405,7 +1476,7 @@ VARP _ZeroGrad(VARP x) {
}

VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu) {
           PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits) {
    std::unique_ptr<OpT> convOp(new OpT);
    convOp->type = OpType_ConvInt8;
    if (channel[0] == channel[1] && channel[0] == group) {
@ -1433,9 +1504,16 @@ VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<fl
    conv2D->symmetricQuan->bias = std::move(bias);
    conv2D->symmetricQuan->scale = std::move(scale);
    conv2D->symmetricQuan->weight = std::move(weight);
    conv2D->symmetricQuan->nbits = nbits;
    return (Variable::create(Expr::create(convOp.get(), {x})));
}

VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim) {
    std::unique_ptr<MNN::OpT> cosineSimilarityOp(new MNN::OpT);
    cosineSimilarityOp->type = MNN::OpType_CosineSimilarity;
    return (Variable::create(Expr::create(std::move(cosineSimilarityOp), {input0, input1, inputDim})));
}

VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue/*For future*/) {
    auto xInfo = x->getInfo();
    auto scaleInfo = scale->getInfo();
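The signatures changed or added above can be exercised as in the sketch below; the 1x4 input and the explicit clamp/threshold values are arbitrary choices, and any default arguments are not visible in this diff, so everything is passed explicitly.

#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

void opSketch() {
    auto x       = _Input({1, 4}, NCHW, halide_type_of<float>());
    auto clamped = _Relu6(x, 0.0f, 6.0f);   // new min/max overload
    auto gate    = _Threshold(x, 0.5f);     // 1.0 if x > 0.5, else 0.0
    auto shape   = _Shape(x, true);         // report the shape in NCHW order
    (void)clamped; (void)gate; (void)shape;
}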
@ -22,28 +22,7 @@ Optimizer::Parameters::~Parameters() {
    }
}
std::shared_ptr<Optimizer> Optimizer::create(Config config) {
    const int numThread = config.numThread;
    // Do nothing
    auto forwardType = config.forwardType;
    if (forwardType != MNN_FORWARD_ALL) {
        if (MNNGetExtraBackendCreator(forwardType) == nullptr) {
            return nullptr;
        }
        return std::shared_ptr<Optimizer>(new MergeOptimizer(config.forwardType, numThread, nullptr));
    }

    auto device = config.device;
    if (CPU == device) {
        return std::shared_ptr<Optimizer>(new MergeOptimizer(MNN_FORWARD_CPU, numThread, nullptr));
    }
    if (GPU == device) {
        std::vector<MNNForwardType> types {MNN_FORWARD_METAL, MNN_FORWARD_OPENCL, MNN_FORWARD_VULKAN, MNN_FORWARD_OPENGL};
        for (auto type : types) {
            auto creator = MNNGetExtraBackendCreator(type);
            if (nullptr != creator) {
                return std::shared_ptr<Optimizer>(new MergeOptimizer(type, numThread, nullptr));
            }
        }
    }
    return nullptr;
}

@ -0,0 +1,45 @@
//
// RandomGenerator.hpp
// MNN
//
// Created by MNN on 2019/11/28.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef RandomGenerator_hpp
#define RandomGenerator_hpp

#include <MNN/MNNDefine.h>
#include <random>

namespace MNN {
namespace Express {

class MNN_PUBLIC RandomGenerator {
private:
    RandomGenerator(int seed = std::random_device()()) {
        mSeed = seed;
        mGenerator.seed(mSeed);
    }

    ~RandomGenerator() = default;

    RandomGenerator(RandomGenerator &);

    RandomGenerator &operator=(const RandomGenerator &);

private:
    int mSeed;
    std::mt19937 mGenerator;

public:
    static std::mt19937 &generator(int seed = std::random_device()()) {
        static RandomGenerator rng(seed);
        return rng.mGenerator;
    }
};

} // namespace Express
} // namespace MNN

#endif // RandomGenerator_hpp
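Because the static local in generator() above only consumes the seed on its first construction, reproducible initialization requires calling the accessor with the desired seed before any initializer runs. A small sketch; the seed value and shapes are arbitrary.

#include <memory>
#include "RandomGenerator.hpp"
#include "Initializer.hpp"

void reproducibleInit() {
    // First call fixes the seed; seeds passed on later calls are ignored.
    MNN::Express::RandomGenerator::generator(1234);
    std::unique_ptr<MNN::Express::Initializer> g(MNN::Express::Initializer::gauss(0.0f, 0.02f));
    auto w = g->createConstVar({128, 64}, MNN::Express::NCHW);
    (void)w;
}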
@@ -10,8 +10,24 @@
 #include <map>
 #include "MNN_generated.h"
 #include "core/TensorUtils.hpp"
+#include "core/MNNMemoryUtils.h"
 namespace MNN {
 namespace Express {
+Expr::Inside::Inside(int outputSize) {
+    mOutputInfos.resize(outputSize);
+    mOutputTensors.resize(outputSize);
+    for (int i=0; i<outputSize; ++i) {
+        mOutputTensors[i] = new Tensor;
+        TensorUtils::getDescribe(mOutputTensors[i])->memoryType = Tensor::InsideDescribe::MEMORY_HOST;
+    }
+}
+Expr::Inside::~Inside() {
+    for (auto t : mOutputTensors) {
+        delete t;
+    }
+}
+
 #define CONVERT(src, dst, f)\
 if (f == src) return dst;

@@ -61,7 +77,6 @@ void Utils::copyInfoToTensor(Tensor* dest, const Variable::Info* source) {
     }
     dest->buffer().dimensions = (int)source->dim.size();
     dest->buffer().type = source->type;
-    dest->buffer().host = (uint8_t*)source->ptr;
     TensorUtils::getDescribe(dest)->dimensionFormat = (MNN_DATA_FORMAT)Utils::convertFormat(source->order);
     TensorUtils::setLinearLayout(dest);
 }

@@ -70,7 +85,31 @@ void Utils::copyTensorToInfo(Variable::Info* shape, const Tensor* tensor) {
     shape->dim = tensor->shape();
     shape->size = tensor->elementSize();
     shape->order = Utils::revertFormat(TensorUtils::getDescribe(tensor)->dimensionFormat);
-    shape->ptr = tensor->host<float>();
 }
+bool Utils::allocMemoryForHostTensor(Tensor* dest) {
+    if (nullptr != dest->buffer().host) {
+        return true;
+    }
+    if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
+        return false;
+    }
+    auto size = dest->size();
+    if (0 >= size) {
+        return false;
+    }
+    dest->buffer().host = (uint8_t*)MNNMemoryAllocAlign(size, MNN_MEMORY_ALIGN_DEFAULT);
+    return dest->buffer().host != nullptr;
+}
+bool Utils::releaseMemoryForHostTensor(Tensor* dest) {
+    if (nullptr == dest->buffer().host) {
+        return true;
+    }
+    if (TensorUtils::getDescribe(dest)->memoryType != Tensor::InsideDescribe::MEMORY_HOST) {
+        return false;
+    }
+    MNNMemoryFreeAlign(dest->buffer().host);
+    dest->buffer().host = nullptr;
+    return true;
+}

 } // namespace Express
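A hedged sketch of how the two new helpers pair up for a host-backed tensor; the tensor setup is illustrative and `info` is a hypothetical Variable::Info*:

    MNN::Tensor* t = new MNN::Tensor;
    MNN::TensorUtils::getDescribe(t)->memoryType = MNN::Tensor::InsideDescribe::MEMORY_HOST;
    MNN::Express::Utils::copyInfoToTensor(t, info);          // note: no longer copies source->ptr
    if (MNN::Express::Utils::allocMemoryForHostTensor(t)) {  // aligned host allocation
        // ... write through t->host<float>() ...
        MNN::Express::Utils::releaseMemoryForHostTensor(t);  // frees only the aligned buffer
    }
    delete t;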
@@ -15,15 +15,16 @@
 namespace MNN {
 namespace Express {
 struct Expr::Inside {
-    std::vector<const Variable::Info*> mInputInfos;
+    Inside(int outputSize);
+    ~ Inside();
     std::vector<Variable::Info> mOutputInfos;
+    std::vector<Tensor*> mOutputTensors;
     Executor::Requirement mReq;
-    std::shared_ptr<Executor::ComputeCache::Unit> mUnit;
+    std::shared_ptr<Executor::Unit> mUnit;
     std::shared_ptr<Executor::ComputeCache> mCache;
     int mCacheOffset = 0;
     bool mInfoDirty = true;
     bool mContentDirty = true;
-    bool mLinkCache = false;
 };
 class Utils {
 public:

@@ -33,6 +34,8 @@ public:
     static int convertFormat(Dimensionformat format);
     static Express::Dimensionformat revertFormat(int format);
     static halide_type_t revertDataType(DataType dataType);
+    static bool allocMemoryForHostTensor(Tensor* dest);
+    static bool releaseMemoryForHostTensor(Tensor* dest);
 };
 } // namespace Express
 } // namespace MNN
@@ -10,7 +10,7 @@
 #include <MNN/expr/ExprCreator.hpp>
 using namespace MNN::Express;
 namespace MNN {
-namespace Train {
+namespace Express {
 FixModule::FixModule(std::vector<Express::VARP> output, std::vector<Express::VARP> parameters,
                      std::vector<std::pair<Express::VARP, Express::Dimensionformat>> inputs) {
     for (auto p : parameters) {

@@ -34,5 +34,19 @@ std::vector<Express::VARP> FixModule::onForward(const std::vector<Express::VARP>
     }
     return mOutput;
 }
-} // namespace Train
+
+Module* FixModule::clone(CloneContext* ctx) const {
+    FixModule* module(new FixModule);
+    for (auto& it : mInputs) {
+        VARP v = ctx->getOrClone(it.first);
+        module->mInputs.push_back(std::make_pair(v, it.second));
+    }
+    for (auto& it : mOutput) {
+        VARP v = ctx->getOrClone(it);
+        module->mOutput.push_back(v);
+    }
+    return this->cloneBaseTo(ctx, module);
+}
+
+} // namespace Express
 } // namespace MNN
@@ -8,9 +8,9 @@

 #ifndef FixModule_hpp
 #define FixModule_hpp
-#include "Module.hpp"
+#include <MNN/expr/Module.hpp>
 namespace MNN {
-namespace Train {
+namespace Express {

 class FixModule : public Module {
 public:

@@ -20,10 +20,14 @@ public:
     virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
     virtual void onClearCache() override;
 private:
+    FixModule() = default;
+
+    Module* clone(CloneContext* ctx) const override;
+
     std::vector<std::pair<Express::VARP, Express::Dimensionformat>> mInputs;
     std::vector<Express::VARP> mOutput;
 };
-} // namespace Train
+} // namespace Express
 } // namespace MNN

 #endif
@@ -0,0 +1,112 @@
//
//  IfModule.cpp
//  MNN
//
//  Created by MNN on 2020/09/01.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "IfModule.hpp"
#include "MNN_generated.h"
namespace MNN {
namespace Express {
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
    for (int i=0; i<names.size(); ++i) {
        if (names[i] == key) {
            return i;
        }
    }
    return -1;
}
std::vector<Express::VARP> IfModule::onForward(const std::vector<Express::VARP>& inputs) {
    std::vector<Express::VARP> outputs(mOutputFromElse.size());
    MNN_ASSERT(mOutputFromThen.size() == mOutputFromElse.size());
    if (inputs[0]->readMap<int>()[0] > 0) {
        std::vector<Express::VARP> subInputs(mInputForThen.size());
        for (auto& p : mInputForThen) {
            subInputs[p.first] = inputs[p.second];
        }
        auto subOutputs = mThen->onForward(subInputs);
        for (int i=0; i<mOutputFromThen.size(); ++i) {
            outputs[i] = subOutputs[mOutputFromThen[i]];
        }
    } else {
        std::vector<Express::VARP> subInputs(mInputForElse.size());
        for (auto& p : mInputForElse) {
            subInputs[p.first] = inputs[p.second];
        }
        auto subOutputs = mElse->onForward(subInputs);
        for (int i=0; i<mOutputFromElse.size(); ++i) {
            outputs[i] = subOutputs[mOutputFromElse[i]];
        }
    }
    return outputs;
}
IfModule* IfModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
    auto module = new IfModule;
    auto ifParam = op->main_as_IfParam();
    auto& thenG = subGraph.find(ifParam->then_graph()->str())->second;
    auto& elseG = subGraph.find(ifParam->else_graph()->str())->second;
    module->mElse = elseG.m;
    module->mThen = thenG.m;
    if (nullptr != op->name()) {
        module->setName(op->name()->str());
    }
    /** Compute map index
     std::vector<std::pair<int, int>> mInputForThen;

     // First mElse' index, Second: inputs's index
     std::vector<std::pair<int, int>> mInputForElse;

     std::vector<int> mOutputFromThen;
     std::vector<int> mOutputFromElse;
     */
    // Map Inputs
    for (int i=0; i<ifParam->aliases_inputs()->size(); ++i) {
        auto index = i;
        auto data = ifParam->aliases_inputs()->GetAs<StringVec>(i);
        if (nullptr == data->data()) {
            continue;
        }
        for (int s=0; s<data->data()->size(); ++s) {
            auto name = data->data()->GetAsString(s)->str();
            auto thenPos = _findPos(thenG.inputs, name);
            if (thenPos >= 0) {
                module->mInputForThen.emplace_back(std::make_pair(thenPos, i));
            }
            auto elsePos = _findPos(elseG.inputs, name);
            if (elsePos >= 0) {
                module->mInputForElse.emplace_back(std::make_pair(elsePos, i));
            }
        }
    }
    // Map outputs
    auto output = ifParam->aliases_outputs();
    module->mOutputFromThen.resize(output->size());
    module->mOutputFromElse.resize(output->size());
    for (int i=0; i<output->size(); ++i) {
        auto data = output->GetAs<StringVec>(i);
        MNN_ASSERT(data->data()->size() == 2);

        auto thenPos = _findPos(thenG.outputs, data->data()->GetAsString(0)->str());
        MNN_ASSERT(thenPos >= 0);
        auto elsePos = _findPos(elseG.outputs, data->data()->GetAsString(1)->str());
        module->mOutputFromThen[i] = thenPos;
        module->mOutputFromElse[i] = elsePos;
    }
    return module;
}

Module* IfModule::clone(CloneContext* ctx) const {
    IfModule* module(new IfModule);
    module->mInputForThen = mInputForThen;
    module->mInputForElse = mInputForElse;
    module->mOutputFromThen = mOutputFromThen;
    module->mOutputFromElse = mOutputFromElse;
    module->mThen.reset(mThen->clone(ctx));
    module->mElse.reset(mElse->clone(ctx));
    return this->cloneBaseTo(ctx, module);
}

} // namespace Express
} // namespace MNN
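For orientation, a rough call pattern, assuming `net` is a hypothetical std::shared_ptr<Module> already loaded from a model whose graph contains an If op; the tensor layout and names are assumptions. The condition is read from inputs[0] as an int, and a positive value selects the then-branch:

    auto cond = MNN::Express::_Scalar<int>(1);   // > 0 -> then-branch, otherwise else-branch
    auto x    = MNN::Express::_Input({1, 4}, MNN::Express::NCHW);
    auto outs = net->onForward({cond, x});       // inputs are remapped via the alias index tables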
@@ -0,0 +1,43 @@
//
//  IfModule.hpp
//  MNN
//
//  Created by MNN on 2020/09/01.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef IfModule_hpp
#define IfModule_hpp

#include <MNN/expr/Module.hpp>
namespace MNN {
namespace Express {
class IfModule : public Module {
public:
    virtual ~ IfModule() {
        // Do nothing
    }
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
    static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);

private:
    IfModule(){}

    Module* clone(CloneContext* ctx) const override;

    // First mThen' index, Second: inputs's index
    std::vector<std::pair<int, int>> mInputForThen;

    // First mElse' index, Second: inputs's index
    std::vector<std::pair<int, int>> mInputForElse;

    std::vector<int> mOutputFromThen;
    std::vector<int> mOutputFromElse;

    std::shared_ptr<Module> mThen;
    std::shared_ptr<Module> mElse;
};
}
}

#endif /* IfModule_hpp */
@@ -0,0 +1,182 @@
//
//  Module.cpp
//  MNN
//
//  Created by MNN on 2019/11/25.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <MNN/expr/Module.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include "FixModule.hpp"
#include "PipelineModule.hpp"
#include "core/FileLoader.hpp"

namespace MNN {
namespace Express {

class EmptyModule : public Module {
public:
    EmptyModule(const std::vector<Express::VARP>& parameters) {
        for (auto p : parameters) {
            addParameter(p);
        }
    }
    virtual ~EmptyModule() {
        // Do nothing
    }
    virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
        return {};
    }

protected:
    EmptyModule() = default;

    Module* clone(Module::CloneContext* ctx) const override {
        EmptyModule* module(new EmptyModule);
        return this->cloneBaseTo(ctx, module);
    }
};

Module* Module::createEmpty(const std::vector<Express::VARP>& parameters) {
    return new EmptyModule(parameters);
}

Express::VARP Module::forward(Express::VARP input) {
    return this->onForward({input})[0];
}
std::vector<Express::VARP> Module::parameters() const {
    std::vector<Express::VARP> result;
    _collectParameters(result);
    return result;
}
bool Module::loadParameters(const std::vector<Express::VARP>& parameters) {
    std::vector<Express::VARP> result;
    _collectParameters(result);
    if (parameters.empty() || parameters.size() != result.size()) {
        MNN_ERROR("Error parameters, empty or parameter size not match \n");
        return false;
    }
    for (int i=0; i<parameters.size(); ++i) {
        if (nullptr != result[i].get()) {
            // Check Origin parameter's size
            auto dstInfo = result[i]->getInfo();
            auto srcInfo = parameters[i]->getInfo();
            if (dstInfo->dim.size() != srcInfo->dim.size() || dstInfo->order != srcInfo->order) {
                MNN_ERROR("Error parameters %d, dim size or order not match \n", i);
                return false;
            }
            if (dstInfo->size != srcInfo->size || dstInfo->type != srcInfo->type) {
                MNN_ERROR("Error parameters %d, size or type not match \n", i);
                return false;
            }
        }
        Variable::replace(result[i], parameters[i]);
    }
    return true;
}
void Module::setIsTraining(const bool isTraining) {
    mIsTraining = isTraining;
    for (auto c : mChildren) {
        c->setIsTraining(isTraining);
    }
}

bool Module::getIsTraining() {
    return mIsTraining;
}

void Module::registerModel(const std::vector<std::shared_ptr<Module>>& children) {
    mChildren.insert(mChildren.begin(), children.begin(), children.end());
}
int Module::addParameter(VARP parameter) {
    auto res = mParameters.size();
    mParameters.emplace_back(parameter);
    return (int)res;
}

void Module::setParameter(Express::VARP parameter, int index) {
    if (index < 0 || index >= mParameters.size()) {
        MNN_ERROR("Module error: index out of range: %d - %d:\n", index, (int)mParameters.size());
        return;
    }
    mParameters[index] = parameter;
}

void Module::_collectParameters(std::vector<Express::VARP>& result) const {
    for (auto p : mParameters) {
        result.push_back(p);
    }
    for (auto c : mChildren) {
        c->_collectParameters(result);
    }
}
void Module::clearCache() {
    for (auto c : mChildren) {
        c->clearCache();
    }
    this->onClearCache();
}

Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic) {
    AutoStorage<uint8_t> buffer;
    {
        FileLoader loader(fileName);
        if (!loader.valid()) {
            MNN_ERROR("Error for open %s\n", fileName);
            return {};
        }
        loader.read();
        if (!loader.valid()) {
            return {};
        }
        loader.merge(buffer);
        if (buffer.get() == nullptr) {
            return {};
        }
    }
    return load(inputs, outputs, buffer.get(), buffer.size(), dynamic);
}

Module* Module::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
    return PipelineModule::load(inputs, outputs, buffer, length, dynamic);
}

EXPRP Module::CloneContext::getOrClone(EXPRP expr) {
    auto it = mExprMap.find(expr.get());
    if (it == mExprMap.end()) {
        // EXPRP replica = expr->clone(shareParams);
        // TODO(hjchen2): Clone expr.
        EXPRP replica = expr;
        it = mExprMap.emplace(expr.get(), replica).first;
    }
    return it->second;
}

VARP Module::CloneContext::getOrClone(VARP var) {
    auto it = mVarMap.find(var.get());
    if (it != mVarMap.end()) {
        // TODO(hjchen2): Clone variable.
        VARP replica = var;
        it = mVarMap.emplace(var.get(), replica).first;
    }
    return it->second;
}

Module* Module::clone(const Module* module, const bool shareParams) {
    CloneContext context(shareParams);
    return module->clone(&context);
}

Module* Module::cloneBaseTo(CloneContext* ctx, Module* module) const {
    for (const Express::VARP& var : mParameters) {
        module->mParameters.push_back(ctx->getOrClone(var));
    }
    module->mIsTraining = mIsTraining;
    module->mName = mName;
    module->mType = mType;
    return module;
}

} // namespace Express
} // namespace MNN
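A hedged end-to-end sketch of the entry points defined above; the file name, tensor names, input shape and the shareParams choice are placeholders:

    using namespace MNN::Express;
    // Load a module mapping "input" to "output" from an .mnn file.
    std::shared_ptr<Module> net(Module::load({"input"}, {"output"}, "model.mnn", /*dynamic*/ false));
    net->setIsTraining(false);
    auto y = net->forward(_Input({1, 3, 224, 224}, NC4HW4));
    // Per the TODOs above, clone currently shares expressions/variables between replicas.
    std::unique_ptr<Module> replica(Module::clone(net.get(), /*shareParams*/ true));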
@@ -6,9 +6,11 @@
 // Copyright © 2018, Alibaba Group Holding Limited
 //

-#include "NN.hpp"
+#include <MNN/expr/NN.hpp>
 #include "Distributions.hpp"
 #include "FixModule.hpp"
+#include "WhileModule.hpp"
+#include "IfModule.hpp"
 #include "Initializer.hpp"
 #include "MNN_generated.h"
 #include "RandomGenerator.hpp"

@@ -17,7 +19,7 @@

 using namespace MNN::Express;
 namespace MNN {
-namespace Train {
+namespace Express {
 static VARP _activate(VARP x, NN::ActivationFunctionType type) {
     switch (type) {
         case NN::None:

@@ -58,6 +60,14 @@ public:
     }

 private:
+    DropoutModule() = default;
+
+    Module* clone(CloneContext* ctx) const override {
+        DropoutModule* module(new DropoutModule);
+        module->mDropRatio = mDropRatio;
+        return this->cloneBaseTo(ctx, module);
+    }
+
     float mDropRatio;
 };

@@ -80,8 +90,8 @@ public:
         mRunningVariance = _Const(bnPa->varData()->data(), {1, mChannels, 1, 1}, NCHW);
         addParameter(mScale);
         addParameter(mBias);
-        addParameter(mRunningVariance);
-        addParameter(mRunningMean);
+        mRunningVariancePos = addParameter(mRunningVariance);
+        mRunningMeanPos = addParameter(mRunningMean);
         mReductionDims = {0, 2, 3};
         setType("BatchNorm");
     }

@@ -110,8 +120,8 @@ public:

         addParameter(mScale);
         addParameter(mBias);
-        addParameter(mRunningVariance);
-        addParameter(mRunningMean);
+        mRunningVariancePos = addParameter(mRunningVariance);
+        mRunningMeanPos = addParameter(mRunningMean);
         setType("BatchNorm");
     }

@@ -156,9 +166,8 @@ public:
             mRunningVariance = _Const(mMomentum) * mRunningVariance + _Const(1 - mMomentum) * sampleVar;
             outputData->setName(name());
             outputData = _Convert(outputData, dimFormat);
-            Variable::prepareCompute({inputs[0], outputData, mRunningMean, mRunningVariance});
-            mRunningMean.fix(Express::VARP::CONSTANT);
-            mRunningVariance.fix(Express::VARP::CONSTANT);
+            setParameter(mRunningMean, mRunningMeanPos);
+            setParameter(mRunningVariance, mRunningVariancePos);
             return {outputData};
         }
         auto rStd = _Const(1.0f) / _Sqrt(mRunningVariance + _Const(mEps));

@@ -180,12 +189,31 @@ public:
     }

 private:
+    BatchNormModule() = default;
+
+    Module* clone(CloneContext* ctx) const override {
+        BatchNormModule* module(new BatchNormModule);
+        module->mMomentum = mMomentum;
+        module->mEps = mEps;
+        module->mScale = ctx->getOrClone(mScale);
+        module->mBias = ctx->getOrClone(mBias);
+        module->mRunningMean = ctx->getOrClone(mRunningMean);
+        module->mRunningVariance = ctx->getOrClone(mRunningVariance);
+        module->mRunningMeanPos = mRunningMeanPos;
+        module->mRunningVariancePos = mRunningVariancePos;
+        module->mChannels = mChannels;
+        module->mReductionDims = mReductionDims;
+        return this->cloneBaseTo(ctx, module);
+    }
+
     float mMomentum = 0.99;
     float mEps = 1e-5;
     VARP mScale = nullptr;
     VARP mBias = nullptr;
     VARP mRunningMean = nullptr;
     VARP mRunningVariance = nullptr;
+    int mRunningMeanPos = -1;
+    int mRunningVariancePos = -1;
     int mChannels;
     std::vector<int> mReductionDims;
 };

@@ -246,7 +274,18 @@ public:
         tempOutput->setName(name());
         return {tempOutput};
     }

 private:
+    ConvModule() = default;
+
+    Module* clone(CloneContext* ctx) const override {
+        ConvModule* module(new ConvModule);
+        module->mParameter = mParameter;
+        module->mParameter.weight = ctx->getOrClone(mParameter.weight);
+        module->mParameter.bias = ctx->getOrClone(mParameter.bias);
+        return this->cloneBaseTo(ctx, module);
+    }
+
     NN::ConvParameters mParameter;
 };
 static std::tuple<VARP, VARP, int> _initParameters(const NN::ConvOption& option, bool hasBias,

@@ -533,7 +572,23 @@ public:
     }

 private:
-    const NN::ConvOption mOption;
+    ConvOctaveModule() = default;
+
+    Module* clone(CloneContext* ctx) const override {
+        ConvOctaveModule* module(new ConvOctaveModule);
+        module->mOption = mOption;
+        module->mLLW = ctx->getOrClone(mLLW);
+        module->mLHW = ctx->getOrClone(mLHW);
+        module->mHLW = ctx->getOrClone(mHLW);
+        module->mHHW = ctx->getOrClone(mHHW);
+        module->mLBias = ctx->getOrClone(mLBias);
+        module->mHBias = ctx->getOrClone(mHBias);
+        module->mSplitInput = mSplitInput;
+        module->mGroup = mGroup;
+        return this->cloneBaseTo(ctx, module);
+    }
+
+    NN::ConvOption mOption;
     VARP mLLW;
     VARP mLHW;
     VARP mHLW;

@@ -555,7 +610,7 @@ Module* NN::ConvOctave(const ConvParameters& parameters,
     module->setName(parameters.name);
     return module;
 }
-Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
+Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs) {
     if (nullptr == expr->get()) {
         return nullptr;
     }

@@ -565,6 +620,12 @@ Module* NN::Utils::ExtractNotRunableOp(Express::EXPRP expr) {
     if (expr->get()->type() == OpType_Dropout) {
         return new DropoutModule(0.3f);
     }
+    if (expr->get()->type() == OpType_While) {
+        return WhileModule::create(expr->get(), subgraphs);
+    }
+    if (expr->get()->type() == OpType_If) {
+        return IfModule::create(expr->get(), subgraphs);
+    }
     return nullptr;
 }

@@ -622,6 +683,9 @@ public:
         mLimitScale = _Scalar<float>(1.0f / limit);
         mClampValue = _Scalar<float>(limit);
+
+        mInputScalePos = addParameter(mInputScale);
+        mOutputScalePos = addParameter(mOutputScale);
         setType("ConvBNReluFused");
     }

@@ -632,31 +696,16 @@ public:
             tempX = _Convert(tempX, NCHW);
         }
         auto originX = tempX;
-        VARP scale;
-        if (mFeatureScaleStatMethod == NN::PerTensor) {
-            scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
-        } else {
-            auto originSize = originX->getInfo()->size;
-            auto batch = originX->getInfo()->dim[0];
-            auto channel = originX->getInfo()->dim[1];
-            if (originSize / batch / channel < 10) {
-                // Too small data
-                //MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
-                std::vector<int> dims = {1, channel, 1, 1};
-                auto dimVar = _Const(dims.data(), {4}, NCHW, halide_type_of<int32_t>());
-                auto singleScale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
-                scale = _Fill(dimVar, singleScale);
-            } else {
-                //MNN_PRINT("%d - %d - %d\n", originSize, batch, channel);
-                scale = _Maximum(_ReduceMax(_Abs(tempX), {0, 2, 3}, true), _Scalar<float>(0.0001f)) * mLimitScale;
-            }
-        }
-        scale.fix(VARP::CONSTANT);
+        VARP scale = _Maximum(_ReduceMax(_Abs(tempX)), _Scalar<float>(0.0001f)) * mLimitScale;
         if (useScale == nullptr) {
             tempX = _Round(tempX * _Reciprocal(scale)) * scale;
         } else {
             tempX = _Round(tempX * _Reciprocal(useScale)) * useScale;
         }
+        // Break the grad by use cast
+        tempX = _Cast<float>(tempX);
+
+        // Move grad from tempX to originX
         tempX = _Convert(tempX + _ZeroGrad(originX), originFormat);
         return std::make_pair(tempX, scale);
     }

@@ -684,18 +733,16 @@ public:
     virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override {
         VARP res;
         if (getIsTraining()) {
-            Variable::prepareCompute({inputs[0]});
             auto x = _Convert(inputs[0], NCHW);
             // simulate weight quant
             auto weightScale = _Maximum(_ReduceMax(_Abs(mWeight), {1, 2, 3}, true), _Scalar<float>(1E-6)) * mLimitScale;
-            weightScale.fix(VARP::CONSTANT);
             auto weightTemp = _Round(mWeight * _Reciprocal(weightScale)) * weightScale;
             weightTemp = weightTemp + _ZeroGrad(mWeight);

             // simulate input quant to get original input scale
             auto inputPair = fakeQuantFeature(x);
             mInputScale = updateScale(mInputScale, inputPair.second);
-            mInputScale.fix(VARP::CONSTANT);
+            setParameter(mInputScale, mInputScalePos);

             // simulate output quant to get original output scale
             res = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,

@@ -709,10 +756,9 @@ public:

             res = _activate(res, mActivation);

-            Variable::prepareCompute({conv, res});
             auto outputPair = fakeQuantFeature(res);
             mOutputScale = updateScale(mOutputScale, outputPair.second);
-            mOutputScale.fix(VARP::CONSTANT);
+            setParameter(mOutputScale, mOutputScalePos);
             res = outputPair.first;
         } else {
             if (nullptr == mInputScale) {

@@ -725,6 +771,7 @@ public:
                 auto x = _Convert(inputs[0], NCHW);
                 auto inputPair = fakeQuantFeature(x);
                 mInputScale = inputPair.second;
+                setParameter(mInputScale, mInputScalePos);
                 inputPair.first.fix(VARP::CONSTANT);

                 auto simuRes = _Conv(weightTemp, mBias, _Convert(inputPair.first, NC4HW4), mOption.padMode, mOption.stride,

@@ -737,6 +784,7 @@ public:
                 Variable::prepareCompute({simuRes});
                 auto outputPair = fakeQuantFeature(simuRes);
                 mOutputScale = outputPair.second;
+                setParameter(mOutputScale, mOutputScalePos);
                 outputPair.first.fix(VARP::CONSTANT);
             }

@@ -772,12 +820,7 @@ public:
             {
                 std::vector<int> dims = {x->getInfo()->dim[1]};
                 auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
-                VARP channelScale;
-                if (mFeatureScaleStatMethod == NN::PerTensor) {
-                    channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
-                } else {
-                    channelScale = _Reciprocal(mInputScale);
-                }
+                VARP channelScale = _Reciprocal(_Fill(dimVar, mInputScale));
                 x = _FloatToInt8(x, channelScale, -127, 127);// TODO add clamp
             }

@@ -824,12 +867,7 @@ public:
             {
                 std::vector<int> dims = {res->getInfo()->dim[1]};
                 auto dimVar = _Const(dims.data(), {1}, NCHW, halide_type_of<int32_t>());
-                VARP channelScale;
-                if (mFeatureScaleStatMethod == NN::PerTensor) {
-                    channelScale = _Fill(dimVar, mOutputScale);
-                } else {
-                    channelScale = mOutputScale;
-                }
+                VARP channelScale = _Fill(dimVar, mOutputScale);
                 res = _Int8ToFloat(res, channelScale);
             }
         }

@@ -838,6 +876,34 @@ public:
     }

 private:
+    ConvBNReluFusedModule() = default;
+
+    Module* clone(CloneContext* ctx) const override {
+        ConvBNReluFusedModule* module(new ConvBNReluFusedModule);
+        module->mConvParameter = mConvParameter;
+        module->mConvParameter.weight = ctx->getOrClone(mConvParameter.weight);
+        module->mConvParameter.bias = ctx->getOrClone(mConvParameter.bias);
+        module->mOption = mOption;
+        module->mGroup = mGroup;
+        module->mWeight = ctx->getOrClone(mWeight);
+        module->mBias = ctx->getOrClone(mBias);
+        module->mActivation = mActivation;
+        module->mLimitScale = ctx->getOrClone(mLimitScale);
+        module->mInputScalePos = mInputScalePos;
+        module->mOutputScalePos = mOutputScalePos;
+        module->mInputScale = ctx->getOrClone(mInputScale);
+        module->mOutputScale = ctx->getOrClone(mOutputScale);
+        module->mClampValue = ctx->getOrClone(mClampValue);
+        module->mMomentum = mMomentum;
+        module->mFeatureScaleStatMethod = mFeatureScaleStatMethod;
+        module->mScaleUpdateMethod = mScaleUpdateMethod;
+        if (mBatchNorm) {
+            module->mBatchNorm.reset(mBatchNorm->clone(ctx));
+            module->registerModel({module->mBatchNorm});
+        }
+        return this->cloneBaseTo(ctx, module);
+    }
+
     NN::ConvParameters mConvParameter;
     NN::ConvOption mOption;
     int mGroup;

@@ -846,6 +912,8 @@ private:
     NN::ActivationFunctionType mActivation = NN::ActivationFunctionType::None;
     std::shared_ptr<Module> mBatchNorm = nullptr;
     VARP mLimitScale;
+    int mInputScalePos = -1;
+    int mOutputScalePos = -1;
     VARP mInputScale = nullptr;
     VARP mOutputScale = nullptr;
     VARP mClampValue;

@@ -870,5 +938,5 @@ Module* NN::ConvInt8(const ConvParameters& para, int bits, NN::FeatureScaleStatM
     return new ConvBNReluFusedModule({conv}, featureMethod, method, bits);
 }

-} // namespace Train
+} // namespace Express
 } // namespace MNN
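The fake quantization used throughout ConvBNReluFusedModule above reduces, per tensor, to a round-trip through the quantization grid. A scalar sketch of the same arithmetic (the bit width and the 0.0001 floor mirror the code; everything else is illustrative):

    #include <algorithm>
    #include <cmath>
    float fakeQuant(float x, float maxAbs, int bits = 8) {
        float limit = float((1 << (bits - 1)) - 1);       // 127 for 8 bits
        float scale = std::max(maxAbs, 0.0001f) / limit;  // matches mLimitScale = 1 / limit
        return std::round(x / scale) * scale;             // quantize, then dequantize
    }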
@@ -0,0 +1,761 @@
//
//  PipelineModule.cpp
//  MNN
//
//  Created by MNN on 2020/01/09.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "PipelineModule.hpp"
#include "MNN_generated.h"
#include <set>
#include <vector>
#include "StaticModule.hpp"
#include "IfModule.hpp"
#include "WhileModule.hpp"
using namespace MNN::Express;
namespace MNN {
namespace Express {
//#define DYNAMIC
#define PIPELINE_MODULE "_pipeline_module__"
class ExprModule : public Module {
public:
    ExprModule(EXPRP expr) {
        mExpr = expr;
        setName(expr->name());
        mInputs = expr->inputs();
        auto op = mExpr->get();
        if (op) {
            auto typeName = EnumNameOpType(op->type());
            setType(typeName);
        }
        for (int i = 0; i < mInputs.size(); ++i) {
            auto inputExpr = mInputs[i]->expr().first;
            if (inputExpr->get() != nullptr) {
                mInputs[i] = nullptr;
                mInputIndexes.emplace_back(i);
                continue;
            }
            switch (inputExpr->inputType()) {
                case VARP::INPUT:
                    mInputs[i] = nullptr;
                    mInputIndexes.emplace_back(i);
                    break;
                case VARP::CONSTANT:
                    break;
                case VARP::TRAINABLE:
                    addParameter(mInputs[i]);
                    break;
                default:
                    break;
            }
        }
    }
    virtual std::vector<VARP> onForward(const std::vector<VARP>& inputs) override {
        MNN_ASSERT(mInputIndexes.size() == inputs.size());
        if (nullptr == mExpr->get()) {
            return {Variable::create(mExpr)};
        }
        std::vector<VARP> tempInputs = mInputs;
        for (int i = 0; i < inputs.size(); ++i) {
            tempInputs[mInputIndexes[i]] = inputs[i];
        }
        std::vector<VARP> outputVars;
        auto newExpr = Expr::create(mExpr->extra(), std::move(tempInputs), mExpr->outputSize());
        newExpr->setName(mExpr->name());
        for (int i = 0; i < mExpr->outputSize(); ++i) {
            outputVars.emplace_back(Variable::create(newExpr, i));
        }
        return outputVars;
    }
    const std::vector<int>& inputIndexes() const {
        return mInputIndexes;
    }

private:
    Module* clone(CloneContext* ctx) const override {
        ExprModule* module(new ExprModule(ctx->getOrClone(mExpr)));
        for (const VARP& var : mInputs) {
            module->mInputs.push_back(ctx->getOrClone(var));
        }
        module->mInputIndexes = mInputIndexes;
        return this->cloneBaseTo(ctx, module);
    }

    EXPRP mExpr;
    std::vector<VARP> mInputs;
    std::vector<int> mInputIndexes;
};

Module* PipelineModule::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
    std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(EXPRP)> transformFunction;
    if (fortrain) {
        transformFunction =
            [&subGraph](EXPRP source) {
                if (source->get() == nullptr) {
                    return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
                }
                std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
                if (nullptr != m) {
                    m->setName(source->name());
                    return std::make_pair(std::vector<int>{}, m);
                }
                auto convExtracted = NN::Utils::ExtractConvolution(source);
                if (convExtracted.weight == nullptr) {
                    return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
                }
                std::shared_ptr<Module> module(NN::Conv(convExtracted));
                module->setName(source->name());
                return std::make_pair(std::vector<int>{0}, module);
            };
    } else {
        transformFunction = [&subGraph](EXPRP source) {
            if (source->get() == nullptr) {
                return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
            }
            std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
            if (nullptr != m) {
                m->setName(source->name());
                return std::make_pair(std::vector<int>{}, m);
            }
            return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
        };
    }
    return new PipelineModule(inputs, outputs, transformFunction);
}

PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outputs, const Transformer& transformFunction) {
    setType(PIPELINE_MODULE);
    std::vector<EXPRP> executeOrder;
    std::set<EXPRP> inputExpr;
    for (auto v : inputs) {
        inputExpr.insert(v->expr().first);
    }
    for (auto output : outputs) {
        Expr::visit(output->expr().first,
                    [&executeOrder, &inputExpr](EXPRP expr) {
                        if (expr->visited()) {
                            return false;
                        }
                        if (inputExpr.find(expr)!= inputExpr.end()) {
                            expr->setVisited(true);
                            executeOrder.emplace_back(expr);
                            return false;
                        }
                        return true;
                    },
                    [&executeOrder](EXPRP expr) {
                        //FUNC_PRINT_ALL(var->name().c_str(), s);
                        if (!expr->visited()) {
                            executeOrder.emplace_back(expr);
                            expr->setVisited(true);
                        }
                        return true;
                    });
    }
    for (auto expr : executeOrder) {
        expr->setVisited(false);
    }
    // Set Indexes
    std::map<EXPRP, int> indexes;
    int currentIndexes = 0;
    for (auto expr : executeOrder) {
        indexes[expr] = currentIndexes;
        currentIndexes += expr->outputSize();
    }
    std::set<EXPRP> inputSets;
    mInputIndexes.clear();
    mStackSize = currentIndexes;
    for (auto v : inputs) {
        auto inputExpr = v->expr();
        mInputIndexes.emplace_back(indexes[inputExpr.first] + inputExpr.second);
        inputSets.insert(inputExpr.first);
    }

    // Create All SubModule
    for (auto expr : executeOrder) {
        if (inputSets.find(expr) != inputSets.end()) {
            continue;
        }
        std::pair<std::vector<int>, std::shared_ptr<Module> > moduleResult;
        bool extracted = false;
        if (!transformFunction) {
            moduleResult = std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
        } else {
            moduleResult = transformFunction(expr);
        }
        if (moduleResult.second == nullptr) {
            std::shared_ptr<Module> module(new ExprModule(expr));
            moduleResult.first = ((ExprModule*)module.get())->inputIndexes();
            moduleResult.second = module;
        } else {
            extracted = true;
        }
        auto subInputs = expr->inputs();
        auto& exprInputIndexes = moduleResult.first;
        std::vector<int> inputIndexes;
        if (exprInputIndexes.empty() && extracted) {
            inputIndexes.resize(subInputs.size());
            for (int i = 0; i < inputIndexes.size(); ++i) {
                auto inputExpr = subInputs[i]->expr();
                inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
            }
        } else {
            inputIndexes.resize(exprInputIndexes.size());
            for (int i = 0; i < inputIndexes.size(); ++i) {
                auto inputExpr = subInputs[exprInputIndexes[i]]->expr();
                inputIndexes[i] = indexes[inputExpr.first] + inputExpr.second;
            }
        }
        std::vector<int> outputIndexes(expr->outputSize());
        for (int i = 0; i < outputIndexes.size(); ++i) {
            outputIndexes[i] = indexes[expr] + i;
        }
        mSubModules.emplace_back(std::make_tuple(moduleResult.second, inputIndexes, outputIndexes));
        registerModel({moduleResult.second});
    }
    mOutputIndexes.clear();
    for (auto output : outputs) {
        auto outputExpr = output->expr();
        mOutputIndexes.emplace_back(indexes[outputExpr.first] + outputExpr.second);
    }
}
bool PipelineModule::turnQuantize(Module* module, const int bit, NN::FeatureScaleStatMethod featureScaleStatMethod, NN::ScaleUpdateMethod scaleUpdateMethod) {
    if (nullptr == module || module->type() != PIPELINE_MODULE) {
        MNN_ERROR("Invalide module for quantized\n");
        return false;
    }
    ((PipelineModule*)module)->toTrainQuant(bit, featureScaleStatMethod, scaleUpdateMethod);
    return true;
}

std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputIndices) {
    MNN_ASSERT(outputIndices.size() > 0);
    std::vector<int> countResult(outputIndices.size(), 0);

    for (int i = 0; i < mSubModules.size(); i++) {
        auto &m = mSubModules[i];
        auto& theModule = std::get<0>(m);
        auto name = theModule->name();
        auto &inputIndices = std::get<1>(m);

        for (int j = 0; j < inputIndices.size(); j++) {
            int index = inputIndices[j];
            for (int k = 0; k < countResult.size(); k++) {
                if (index == outputIndices[k]) {
                    countResult[k]++;
                }
            }
        }
    }

    return countResult;
}

void PipelineModule::toTrainQuant(const int bits, NN::FeatureScaleStatMethod featureScaleStatMethod,
                                  NN::ScaleUpdateMethod scaleUpdateMethod) {
    std::vector<int> needEraseIndices;

    for (int i = 0; i < mSubModules.size(); i++) {
        auto& m = mSubModules[i];
        auto& theModule = std::get<0>(m);
        auto moduleType = theModule->type();
        //auto& inputIndices = std::get<1>(m);
        auto& outputIndices = std::get<2>(m);

        if (moduleType == "Conv" && i < mSubModules.size() - 1) {
            auto& p1 = mSubModules[i+1];
            auto p1Module = std::get<0>(p1);
            auto& p1ModuleType = p1Module->type();
            auto& p1InputIndices = std::get<1>(p1);
            auto& p1OutputIndices = std::get<2>(p1);

            auto convOutputCount = countOutputReference(outputIndices);
            bool convSingleOutputReference = ((outputIndices.size() == 1) && (convOutputCount[0] == 1));

            // only conv
            if ((!convSingleOutputReference) || (p1ModuleType == "Conv") ||
                (p1ModuleType != "BatchNorm" && p1ModuleType != "ReLU" && p1ModuleType != "ReLU6")) {
                theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
                registerModel({theModule});
                continue;
            }
            // conv + bn + ?
            if (p1ModuleType == "BatchNorm") {
                bool convBnConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
                if (!convBnConnected) {
                    theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    continue;
                }

                // last conv + bn
                if (i == mSubModules.size() - 2) {
                    theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    outputIndices = p1OutputIndices;
                    needEraseIndices.emplace_back(i + 1);
                    continue;
                }
                // maybe there is a relu or relu6 after conv + bn
                auto& p2 = mSubModules[i+2];
                auto& p2Module = std::get<0>(p2);
                auto p2ModuleType = p2Module->type();
                auto& p2InputIndices = std::get<1>(p2);
                auto& p2OutputIndices = std::get<2>(p2);

                auto bnOutputCount = countOutputReference(p1OutputIndices);
                bool bnSingleOutputReference = ((p1OutputIndices.size() == 1) && (bnOutputCount[0] == 1));

                // only conv + bn
                if ((!bnSingleOutputReference) || (p2ModuleType != "ReLU" && p2ModuleType != "ReLU6")) {
                    theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    outputIndices = p1OutputIndices;
                    needEraseIndices.emplace_back(i + 1);
                    continue;
                } else { // conv + bn + relu or conv + bn + relu6
                    bool convBnReluConnected = ((bnSingleOutputReference) && (p2InputIndices.size() == 1) && (p2InputIndices[0] == p1OutputIndices[0]));
                    if (!convBnReluConnected) {
                        theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                        registerModel({theModule});
                        outputIndices = p1OutputIndices;
                        needEraseIndices.emplace_back(i + 1);
                        continue;
                    }

                    theModule.reset(NN::ConvBNReluFused({theModule, p1Module, p2Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    outputIndices = p2OutputIndices;
                    needEraseIndices.emplace_back(i + 1);
                    needEraseIndices.emplace_back(i + 2);
                    continue;
                }
            }
            // conv + relu or conv + relu6
            if (p1ModuleType == "ReLU" || p1ModuleType == "ReLU6") {
                bool convReluConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
                if (!convReluConnected) {
                    theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
                    registerModel({theModule});
                    continue;
                }

                theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
                registerModel({theModule});
                outputIndices = p1OutputIndices;
                needEraseIndices.emplace_back(i + 1);
                continue;
            }
        }

        if (i == mSubModules.size() - 1 && moduleType == "Conv") {
            theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
            registerModel({theModule});
        }
    }

    // erase useless submodules
    const int eraseSize = needEraseIndices.size();
    int alreadyErasedCount = 0;
    for (int i = 0; i < eraseSize; i++) {
        auto position = needEraseIndices[i] - alreadyErasedCount;
        auto type = std::get<0>(mSubModules[position])->type();
        MNN_ASSERT(type == "BatchNorm" || type == "ReLU" || type == "ReLU6");
        mSubModules.erase(mSubModules.begin() + position);
        alreadyErasedCount++;
    }
}

std::vector<VARP> PipelineModule::onForward(const std::vector<VARP>& inputs) {
    std::vector<VARP> mStack(mStackSize);
    for (int i = 0; i < mInputIndexes.size(); ++i) {
        mStack[mInputIndexes[i]] = inputs[i];
    }
    for (int index = 0; index < mSubModules.size(); ++index) {
        auto& m = mSubModules[index];
        std::vector<VARP> tempInputs(std::get<1>(m).size());
        for (int i = 0; i < tempInputs.size(); ++i) {
            tempInputs[i] = mStack[std::get<1>(m)[i]];
            MNN_ASSERT(nullptr != tempInputs[i]);
        }
        std::vector<VARP> tempOutputs = std::get<0>(m)->onForward(tempInputs);
        MNN_ASSERT(tempOutputs.size() == std::get<2>(m).size());
        for (int i = 0; i < tempOutputs.size(); ++i) {
            mStack[std::get<2>(m)[i]] = tempOutputs[i];
            MNN_ASSERT(nullptr != tempOutputs[i]);
        }
    }
    std::vector<VARP> outputs(mOutputIndexes.size());
    for (int i = 0; i < mOutputIndexes.size(); ++i) {
        outputs[i] = mStack[mOutputIndexes[i]];
    }
    return outputs;
}
void PipelineModule::onClearCache() {
    // Do nothing
}

static std::map<std::string, SubGraph> _createSubGraph(const MNN::Net* net, bool dynamic) {
    std::map<std::string, SubGraph> subGraphMap;
    auto subGraphs = net->subgraphs();
    if (nullptr == subGraphs) {
        return subGraphMap;
    }
    for (int i=0; i<subGraphs->size(); ++i) {
        auto graph = subGraphs->GetAs<SubGraphProto>(i);
        std::vector<std::string> subInputs;
        std::vector<std::string> subOutputs;
        if (nullptr != graph->inputs()) {
            for (int v=0; v<graph->inputs()->size(); ++v) {
                auto index = graph->inputs()->data()[v];
                subInputs.emplace_back(graph->tensors()->GetAsString(index)->str());
            }
        }
        for (int v=0; v<graph->outputs()->size(); ++v) {
            auto index = graph->outputs()->data()[v];
            subOutputs.emplace_back(graph->tensors()->GetAsString(index)->str());
        }
        // Pack to Net for loading
        std::shared_ptr<Module> submodule;
        {
            std::unique_ptr<SubGraphProtoT> _tempInfo(graph->UnPack());
            std::unique_ptr<NetT> _tempNet(new NetT);
            _tempNet->oplists = std::move(_tempInfo->nodes);
            _tempNet->tensorName = std::move(_tempInfo->tensors);
            flatbuffers::FlatBufferBuilder builder(1024);
            auto offset = Net::Pack(builder, _tempNet.get());
            builder.Finish(offset);
            if (dynamic) {
                submodule.reset(PipelineModule::load(subInputs, subOutputs, (const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), dynamic));
            } else {
                submodule.reset(new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), subInputs, subOutputs));
            }
            if (graph->name() != nullptr) {
                submodule->setName(graph->name()->str());
            }
        }
        auto key = graph->name()->str();
        SubGraph subgraph;
        subgraph.inputs = std::move(subInputs);
        subgraph.outputs = std::move(subOutputs);
        subgraph.m = submodule;
        subGraphMap.insert(std::make_pair(key, subgraph));
    }
    return subGraphMap;
}

struct SubModuleInfo {
    std::vector<int> opList;
    std::vector<int> inputs;;
    std::vector<int> outputs;
    std::vector<uint8_t> tensorMask;
};
static std::vector<SubModuleInfo> _createSubModuleInfo(const MNN::Net* net, const std::set<int>& inputIndexes, const std::set<int>& outputIndexes) {
    std::vector<SubModuleInfo> submodule;
    SubModuleInfo current;
    std::vector<int> inputOps;

    // Seperate the graph to serveral submodule
    for (int i=0; i<net->oplists()->size(); ++i) {
        auto op = net->oplists()->GetAs<Op>(i);
        // Collect Input
        if (op->type() == OpType_Input) {
            inputOps.emplace_back(i);
            continue;
        }
        if (op->type() == OpType_If || op->type() == OpType_While) {
            if (current.opList.size() > 0) {
                // Not empty
                submodule.emplace_back(std::move(current));
            }
            SubModuleInfo controlOp;
            controlOp.opList = {i};
            submodule.emplace_back(std::move(controlOp));
            continue;
        }
        current.opList.emplace_back(i);
    }
    if (!current.opList.empty()) {
        submodule.emplace_back(std::move(current));
    }

    /**Compute All SubModule's inputs and outputs*/
    // 0: not use, 1: input, 2: output, 3: mid, 4: valid output
    for (int moduleIndex=0; moduleIndex < submodule.size(); ++moduleIndex) {
        auto& m = submodule[moduleIndex];
        if (1 == m.opList.size()) {
            // Fast way to determine
            auto op = net->oplists()->GetAs<Op>(m.opList[0]);
            if (nullptr != op->inputIndexes()) {
                m.inputs.resize(op->inputIndexes()->size());
                ::memcpy(m.inputs.data(), op->inputIndexes()->data(), m.inputs.size() * sizeof(int));
            }
            if (nullptr != op->outputIndexes()) {
                m.outputs.resize(op->outputIndexes()->size());
                ::memcpy(m.outputs.data(), op->outputIndexes()->data(), m.outputs.size() * sizeof(int));
            }
        } else {
            m.tensorMask = std::vector<uint8_t>(net->tensorName()->size(), 0);
            auto& tensorMask = m.tensorMask;
            for (auto opIndex : m.opList) {
                auto op = net->oplists()->GetAs<Op>(opIndex);
                if (nullptr != op->inputIndexes()) {
                    for (int v=0; v<op->inputIndexes()->size(); ++v) {
                        auto index = op->inputIndexes()->data()[v];
                        tensorMask[index] = tensorMask[index] | 1;
                    }
                }
                if (nullptr != op->outputIndexes()) {
                    for (int v=0; v<op->outputIndexes()->size(); ++v) {
                        auto index = op->outputIndexes()->data()[v];
                        tensorMask[index] = tensorMask[index] | 2;
                    }
                }
            }
|
for (int i=0; i<tensorMask.size(); ++i) {
|
||||||
|
if (0 == tensorMask[i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (1 == tensorMask[i]) {
|
||||||
|
m.inputs.emplace_back(i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (2 == tensorMask[i]) {
|
||||||
|
m.outputs.emplace_back(i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (3 == tensorMask[i]) {
|
||||||
|
if (outputIndexes.find(i) != outputIndexes.end()) {
|
||||||
|
m.outputs.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check if the module's input is valid
|
||||||
|
for (int i=0; i<m.inputs.size(); ++i) {
|
||||||
|
auto index = m.inputs[i];
|
||||||
|
if (inputIndexes.find(index) != inputIndexes.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
bool find = false;
|
||||||
|
for (int sub=0; sub < moduleIndex; ++sub) {
|
||||||
|
for (auto out : submodule[sub].outputs) {
|
||||||
|
if (out == index) {
|
||||||
|
find = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (find) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (find) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Find from module
|
||||||
|
for (int sub=0; sub < moduleIndex; ++sub) {
|
||||||
|
if (submodule[sub].tensorMask.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (submodule[sub].tensorMask[index] == 2) {
|
||||||
|
find = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (submodule[sub].tensorMask[index] == 3) {
|
||||||
|
submodule[sub].outputs.emplace_back(index);
|
||||||
|
submodule[sub].tensorMask[index] = 2;
|
||||||
|
find = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MNN_ASSERT(find);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& m : submodule) {
|
||||||
|
m.tensorMask.clear();
|
||||||
|
}
|
||||||
|
return submodule;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Module* _createSubModule(const MNN::Net* net, const SubModuleInfo& info, const std::map<std::string, SubGraph>& subs) {
|
||||||
|
if (1 == info.opList.size()) {
|
||||||
|
auto op = net->oplists()->GetAs<Op>(info.opList[0]);
|
||||||
|
if (OpType_If == op->type()) {
|
||||||
|
return IfModule::create(op, subs);
|
||||||
|
}
|
||||||
|
if (OpType_While == op->type()) {
|
||||||
|
return WhileModule::create(op, subs);
|
||||||
|
}
|
||||||
|
MNN_ASSERT(false);
|
||||||
|
}
|
||||||
|
std::unique_ptr<NetT> _tempNet(new NetT);
|
||||||
|
// Copy Tensor Name
|
||||||
|
_tempNet->tensorName.resize(net->tensorName()->size());
|
||||||
|
for (int i=0; i<net->tensorName()->size(); ++i) {
|
||||||
|
_tempNet->tensorName[i] = net->tensorName()->GetAsString(i)->str();
|
||||||
|
}
|
||||||
|
// Create Input node
|
||||||
|
std::vector<std::string> inputNames;
|
||||||
|
for (auto index : info.inputs) {
|
||||||
|
std::unique_ptr<OpT> inputOp(new OpT);
|
||||||
|
inputOp->outputIndexes = {index};
|
||||||
|
inputOp->type = OpType_Input;
|
||||||
|
inputOp->main.type = OpParameter_Input;
|
||||||
|
inputOp->main.value = new InputT;
|
||||||
|
inputOp->main.AsInput()->dims = {0, 0, -1, -1};
|
||||||
|
_tempNet->oplists.emplace_back(std::move(inputOp));
|
||||||
|
inputNames.emplace_back(_tempNet->tensorName[index]);
|
||||||
|
}
|
||||||
|
// Create compute node
|
||||||
|
for (auto opIndex : info.opList) {
|
||||||
|
std::unique_ptr<OpT> op(net->oplists()->GetAs<Op>(opIndex)->UnPack());
|
||||||
|
_tempNet->oplists.emplace_back(std::move(op));
|
||||||
|
}
|
||||||
|
// Get output names
|
||||||
|
std::vector<std::string> outputNames;
|
||||||
|
for (auto index : info.outputs) {
|
||||||
|
outputNames.emplace_back(_tempNet->tensorName[index]);
|
||||||
|
}
|
||||||
|
// Create Net Buffer
|
||||||
|
flatbuffers::FlatBufferBuilder builder(1024);
|
||||||
|
auto offset = Net::Pack(builder, _tempNet.get());
|
||||||
|
builder.Finish(offset);
|
||||||
|
_tempNet.reset();
|
||||||
|
return new StaticModule((const uint8_t*)builder.GetBufferPointer(), builder.GetSize(), inputNames, outputNames);
|
||||||
|
}
|
||||||
|
|
||||||
|
Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic) {
|
||||||
|
// Create Subgraph
|
||||||
|
auto net = GetNet(buffer);
|
||||||
|
auto subGraphs = net->subgraphs();
|
||||||
|
if (nullptr == net->oplists() || nullptr == net->tensorName()) {
|
||||||
|
MNN_ERROR("Invalid net, for null oplist or tensorName\n");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
if (!dynamic) {
|
||||||
|
if (nullptr == subGraphs) {
|
||||||
|
// Has no control flow, can just use static module
|
||||||
|
return new StaticModule(buffer, length, inputs, outputs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto subGraphMap = _createSubGraph(net, dynamic);
|
||||||
|
if (dynamic) {
|
||||||
|
// For dynamic mode
|
||||||
|
auto varMaps = Variable::loadMap(buffer, length);
|
||||||
|
std::vector<VARP> inputVars(inputs.size());
|
||||||
|
for (int i=0; i<inputs.size(); ++i) {
|
||||||
|
inputVars[i] = varMaps[inputs[i]];
|
||||||
|
}
|
||||||
|
std::vector<VARP> outputVars(outputs.size());
|
||||||
|
for (int i=0; i<outputs.size(); ++i) {
|
||||||
|
outputVars[i] = varMaps[outputs[i]];
|
||||||
|
}
|
||||||
|
return extract(inputVars, outputVars, false, subGraphMap);
|
||||||
|
}
|
||||||
|
std::set<int> inputIndexes;
|
||||||
|
std::set<int> outputIndexes;
|
||||||
|
std::map<std::string, int> inputsMap;
|
||||||
|
std::map<std::string, int> outputsMap;
|
||||||
|
for (int i=0; i<net->tensorName()->size(); ++i) {
|
||||||
|
auto tname = net->tensorName()->GetAsString(i)->str();
|
||||||
|
for (auto& s : inputs) {
|
||||||
|
if (tname == s) {
|
||||||
|
inputIndexes.emplace(i);
|
||||||
|
inputsMap.insert(std::make_pair(s, i));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& s : outputs) {
|
||||||
|
if (tname == s) {
|
||||||
|
outputIndexes.emplace(i);
|
||||||
|
outputsMap.insert(std::make_pair(s, i));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::vector<int> inputIndexesVec(inputs.size());
|
||||||
|
for (int i=0; i<inputs.size(); ++i) {
|
||||||
|
inputIndexesVec[i] = inputsMap[inputs[i]];
|
||||||
|
}
|
||||||
|
std::vector<int> outputIndexesVec(outputs.size());
|
||||||
|
for (int i=0; i<outputs.size(); ++i) {
|
||||||
|
outputIndexesVec[i] = outputsMap[outputs[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
auto subModulesInfo = _createSubModuleInfo(net, inputIndexes, outputIndexes);
|
||||||
|
std::vector<std::shared_ptr<Module>> subModules(subModulesInfo.size());
|
||||||
|
for (int i=0; i<subModulesInfo.size(); ++i) {
|
||||||
|
subModules[i].reset(_createSubModule(net, subModulesInfo[i], subGraphMap));
|
||||||
|
}
|
||||||
|
auto result = new PipelineModule;
|
||||||
|
/**
|
||||||
|
Compute:
|
||||||
|
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
|
||||||
|
std::vector<int> mInputIndexes;
|
||||||
|
std::vector<int> mOutputIndexes;
|
||||||
|
int mStackSize = 0;
|
||||||
|
*/
|
||||||
|
// Make Stack, first: origin, second: new
|
||||||
|
std::map<int, int> stackMap;
|
||||||
|
int stackIndex = 0;
|
||||||
|
for (auto& m : subModulesInfo) {
|
||||||
|
for (auto index : m.inputs) {
|
||||||
|
if (stackMap.find(index) == stackMap.end()) {
|
||||||
|
stackMap.insert(std::make_pair(index, stackIndex));
|
||||||
|
stackIndex++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto index : m.outputs) {
|
||||||
|
if (stackMap.find(index) == stackMap.end()) {
|
||||||
|
stackMap.insert(std::make_pair(index, stackIndex));
|
||||||
|
stackIndex++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result->mStackSize = stackMap.size();
|
||||||
|
for (int i=0; i<subModulesInfo.size(); ++i) {
|
||||||
|
auto& info = subModulesInfo[i];
|
||||||
|
// Reindex stack index
|
||||||
|
std::vector<int> subInputs(info.inputs.size());
|
||||||
|
for (int i=0; i<info.inputs.size(); ++i) {
|
||||||
|
subInputs[i] = stackMap[info.inputs[i]];
|
||||||
|
}
|
||||||
|
std::vector<int> subOutputs(info.outputs.size());
|
||||||
|
for (int i=0; i<info.outputs.size(); ++i) {
|
||||||
|
subOutputs[i] = stackMap[info.outputs[i]];
|
||||||
|
}
|
||||||
|
result->mSubModules.emplace_back(std::make_tuple(subModules[i], subInputs, subOutputs));
|
||||||
|
}
|
||||||
|
for (int i=0; i<inputIndexesVec.size(); ++i) {
|
||||||
|
inputIndexesVec[i] = stackMap[inputIndexesVec[i]];
|
||||||
|
}
|
||||||
|
for (int i=0; i<outputIndexesVec.size(); ++i) {
|
||||||
|
outputIndexesVec[i] = stackMap[outputIndexesVec[i]];
|
||||||
|
}
|
||||||
|
result->mInputIndexes = std::move(inputIndexesVec);
|
||||||
|
result->mOutputIndexes = std::move(outputIndexesVec);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
Module* PipelineModule::clone(CloneContext* ctx) const {
|
||||||
|
PipelineModule* module(new PipelineModule);
|
||||||
|
for (const auto& it : mSubModules) {
|
||||||
|
const std::shared_ptr<Module>& submodule = std::get<0>(it);
|
||||||
|
const std::vector<int>& input_indices = std::get<1>(it);
|
||||||
|
const std::vector<int>& output_indices = std::get<2>(it);
|
||||||
|
std::shared_ptr<Module> replica_submodule(submodule->clone(ctx));
|
||||||
|
module->mSubModules.push_back(
|
||||||
|
std::make_tuple(replica_submodule, input_indices, output_indices));
|
||||||
|
module->registerModel({replica_submodule});
|
||||||
|
}
|
||||||
|
module->mInputIndexes = mInputIndexes;
|
||||||
|
module->mOutputIndexes = mOutputIndexes;
|
||||||
|
module->mStackSize = mStackSize;
|
||||||
|
return this->cloneBaseTo(ctx, module);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Express
|
||||||
|
} // namespace MNN
|
|
@ -8,16 +8,20 @@
|
||||||
|
|
||||||
#ifndef PipelineModule_hpp
|
#ifndef PipelineModule_hpp
|
||||||
#define PipelineModule_hpp
|
#define PipelineModule_hpp
|
||||||
#include "Module.hpp"
|
#include <MNN/expr/Module.hpp>
|
||||||
#include "NN.hpp"
|
#include <MNN/expr/NN.hpp>
|
||||||
#include <MNN/expr/ExprCreator.hpp>
|
#include <MNN/expr/ExprCreator.hpp>
|
||||||
namespace MNN {
|
namespace MNN {
|
||||||
namespace Train {
|
namespace Express {
|
||||||
|
|
||||||
class MNN_PUBLIC PipelineModule : public Module {
|
class MNN_PUBLIC PipelineModule : public Module {
|
||||||
public:
|
public:
|
||||||
typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
|
typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
|
||||||
static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain);
|
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
|
||||||
|
static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph = {});
|
||||||
|
static Module* extractOrigin(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain) {
|
||||||
|
return extract(inputs, outputs, fortrain);
|
||||||
|
}
|
||||||
static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
|
static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
|
||||||
void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
|
void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
|
||||||
NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
|
NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
|
||||||
|
@ -26,14 +30,18 @@ public:
|
||||||
std::vector<int> countOutputReference(std::vector<int> outputIndices);
|
std::vector<int> countOutputReference(std::vector<int> outputIndices);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
PipelineModule(){}
|
||||||
PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
|
PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
|
||||||
const Transformer& transformFunction = {});
|
const Transformer& transformFunction = {});
|
||||||
|
|
||||||
|
Module* clone(CloneContext* ctx) const override;
|
||||||
|
|
||||||
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
|
std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
|
||||||
std::vector<Express::VARP> mStack;
|
|
||||||
std::vector<int> mInputIndexes;
|
std::vector<int> mInputIndexes;
|
||||||
std::vector<int> mOutputIndexes;
|
std::vector<int> mOutputIndexes;
|
||||||
|
int mStackSize = 0;
|
||||||
};
|
};
|
||||||
} // namespace Train
|
} // namespace Express
|
||||||
} // namespace MNN
|
} // namespace MNN
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -0,0 +1,186 @@
|
||||||
|
//
|
||||||
|
// StaticModule.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on b'2020/09/10'.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "StaticModule.hpp"
|
||||||
|
#include <MNN/expr/ExprCreator.hpp>
|
||||||
|
#include <MNN/AutoTime.hpp>
|
||||||
|
#include "core/TensorUtils.hpp"
|
||||||
|
#include "core/Session.hpp"
|
||||||
|
#include <MNN/expr/Executor.hpp>
|
||||||
|
#include <MNN/AutoTime.hpp>
|
||||||
|
#include <MNN/expr/ExecutorScope.hpp>
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
StaticModule::StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix) : mInputs(inputs), mOutputs(outputs) {
|
||||||
|
mShapeFix = shapeFix;
|
||||||
|
mOutputNumbers = (int)outputs.size();
|
||||||
|
/** Compute:
|
||||||
|
std::vector<int, int> mOutputFromTensor;
|
||||||
|
std::vector<int, int> mOutputFromInput;
|
||||||
|
*/
|
||||||
|
for (int i=0; i<outputs.size(); ++i) {
|
||||||
|
auto& t = outputs[i];
|
||||||
|
bool fromInput = false;
|
||||||
|
for (int j=0; j<inputs.size(); ++j) {
|
||||||
|
if (inputs[j] == t) {
|
||||||
|
fromInput = true;
|
||||||
|
mOutputFromInput.emplace_back(std::make_pair(i, j));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fromInput) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
mOutputFromTensor.emplace_back(i);
|
||||||
|
}
|
||||||
|
if (mOutputFromTensor.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mNet.reset(Interpreter::createFromBuffer(buffer, length));
|
||||||
|
#ifdef MNN_EXPR_ENABLE_PROFILER
|
||||||
|
mNet->setSessionMode(Interpreter::Session_Debug);
|
||||||
|
#else
|
||||||
|
mNet->setSessionMode(Interpreter::Session_Release);
|
||||||
|
#endif
|
||||||
|
if (mShapeFix) {
|
||||||
|
mNet->setSessionMode(Interpreter::Session_Input_Inside);
|
||||||
|
} else {
|
||||||
|
mNet->setSessionMode(Interpreter::Session_Input_User);
|
||||||
|
}
|
||||||
|
auto rt = Express::ExecutorScope::Current()->getRuntime();
|
||||||
|
// TODO: Add Config
|
||||||
|
ScheduleConfig config;
|
||||||
|
config.numThread = 1;
|
||||||
|
config.type = rt.first.begin()->first;
|
||||||
|
config.saveTensors = outputs;
|
||||||
|
mSession = mNet->createSession(config, rt);
|
||||||
|
mInputTensors.resize(inputs.size());
|
||||||
|
for (int i=0; i<inputs.size(); ++i) {
|
||||||
|
mInputTensors[i] = mNet->getSessionInput(mSession, inputs[i].c_str());
|
||||||
|
}
|
||||||
|
mOutputTensors.resize(mOutputFromTensor.size());
|
||||||
|
for (int i=0; i<mOutputFromTensor.size(); ++i) {
|
||||||
|
mOutputTensors[i] = mNet->getSessionOutput(mSession, outputs[mOutputFromTensor[i]].c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
StaticModule:: ~ StaticModule() {
|
||||||
|
// Do nothing
|
||||||
|
}
|
||||||
|
std::vector<Express::VARP> StaticModule::onForward(const std::vector<Express::VARP>& inputs) {
|
||||||
|
AUTOTIME;
|
||||||
|
std::vector<Express::VARP> outputs(mOutputNumbers);
|
||||||
|
for (auto& iter : mOutputFromInput) {
|
||||||
|
outputs[iter.first] = inputs[iter.second];
|
||||||
|
}
|
||||||
|
if (mOutputFromTensor.empty()) {
|
||||||
|
return outputs;
|
||||||
|
}
|
||||||
|
MNN_ASSERT(inputs.size() == mInputTensors.size());
|
||||||
|
for (int i=0; i<inputs.size(); ++i) {
|
||||||
|
auto info = inputs[i]->getInfo();
|
||||||
|
mInputTensors[i]->buffer().type = info->type;
|
||||||
|
auto des = TensorUtils::getDescribe(mInputTensors[i]);
|
||||||
|
if (info->order == Express::NCHW) {
|
||||||
|
des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
|
||||||
|
}
|
||||||
|
if (info->order == Express::NHWC) {
|
||||||
|
des->dimensionFormat = MNN_DATA_FORMAT_NHWC;
|
||||||
|
}
|
||||||
|
if (info->order == Express::NC4HW4) {
|
||||||
|
des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
|
||||||
|
}
|
||||||
|
mNet->resizeTensor(mInputTensors[i], info->dim);
|
||||||
|
}
|
||||||
|
if (!mShapeFix) {
|
||||||
|
for (int i=0; i<inputs.size(); ++i) {
|
||||||
|
mInputTensors[i]->buffer().host = (uint8_t*)inputs[i]->readMap<void>();
|
||||||
|
}
|
||||||
|
// FIXME: Use Interpreter's API
|
||||||
|
mSession->setNeedResize();
|
||||||
|
}
|
||||||
|
mNet->resizeSession(mSession);
|
||||||
|
if (mShapeFix) {
|
||||||
|
for (int i=0; i<inputs.size(); ++i) {
|
||||||
|
// For Shape only usage input, don't alloc memory
|
||||||
|
if (nullptr != mInputTensors[i]->host<void>()) {
|
||||||
|
::memcpy(mInputTensors[i]->host<void>(), inputs[i]->readMap<void>(), mInputTensors[i]->size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef MNN_EXPR_ENABLE_PROFILER
|
||||||
|
auto globalExecutor = ExecutorScope::Current();
|
||||||
|
Timer cost;
|
||||||
|
TensorCallBackWithInfo beforeCallBack = [&cost] (const std::vector<Tensor*>&, const OperatorInfo* info) {
|
||||||
|
cost.reset();
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
TensorCallBackWithInfo afterCallBack = [&cost, globalExecutor] (const std::vector<Tensor*>&, const OperatorInfo* info) {
|
||||||
|
auto costTimes = (float)cost.durationInUs() / 1000.0f;
|
||||||
|
globalExecutor->addOpCostTime(info->type(), costTimes);
|
||||||
|
globalExecutor->addOpFlops(info->type(), info->flops());
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
mNet->runSessionWithCallBackInfo(mSession, beforeCallBack, afterCallBack);
|
||||||
|
#else
|
||||||
|
mNet->runSession(mSession);
|
||||||
|
#endif
|
||||||
|
for (int i=0; i<mOutputTensors.size(); ++i) {
|
||||||
|
Express::Variable::Info info;
|
||||||
|
info.dim = mOutputTensors[i]->shape();
|
||||||
|
info.type = mOutputTensors[i]->getType();
|
||||||
|
auto format = TensorUtils::getDescribe(mOutputTensors[i])->dimensionFormat;
|
||||||
|
info.order = Express::NHWC;
|
||||||
|
if (format == MNN_DATA_FORMAT_NCHW) {
|
||||||
|
info.order = Express::NCHW;
|
||||||
|
} else if (format == MNN_DATA_FORMAT_NC4HW4) {
|
||||||
|
info.order = Express::NC4HW4;
|
||||||
|
}
|
||||||
|
outputs[mOutputFromTensor[i]] = Express::Variable::create(Express::Expr::create(std::move(info), mOutputTensors[i]->host<void>(), Express::VARP::CONSTANT, true), 0);
|
||||||
|
//::memcpy(outputs[i]->writeMap<void>(), mOutputTensors[i]->host<void>(), mOutputTensors[i]->size());
|
||||||
|
}
|
||||||
|
return outputs;
|
||||||
|
}
|
||||||
|
|
||||||
|
Module* StaticModule::clone(CloneContext* ctx) const {
|
||||||
|
StaticModule* module(new StaticModule);
|
||||||
|
module->mInputs = mInputs;
|
||||||
|
module->mOutputs = mOutputs;
|
||||||
|
|
||||||
|
module->mShapeFix = mShapeFix;
|
||||||
|
module->mOutputNumbers = mOutputNumbers;
|
||||||
|
module->mOutputFromInput = mOutputFromInput;
|
||||||
|
module->mOutputFromTensor = mOutputFromTensor;
|
||||||
|
if (mOutputFromTensor.empty()) {
|
||||||
|
return this->cloneBaseTo(ctx, module);
|
||||||
|
}
|
||||||
|
|
||||||
|
module->mNet = mNet;
|
||||||
|
|
||||||
|
auto rt = Express::ExecutorScope::Current()->getRuntime();
|
||||||
|
ScheduleConfig config;
|
||||||
|
config.numThread = 1;
|
||||||
|
config.type = rt.first.begin()->first;
|
||||||
|
config.saveTensors = mOutputs;
|
||||||
|
module->mSession = module->mNet->createSession(config, rt);
|
||||||
|
|
||||||
|
module->mInputTensors.resize(mInputs.size());
|
||||||
|
module->mOutputTensors.resize(mOutputFromTensor.size());
|
||||||
|
for (int i=0; i<mInputs.size(); ++i) {
|
||||||
|
module->mInputTensors[i] =
|
||||||
|
module->mNet->getSessionInput(module->mSession, mInputs[i].c_str());
|
||||||
|
}
|
||||||
|
for (int i=0; i<mOutputFromTensor.size(); ++i) {
|
||||||
|
module->mOutputTensors[i] = module->mNet->getSessionOutput(
|
||||||
|
module->mSession, mOutputs[mOutputFromTensor[i]].c_str());
|
||||||
|
}
|
||||||
|
return this->cloneBaseTo(ctx, module);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
//
|
||||||
|
// StaticModule.hpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on b'2020/09/10'.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef StaticModule_hpp
|
||||||
|
#define StaticModule_hpp
|
||||||
|
|
||||||
|
#include <MNN/expr/Module.hpp>
|
||||||
|
#include <MNN/Interpreter.hpp>
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
class StaticModule : public Module {
|
||||||
|
public:
|
||||||
|
StaticModule(const void* buffer, size_t length, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool shapeFix = false);
|
||||||
|
virtual ~ StaticModule();
|
||||||
|
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
StaticModule() = default;
|
||||||
|
|
||||||
|
Module* clone(CloneContext* ctx) const override;
|
||||||
|
|
||||||
|
std::vector<std::string> mInputs;
|
||||||
|
std::vector<std::string> mOutputs;
|
||||||
|
|
||||||
|
std::shared_ptr<Interpreter> mNet;
|
||||||
|
Session* mSession;
|
||||||
|
std::vector<Tensor*> mInputTensors;
|
||||||
|
std::vector<Tensor*> mOutputTensors;
|
||||||
|
bool mShapeFix;
|
||||||
|
int mOutputNumbers;
|
||||||
|
|
||||||
|
// First: outputIndex, Second: outputTensor Index
|
||||||
|
std::vector<int> mOutputFromTensor;
|
||||||
|
// First: outputIndex, Second: input var index
|
||||||
|
std::vector<std::pair<int, int>> mOutputFromInput;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -0,0 +1,186 @@
|
||||||
|
//
|
||||||
|
// WhileModule.cpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on b'2020/09/10'.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "WhileModule.hpp"
|
||||||
|
#include <MNN/expr/ExprCreator.hpp>
|
||||||
|
#include "MNN_generated.h"
|
||||||
|
//#define MNN_OPEN_TIME_TRACE
|
||||||
|
#include <MNN/AutoTime.hpp>
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
static int _findPos(const std::vector<std::string>& names, const std::string& key) {
|
||||||
|
for (int i=0; i<names.size(); ++i) {
|
||||||
|
if (names[i] == key) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
WhileModule* WhileModule::create(const Op* op, const std::map<std::string, SubGraph>& subGraph) {
|
||||||
|
auto module = new WhileModule;
|
||||||
|
auto whileParam = op->main_as_WhileParam();
|
||||||
|
auto& body = subGraph.find(whileParam->body_graph()->str())->second;
|
||||||
|
auto& cond = subGraph.find(whileParam->cond_graph()->str())->second;
|
||||||
|
module->mBody = body.m;
|
||||||
|
module->mCond = cond.m;
|
||||||
|
/** Compute map index
|
||||||
|
int mCondInputNumber;
|
||||||
|
int mBodyInputNumber;
|
||||||
|
|
||||||
|
// First mCondInputs' index, Second: inputs's index
|
||||||
|
std::vector<std::pair<int, int>> mInputForCond;
|
||||||
|
|
||||||
|
// First mBodyInputs' index, Second: inputs's index
|
||||||
|
std::vector<std::pair<int, int>> mInputForBody;
|
||||||
|
std::vector<int> mOutputFromBody;
|
||||||
|
std::vector<std::pair<int, int>> mUpdateForCond;
|
||||||
|
std::vector<std::pair<int, int>> mUpdateForBody;
|
||||||
|
std::vector<std::pair<int, int>> mCondUpdateForCond;
|
||||||
|
std::vector<std::pair<int, int>> mCondUpdateForBody;
|
||||||
|
*/
|
||||||
|
// Map Inputs
|
||||||
|
module->mBodyInputNumber = body.inputs.size();
|
||||||
|
module->mCondInputNumber = cond.inputs.size();
|
||||||
|
for (int i=0; i<whileParam->aliases_inputs()->size(); ++i) {
|
||||||
|
auto index = i;
|
||||||
|
auto data = whileParam->aliases_inputs()->GetAs<StringVec>(i);
|
||||||
|
for (int s=0; s<data->data()->size(); ++s) {
|
||||||
|
auto name = data->data()->GetAsString(s)->str();
|
||||||
|
auto bodyInputPos = _findPos(body.inputs, name);
|
||||||
|
if (bodyInputPos >= 0) {
|
||||||
|
module->mInputForBody.emplace_back(std::make_pair(bodyInputPos, i));
|
||||||
|
}
|
||||||
|
auto condInputPos = _findPos(cond.inputs, name);
|
||||||
|
if (condInputPos >= 0) {
|
||||||
|
module->mInputForCond.emplace_back(std::make_pair(condInputPos, i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Map update
|
||||||
|
auto update = whileParam->aliases_updates();
|
||||||
|
std::map<int, int> replaceOutputs;
|
||||||
|
for (int i=0; i<update->size(); ++i) {
|
||||||
|
auto data = update->GetAs<StringVec>(i);
|
||||||
|
int bodyInputPos = -1;
|
||||||
|
int condInputPos = -1;
|
||||||
|
int bodyOutputPos = -1;
|
||||||
|
int condOutputPos = -1;
|
||||||
|
MNN_ASSERT(2 == data->data()->size());
|
||||||
|
auto outputName = data->data()->GetAsString(0)->str();
|
||||||
|
auto inputName = data->data()->GetAsString(1)->str();
|
||||||
|
bodyInputPos = _findPos(body.inputs, inputName);
|
||||||
|
condInputPos = _findPos(cond.inputs, inputName);
|
||||||
|
bodyOutputPos = _findPos(body.outputs, outputName);
|
||||||
|
condOutputPos = _findPos(cond.outputs, outputName);
|
||||||
|
|
||||||
|
auto updateBodyOutputPos = _findPos(body.outputs, inputName);
|
||||||
|
|
||||||
|
MNN_ASSERT(bodyOutputPos == -1 || condOutputPos == -1);
|
||||||
|
if (condOutputPos >= 0) {
|
||||||
|
if (bodyInputPos >= 0) {
|
||||||
|
module->mCondUpdateForBody.emplace_back(std::make_pair(bodyInputPos, condOutputPos));
|
||||||
|
}
|
||||||
|
if (condInputPos >= 0) {
|
||||||
|
module->mCondUpdateForCond.emplace_back(std::make_pair(condInputPos, condOutputPos));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bodyOutputPos >= 0) {
|
||||||
|
if (bodyInputPos >= 0) {
|
||||||
|
module->mUpdateForBody.emplace_back(std::make_pair(bodyInputPos, bodyOutputPos));
|
||||||
|
}
|
||||||
|
if (condInputPos >= 0) {
|
||||||
|
module->mUpdateForCond.emplace_back(std::make_pair(condInputPos, bodyOutputPos));
|
||||||
|
}
|
||||||
|
if (updateBodyOutputPos >= 0) {
|
||||||
|
replaceOutputs.insert(std::make_pair(updateBodyOutputPos, bodyOutputPos));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Map outputs
|
||||||
|
auto output = whileParam->aliases_outputs();
|
||||||
|
for (int i=0; i<output->size(); ++i) {
|
||||||
|
auto data = output->GetAsString(i);
|
||||||
|
auto pos = _findPos(body.outputs, data->str());
|
||||||
|
MNN_ASSERT(pos >= 0);
|
||||||
|
if (replaceOutputs.find(pos) != replaceOutputs.end()) {
|
||||||
|
pos = replaceOutputs[pos];
|
||||||
|
}
|
||||||
|
module->mOutputFromBody.emplace_back(pos);
|
||||||
|
}
|
||||||
|
return module;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Express::VARP> WhileModule::onForward(const std::vector<Express::VARP>& inputsI) {
|
||||||
|
std::vector<Express::VARP> condInputs(mCondInputNumber);
|
||||||
|
std::vector<Express::VARP> bodyInputs(mBodyInputNumber);
|
||||||
|
auto& inputs = inputsI;
|
||||||
|
for (auto& p : mInputForCond) {
|
||||||
|
condInputs[p.first] = inputs[p.second];
|
||||||
|
}
|
||||||
|
for (auto& p : mInputForBody) {
|
||||||
|
bodyInputs[p.first] = inputs[p.second];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Express::VARP> outputs(mOutputFromBody.size());
|
||||||
|
while (true) {
|
||||||
|
auto res = mCond->onForward(condInputs)[0];
|
||||||
|
auto resPtr = res->readMap<int>();
|
||||||
|
if (resPtr[0] <= 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
auto bodyOutputs = mBody->onForward(bodyInputs);
|
||||||
|
Express::Variable::prepareCompute(bodyOutputs);
|
||||||
|
for (int i=0; i<bodyOutputs.size(); ++i) {
|
||||||
|
auto p = bodyOutputs[i];
|
||||||
|
if (p->expr().first->get() != nullptr) {
|
||||||
|
auto ptr = p->readMap<void>();
|
||||||
|
auto info = p->getInfo();
|
||||||
|
auto newV = Express::_Input(info->dim, info->order, info->type);
|
||||||
|
if (nullptr != ptr) {
|
||||||
|
::memcpy(newV->writeMap<void>(), ptr, info->type.bytes() * info->size);
|
||||||
|
}
|
||||||
|
bodyOutputs[i] = newV;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i=0; i<mOutputFromBody.size(); ++i) {
|
||||||
|
outputs[i] = bodyOutputs[mOutputFromBody[i]];
|
||||||
|
}
|
||||||
|
for (auto& p : mUpdateForCond) {
|
||||||
|
condInputs[p.first] = bodyOutputs[p.second];
|
||||||
|
}
|
||||||
|
for (auto& p : mUpdateForBody) {
|
||||||
|
bodyInputs[p.first] = bodyOutputs[p.second];
|
||||||
|
}
|
||||||
|
for (auto& p : mCondUpdateForCond) {
|
||||||
|
condInputs[p.first] = res;
|
||||||
|
}
|
||||||
|
for (auto& p : mCondUpdateForBody) {
|
||||||
|
bodyInputs[p.first] = res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return outputs;
|
||||||
|
}
|
||||||
|
|
||||||
|
Module* WhileModule::clone(CloneContext* ctx) const {
|
||||||
|
WhileModule* module(new WhileModule);
|
||||||
|
module->mCondInputNumber = mCondInputNumber;
|
||||||
|
module->mBodyInputNumber = mBodyInputNumber;
|
||||||
|
module->mInputForCond = mInputForCond;
|
||||||
|
module->mInputForBody = mInputForBody;
|
||||||
|
module->mOutputFromBody = mOutputFromBody;
|
||||||
|
module->mUpdateForCond = mUpdateForCond;
|
||||||
|
module->mUpdateForBody = mUpdateForBody;
|
||||||
|
module->mCondUpdateForCond = mCondUpdateForCond;
|
||||||
|
module->mCondUpdateForBody = mCondUpdateForBody;
|
||||||
|
module->mCond.reset(mCond->clone(ctx));
|
||||||
|
module->mBody.reset(mBody->clone(ctx));
|
||||||
|
return this->cloneBaseTo(ctx, module);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
};
|
|
@ -0,0 +1,46 @@
|
||||||
|
//
|
||||||
|
// WhileModule.hpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on b'2020/09/10'.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
#ifndef WhileModule_hpp
|
||||||
|
#define WhileModule_hpp
|
||||||
|
#include <MNN/expr/Module.hpp>
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
class WhileModule : public Module {
|
||||||
|
public:
|
||||||
|
virtual ~ WhileModule() {
|
||||||
|
// Do nothing
|
||||||
|
}
|
||||||
|
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
|
||||||
|
static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
|
||||||
|
|
||||||
|
private:
|
||||||
|
WhileModule(){}
|
||||||
|
|
||||||
|
Module* clone(CloneContext* ctx) const override;
|
||||||
|
|
||||||
|
int mCondInputNumber;
|
||||||
|
int mBodyInputNumber;
|
||||||
|
|
||||||
|
// First mCondInputs' index, Second: inputs's index
|
||||||
|
std::vector<std::pair<int, int>> mInputForCond;
|
||||||
|
|
||||||
|
// First mBodyInputs' index, Second: inputs's index
|
||||||
|
std::vector<std::pair<int, int>> mInputForBody;
|
||||||
|
std::vector<int> mOutputFromBody;
|
||||||
|
std::vector<std::pair<int, int>> mUpdateForCond;
|
||||||
|
std::vector<std::pair<int, int>> mUpdateForBody;
|
||||||
|
|
||||||
|
std::vector<std::pair<int, int>> mCondUpdateForCond;
|
||||||
|
std::vector<std::pair<int, int>> mCondUpdateForBody;
|
||||||
|
|
||||||
|
std::shared_ptr<Module> mCond;
|
||||||
|
std::shared_ptr<Module> mBody;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <MNN/ErrorCode.hpp>
|
#include <MNN/ErrorCode.hpp>
|
||||||
#include <MNN/MNNForwardType.h>
|
#include <MNN/MNNForwardType.h>
|
||||||
|
@ -67,6 +68,7 @@ class Session;
|
||||||
struct Content;
|
struct Content;
|
||||||
class Tensor;
|
class Tensor;
|
||||||
class Backend;
|
class Backend;
|
||||||
|
class Runtime;
|
||||||
|
|
||||||
class MNN_PUBLIC OperatorInfo {
|
class MNN_PUBLIC OperatorInfo {
|
||||||
struct Info;
|
struct Info;
|
||||||
|
@ -89,6 +91,7 @@ protected:
|
||||||
|
|
||||||
typedef std::function<bool(const std::vector<Tensor*>&, const std::string& /*opName*/)> TensorCallBack;
|
typedef std::function<bool(const std::vector<Tensor*>&, const std::string& /*opName*/)> TensorCallBack;
|
||||||
typedef std::function<bool(const std::vector<Tensor*>&, const OperatorInfo*)> TensorCallBackWithInfo;
|
typedef std::function<bool(const std::vector<Tensor*>&, const OperatorInfo*)> TensorCallBackWithInfo;
|
||||||
|
typedef std::pair<std::map<MNNForwardType, std::shared_ptr<Runtime>>, std::shared_ptr<Runtime>> RuntimeInfo;
|
||||||
|
|
||||||
/** net data holder. multiple sessions could share same net. */
|
/** net data holder. multiple sessions could share same net. */
|
||||||
class MNN_PUBLIC Interpreter {
|
class MNN_PUBLIC Interpreter {
|
||||||
|
@ -108,7 +111,43 @@ public:
|
||||||
static Interpreter* createFromBuffer(const void* buffer, size_t size);
|
static Interpreter* createFromBuffer(const void* buffer, size_t size);
|
||||||
~Interpreter();
|
~Interpreter();
|
||||||
|
|
||||||
|
enum SessionMode {
|
||||||
|
/** About CallBack, Default Session_Debug*/
|
||||||
|
/** runSessionWithCallBack is allowed and can get internal op info*/
|
||||||
|
Session_Debug = 0,
|
||||||
|
/** runSessionWithCallBack is not valid and can't get any info of op in session*/
|
||||||
|
Session_Release = 1,
|
||||||
|
|
||||||
|
/** About input tenosr, Default Session_Input_Inside*/
|
||||||
|
/** The input tensor is alloced by session, input data after session resized*/
|
||||||
|
Session_Input_Inside = 2,
|
||||||
|
/** The input tensor is alloced by user, set input data before session resize*/
|
||||||
|
Session_Input_User = 3,
|
||||||
|
};
|
||||||
|
/**
|
||||||
|
* @brief The API shoud be called before create session.
|
||||||
|
* @param mode session mode
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
void setSessionMode(SessionMode mode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The API shoud be called before create session.
|
||||||
|
* If the cache exist, try to load cache from file.
|
||||||
|
* After createSession, try to save cache to file.
|
||||||
|
* @param cacheFile cache file name
|
||||||
|
* @param keySize the first `keySize` bytes used as the key to check if the `cacheFile` exists.
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
void setCacheFile(const char* cacheFile, size_t keySize = 128);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/**
|
||||||
|
* @brief create runtimeInfo seperately with schedule config.
|
||||||
|
* @param config session schedule configs.
|
||||||
|
*/
|
||||||
|
static RuntimeInfo createRuntime(const std::vector<ScheduleConfig>& configs);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief create session with schedule config. created session will be managed in net.
|
* @brief create session with schedule config. created session will be managed in net.
|
||||||
* @param config session schedule config.
|
* @param config session schedule config.
|
||||||
|
@ -116,6 +155,13 @@ public:
|
||||||
*/
|
*/
|
||||||
Session* createSession(const ScheduleConfig& config);
|
Session* createSession(const ScheduleConfig& config);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief create session with schedule config and user-specified runtime.
|
||||||
|
* @param config session schedule config, runtime runtimeInfo used by the created session.
|
||||||
|
* @return created session if success, NULL otherwise.
|
||||||
|
*/
|
||||||
|
Session* createSession(const ScheduleConfig& config, const RuntimeInfo& runtime);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief create multi-path session with schedule configs. created session will be managed in net.
|
* @brief create multi-path session with schedule configs. created session will be managed in net.
|
||||||
* @param configs session schedule configs.
|
* @param configs session schedule configs.
|
||||||
|
@ -123,6 +169,14 @@ public:
|
||||||
*/
|
*/
|
||||||
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs);
|
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief create multi-path session with schedule configs and user-specified runtime.
|
||||||
|
created session will be managed in net.
|
||||||
|
* @param configs session schedule configs.
|
||||||
|
* @return created session if success, NULL otherwise.
|
||||||
|
*/
|
||||||
|
Session* createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief release session.
|
* @brief release session.
|
||||||
* @param session given session.
|
* @param session given session.
|
||||||
|
@ -204,17 +258,39 @@ public:
|
||||||
*/
|
*/
|
||||||
Tensor* getSessionOutput(const Session* session, const char* name);
|
Tensor* getSessionOutput(const Session* session, const char* name);
|
||||||
|
|
||||||
|
enum SessionInfoCode {
|
||||||
|
/** memory session used in MB, float* */
|
||||||
|
MEMORY = 0,
|
||||||
|
|
||||||
|
/** float operation needed in session in M, float* */
|
||||||
|
FLOPS = 1,
|
||||||
|
|
||||||
|
/** Backends in session in M, int*, length >= the configs when create session */
|
||||||
|
BACKENDS = 2,
|
||||||
|
|
||||||
|
ALL
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief get all input tensors.
|
* @brief get session info
|
||||||
* @param session given session.
|
* @param session given session.
|
||||||
* @return all input tensors mapped with name.
|
* @param code given info code.
|
||||||
|
* @param void* given info ptr, see SessionInfoCode for detail
|
||||||
|
* @return true if support the code, false otherwise.
|
||||||
*/
|
*/
|
||||||
const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
|
bool getSesionInfo(const Session* session, SessionInfoCode code, void* ptr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief get all output tensors.
|
* @brief get all output tensors.
|
||||||
* @param session given session.
|
* @param session given session.
|
||||||
* @return all output tensors mapped with name.
|
* @return all output tensors mapped with name.
|
||||||
*/
|
*/
|
||||||
|
const std::map<std::string, Tensor*>& getSessionOutputAll(const Session* session) const;
|
||||||
|
/**
|
||||||
|
* @brief get all input tensors.
|
||||||
|
* @param session given session.
|
||||||
|
* @return all input tensors mapped with name.
|
||||||
|
*/
|
||||||
const std::map<std::string, Tensor*>& getSessionInputAll(const Session* session) const;
|
const std::map<std::string, Tensor*>& getSessionInputAll(const Session* session) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -38,13 +38,7 @@
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#define MNN_ASSERT(x) \
|
#define MNN_ASSERT(x)
|
||||||
{ \
|
|
||||||
int res = (x); \
|
|
||||||
if (!res) { \
|
|
||||||
MNN_ERROR("Error for %d\n", __LINE__); \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__);
|
#define FUNC_PRINT(x) MNN_PRINT(#x "=%d in %s, %d \n", x, __func__, __LINE__);
|
||||||
|
|
|
@ -23,8 +23,8 @@ typedef enum {
|
||||||
/*Hand write metal*/
|
/*Hand write metal*/
|
||||||
MNN_FORWARD_METAL = 1,
|
MNN_FORWARD_METAL = 1,
|
||||||
|
|
||||||
/*Use IOS's MPS instead of hand-write metal, Not Support yet*/
|
/*NVIDIA GPU API*/
|
||||||
MNN_FORWARD_MPS = 2,
|
MNN_FORWARD_CUDA = 2,
|
||||||
|
|
||||||
/*Android / Common Device GPU API*/
|
/*Android / Common Device GPU API*/
|
||||||
MNN_FORWARD_OPENCL = 3,
|
MNN_FORWARD_OPENCL = 3,
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <MNN/HalideRuntime.h>
|
#include <MNN/HalideRuntime.h>
|
||||||
#include <MNN/MNNDefine.h>
|
#include <MNN/MNNDefine.h>
|
||||||
|
#define MNN_MAX_TENSOR_DIM 6
|
||||||
|
|
||||||
namespace MNN {
|
namespace MNN {
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include <MNN/ErrorCode.hpp>
|
#include <MNN/ErrorCode.hpp>
|
||||||
#include <MNN/expr/Expr.hpp>
|
#include <MNN/expr/Expr.hpp>
|
||||||
#include <MNN/Tensor.hpp>
|
#include <MNN/Tensor.hpp>
|
||||||
|
#include <MNN/Interpreter.hpp>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
@ -17,41 +18,19 @@
|
||||||
namespace MNN {
|
namespace MNN {
|
||||||
class Backend;
|
class Backend;
|
||||||
class Execution;
|
class Execution;
|
||||||
|
class Runtime;
|
||||||
|
struct Op;
|
||||||
namespace Express {
|
namespace Express {
|
||||||
class MNN_PUBLIC Executor {
|
class MNN_PUBLIC Executor {
|
||||||
public:
|
public:
|
||||||
class ComputeCache {
|
class ComputeCache;
|
||||||
public:
|
|
||||||
void setShapeDirty(int offset, Variable::Info* info);
|
|
||||||
void setContentDirty();
|
|
||||||
void setContentReady();
|
|
||||||
void syncInput(int offset, const Variable::Info* info);
|
|
||||||
void syncOutput(int offset, Variable::Info* info);
|
|
||||||
|
|
||||||
struct TensorContent {
|
|
||||||
std::shared_ptr<Tensor> tensor;
|
|
||||||
int refCount = 0;
|
|
||||||
void reset();
|
|
||||||
bool aliveOutside = false;
|
|
||||||
};
|
|
||||||
struct Unit;
|
struct Unit;
|
||||||
virtual ~ ComputeCache() {}
|
static void setShapeDirty(ComputeCache* cache);
|
||||||
ComputeCache() {}
|
static void setContentDirty(ComputeCache* cache);
|
||||||
virtual ErrorCode compute() = 0;
|
static void* mapOutput(ComputeCache* cache, int offset, Tensor* dest);
|
||||||
virtual ErrorCode resize() = 0;
|
|
||||||
protected:
|
|
||||||
// Get the index tensor with the need of needBackend
|
|
||||||
// If the Tensor don't belong to the backend, need use needBackend to alloc it and return
|
|
||||||
virtual Tensor* getTensor(int index, bool host) = 0;
|
|
||||||
void _setShapeDirty();
|
|
||||||
friend class Executor;
|
|
||||||
bool mContentDirty = true;
|
|
||||||
bool mShapeDirty = true;
|
|
||||||
};
|
|
||||||
struct Requirement {
|
struct Requirement {
|
||||||
std::vector<bool> contentNeedContent;
|
std::vector<bool> contentNeedContent;
|
||||||
std::vector<bool> shapeNeedContent;
|
std::vector<bool> shapeNeedContent;
|
||||||
std::vector<bool> supportError;
|
|
||||||
};
|
};
|
||||||
~Executor();
|
~Executor();
|
||||||
Requirement getRequirement(Expr* expr) const;
|
Requirement getRequirement(Expr* expr) const;
|
||||||
|
@ -65,25 +44,27 @@ public:
|
||||||
};
|
};
|
||||||
void gc(GCFlag flag = FULL);
|
void gc(GCFlag flag = FULL);
|
||||||
static std::shared_ptr<Executor> getGlobalExecutor();
|
static std::shared_ptr<Executor> getGlobalExecutor();
|
||||||
|
|
||||||
|
static std::shared_ptr<Executor> newExecutor(MNNForwardType type,
|
||||||
|
const BackendConfig& config,
|
||||||
|
int numberThread);
|
||||||
void resetProfile();
|
void resetProfile();
|
||||||
void dumpProfile();
|
void dumpProfile();
|
||||||
void addOpCostTime(int op, float costTime);
|
void addOpCostTime(int op, float costTime);
|
||||||
|
void addOpCostTime(const std::string& type, float costTime);
|
||||||
|
void addOpFlops(const std::string& type, float flops);
|
||||||
class Profiler;
|
class Profiler;
|
||||||
|
static RuntimeInfo getRuntime();
|
||||||
private:
|
private:
|
||||||
void _createSingle(EXPRP expr);
|
void _makeCache(const std::vector<EXPRP>& outputs, bool forceCPU);
|
||||||
void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::vector<ComputeCache::TensorContent>&& tensors, bool forceCPU);
|
void _create(const std::vector<EXPRP>& outputs, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::set<std::shared_ptr<Expr::Inside>>&& inputNode, bool forceCPU);
|
||||||
|
|
||||||
void _addToCache(const std::vector<std::shared_ptr<ComputeCache>>& caches);
|
void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::set<std::shared_ptr<Expr::Inside>>& inputNode);
|
||||||
void _resetCache();
|
|
||||||
void _visit(EXPRP expr, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::vector<ComputeCache::TensorContent>& tensors);
|
|
||||||
|
|
||||||
Executor(std::shared_ptr<Backend> backend);
|
Executor(std::shared_ptr<Runtime> backend, MNNForwardType type);
|
||||||
std::shared_ptr<Backend> mBackend;
|
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mRuntime;
|
||||||
std::shared_ptr<Backend> mBackupBackend;
|
std::pair<std::shared_ptr<Runtime>, MNNForwardType> mBackupRuntime;
|
||||||
std::mutex mMutex;
|
std::mutex mMutex;
|
||||||
std::vector<std::shared_ptr<Tensor>> mStack;
|
|
||||||
std::vector<Tensor*> mStackInputs;
|
|
||||||
std::vector<Tensor*> mStackOutputs;
|
|
||||||
std::shared_ptr<Profiler> mProfiler;
|
std::shared_ptr<Profiler> mProfiler;
|
||||||
};
|
};
|
||||||
} // namespace Express
|
} // namespace Express
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
//
|
||||||
|
// ExecutorScope.hpp
|
||||||
|
// MNN
|
||||||
|
//
|
||||||
|
// Created by MNN on 2020/10/26.
|
||||||
|
// Copyright © 2018, Alibaba Group Holding Limited
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef MNN_EXPR_EXECUTOR_SCOPE_HPP_
|
||||||
|
#define MNN_EXPR_EXECUTOR_SCOPE_HPP_
|
||||||
|
|
||||||
|
#include <MNN/expr/Executor.hpp>
|
||||||
|
|
||||||
|
namespace MNN {
|
||||||
|
namespace Express {
|
||||||
|
|
||||||
|
struct ExecutorScope final {
|
||||||
|
public:
|
||||||
|
ExecutorScope() = delete;
|
||||||
|
explicit ExecutorScope(const ExecutorScope&) = delete;
|
||||||
|
explicit ExecutorScope(const std::shared_ptr<Executor>& current);
|
||||||
|
|
||||||
|
explicit ExecutorScope(const std::string& scope_name,
|
||||||
|
const std::shared_ptr<Executor>& current);
|
||||||
|
|
||||||
|
virtual ~ExecutorScope();
|
||||||
|
|
||||||
|
static const std::shared_ptr<Executor> Current();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace MNN
|
||||||
|
} // namespace Express
|
||||||
|
#endif // MNN_EXPR_EXECUTOR_SCOPE_HPP_
|
|
@ -87,6 +87,7 @@ public:
|
||||||
};
|
};
|
||||||
bool fix(InputType type) const;
|
bool fix(InputType type) const;
|
||||||
private:
|
private:
|
||||||
|
friend class Variable;
|
||||||
std::shared_ptr<Variable> mContent;
|
std::shared_ptr<Variable> mContent;
|
||||||
};
|
};
|
||||||
inline bool operator==(Variable* src, VARP dst) {
|
inline bool operator==(Variable* src, VARP dst) {
|
||||||
|
@ -107,7 +108,6 @@ public:
|
||||||
INTS dim;
|
INTS dim;
|
||||||
halide_type_t type;
|
halide_type_t type;
|
||||||
int size;
|
int size;
|
||||||
void* ptr = nullptr;
|
|
||||||
void syncSize();
|
void syncSize();
|
||||||
};
|
};
|
||||||
const std::string& name() const;
|
const std::string& name() const;
|
||||||
|
@ -173,7 +173,7 @@ private:
|
||||||
class MNN_PUBLIC Expr {
|
class MNN_PUBLIC Expr {
|
||||||
public:
|
public:
|
||||||
struct Inside;
|
struct Inside;
|
||||||
static EXPRP create(Variable::Info&& info);
|
static EXPRP create(Variable::Info&& info, const void* ptr, VARP::InputType type, bool copy = true);
|
||||||
static EXPRP create(const OpT* op, std::vector<VARP> inputs, int outputSize = 1);
|
static EXPRP create(const OpT* op, std::vector<VARP> inputs, int outputSize = 1);
|
||||||
static EXPRP create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize = 1);
|
static EXPRP create(std::pair<std::shared_ptr<char>, int> extra, std::vector<VARP>&& inputs, int outputSize = 1);
|
||||||
static EXPRP create(std::unique_ptr<OpT>&& op, std::vector<VARP> inputs, int outputSize = 1) {
|
static EXPRP create(std::unique_ptr<OpT>&& op, std::vector<VARP> inputs, int outputSize = 1) {
|
||||||
|
@ -188,7 +188,7 @@ public:
|
||||||
return mInputs;
|
return mInputs;
|
||||||
}
|
}
|
||||||
int outputSize() const {
|
int outputSize() const {
|
||||||
return mOutputNames.size();
|
return (int)mOutputNames.size();
|
||||||
}
|
}
|
||||||
static void replace(EXPRP oldExpr, EXPRP newExpr);
|
static void replace(EXPRP oldExpr, EXPRP newExpr);
|
||||||
bool requireInfo();
|
bool requireInfo();
|
||||||
|
|
|
@ -8,9 +8,14 @@
|
||||||
|
|
||||||
#ifndef MNN_Train_Module_hpp
|
#ifndef MNN_Train_Module_hpp
|
||||||
#define MNN_Train_Module_hpp
|
#define MNN_Train_Module_hpp
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <MNN/expr/Expr.hpp>
|
#include <MNN/expr/Expr.hpp>
|
||||||
|
|
||||||
namespace MNN {
|
namespace MNN {
|
||||||
namespace Train {
|
namespace Express {
|
||||||
class MNN_PUBLIC Module {
|
class MNN_PUBLIC Module {
|
||||||
public:
|
public:
|
||||||
Module() = default;
|
Module() = default;
|
||||||
|
@ -21,9 +26,6 @@ public:
|
||||||
bool loadParameters(const std::vector<Express::VARP>& parameters);
|
bool loadParameters(const std::vector<Express::VARP>& parameters);
|
||||||
void setIsTraining(const bool isTraining);
|
void setIsTraining(const bool isTraining);
|
||||||
bool getIsTraining();
|
bool getIsTraining();
|
||||||
static std::shared_ptr<Module> transform(const std::vector<Express::VARP>& inputs,
|
|
||||||
const std::vector<Express::VARP>& outputs);
|
|
||||||
|
|
||||||
void clearCache();
|
void clearCache();
|
||||||
|
|
||||||
const std::string& name() const {
|
const std::string& name() const {
|
||||||
|
@ -38,12 +40,45 @@ public:
|
||||||
void setType(std::string type) {
|
void setType(std::string type) {
|
||||||
mType = std::move(type);
|
mType = std::move(type);
|
||||||
}
|
}
|
||||||
|
// Return the parameter index
|
||||||
|
int addParameter(Express::VARP parameter);
|
||||||
|
|
||||||
|
void setParameter(Express::VARP parameter, int index);
|
||||||
|
static Module* createEmpty(const std::vector<Express::VARP>& parameters);
|
||||||
|
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, bool dynamic = false);
|
||||||
|
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const char* fileName, bool dynamic = false);
|
||||||
|
|
||||||
|
static Module* clone(const Module* module, const bool shareParams = false);
|
||||||
|
|
||||||
|
class CloneContext {
|
||||||
|
public:
|
||||||
|
CloneContext() = default;
|
||||||
|
explicit CloneContext(const bool shareParams)
|
||||||
|
: mShareParams(shareParams) {}
|
||||||
|
virtual ~CloneContext() = default;
|
||||||
|
|
||||||
|
const bool shareParams() const { return mShareParams; }
|
||||||
|
|
||||||
|
EXPRP getOrClone(const EXPRP expr);
|
||||||
|
VARP getOrClone(const VARP var);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool mShareParams = false;
|
||||||
|
std::unordered_map<const Expr*, EXPRP> mExprMap;
|
||||||
|
std::unordered_map<const Variable*, VARP> mVarMap;
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual Module* clone(CloneContext* ctx) const {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void registerModel(const std::vector<std::shared_ptr<Module>>& children);
|
void registerModel(const std::vector<std::shared_ptr<Module>>& children);
|
||||||
void addParameter(Express::VARP parameter);
|
|
||||||
virtual void onClearCache() {
|
virtual void onClearCache() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Module* cloneBaseTo(CloneContext* ctx, Module* module) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void _collectParameters(std::vector<Express::VARP>& result) const;
|
void _collectParameters(std::vector<Express::VARP>& result) const;
|
||||||
std::vector<std::shared_ptr<Module>> mChildren;
|
std::vector<std::shared_ptr<Module>> mChildren;
|
||||||
|
@ -52,6 +87,13 @@ private:
|
||||||
std::string mName;
|
std::string mName;
|
||||||
std::string mType;
|
std::string mType;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct SubGraph {
|
||||||
|
std::vector<std::string> inputs;
|
||||||
|
std::vector<std::string> outputs;
|
||||||
|
std::shared_ptr<Module> m;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace Train
|
} // namespace Train
|
||||||
} // namespace MNN
|
} // namespace MNN
|
||||||
|
|
|
@@ -9,11 +9,10 @@
 #ifndef MNN_Train_NN_hpp
 #define MNN_Train_NN_hpp
 #include <MNN/expr/ExprCreator.hpp>
-#include "Distributions.hpp"
-#include "Module.hpp"
+#include <MNN/expr/Module.hpp>
 #include <vector>
 namespace MNN {
-namespace Train {
+namespace Express {
 class Initializer;

 class MNN_PUBLIC NN {
@@ -29,7 +28,7 @@ public:
 };
 enum FeatureScaleStatMethod {
     PerTensor = 0,
-    PerChannel = 1
+    PerChannel = 1 // Depercerate
 };
 /* Unlike enum in class, class in class need be dllimport or dllexport explcility.
 Compiling in other system will not be affected.
@@ -86,7 +85,7 @@ public:
     static ConvParameters ExtractConvolution(Express::EXPRP expr);

     // Extract BatchNormal and Dropout
-    static Module* ExtractNotRunableOp(Express::EXPRP expr);
+    static Module* ExtractNotRunableOp(Express::EXPRP expr, const std::map<std::string, SubGraph>& subgraphs);
 };
 };
@@ -31,25 +31,30 @@ MNN_PUBLIC VARP _Const(const void* ptr, INTS shape = {}, Dimensionformat format
 MNN_PUBLIC VARP _TrainableParam(float value, INTS dims, Dimensionformat format);
 MNN_PUBLIC VARP _TrainableParam(const void* ptr, INTS dims, Dimensionformat format,
                                 halide_type_t type = halide_type_of<float>());
+MNN_PUBLIC VARP _InnerProduct(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS outputShape);
 MNN_PUBLIC VARP _Conv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
                       INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});

 MNN_PUBLIC VARP _Conv(float weight, float bias, VARP x, INTS channel, INTS kernelSize, PaddingMode pad = VALID,
                       INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1);
 MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
-                      PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
+                      PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false, int nbits = 8);
 MNN_PUBLIC VARP _Conv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
                       PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
 MNN_PUBLIC VARP _Deconv(VARP weight, VARP bias, VARP x, PaddingMode pad = VALID, INTS stride = {1, 1},
                         INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});

+MNN_PUBLIC VARP _Deconv(std::vector<float>&& weight, std::vector<float>&& bias, VARP x, INTS channel, INTS kernelSize,
+                        PaddingMode pad, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0}, bool relu = false, bool relu6 = false);
+
 MNN_PUBLIC VARP _MaxPool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
 MNN_PUBLIC VARP _AvePool(VARP x, INTS kernel, INTS stride = {1, 1}, PaddingMode pad = VALID, INTS pads= {0, 0});
-MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NHWC);
+MNN_PUBLIC VARP _Reshape(VARP x, INTS shape, Dimensionformat original_format = NCHW);
 MNN_PUBLIC VARP _Reshape(VARP x, VARP shape);
 MNN_PUBLIC VARP _Scale(VARP x, int channels, std::vector<float>&& scales, std::vector<float>&& bias);

 MNN_PUBLIC VARP _Relu(VARP x, float slope = 0.0f);
-MNN_PUBLIC VARP _Relu6(VARP x);
+MNN_PUBLIC VARP _Relu6(VARP x, float minValue = 0.0f, float maxValue = 6.0f);
 MNN_PUBLIC VARP _PRelu(VARP x, std::vector<float> &&slopes);
 MNN_PUBLIC VARP _Softmax(VARP logits, int axis = -1);
 MNN_PUBLIC VARP _Softplus(VARP features);
@@ -76,7 +81,7 @@ MNN_PUBLIC VARP _Pad(VARP x, VARP paddings, PadValueMode mode = CONSTANT);
 MNN_PUBLIC VARP _ExpandDims(VARP input, int axis);
 MNN_PUBLIC VARP _ExpandDims(VARP input, VARP axis);

-MNN_PUBLIC VARP _Shape(VARP input);
+MNN_PUBLIC VARP _Shape(VARP input, bool nchw = false);
 MNN_PUBLIC VARP _Stack(VARPS values, int axis=0);
 enum InterpolationMethod {BILINEAR, NEAREST};
 MNN_PUBLIC VARP _CropAndResize(VARP image, VARP boxes, VARP box_ind, VARP crop_size,
@@ -92,6 +97,7 @@ MNN_PUBLIC VARP _GatherND(VARP params, VARP indices);
 MNN_PUBLIC VARP _Selu(VARP features, float scale, float alpha);
 MNN_PUBLIC VARP _Size(VARP input);
 MNN_PUBLIC VARP _Elu(VARP features, float alpha=1.0);
+MNN_PUBLIC VARP _Threshold(VARP features, float alpha=1.0);
 MNN_PUBLIC VARP _MatrixBandPart(VARP input, VARP num_lower, VARP num_upper);
 MNN_PUBLIC std::vector<VARP> _Moments(VARP x, INTS axis, VARP shift, bool keepDims);
 MNN_PUBLIC VARP _SetDiff1D(VARP x, VARP y);
@@ -123,7 +129,8 @@ MNN_PUBLIC VARP _ZeroGrad(VARP x);

 // Int8 Inference
 MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<float>&& scale, VARP x, INTS channel, INTS kernelSize,
-                      PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu);
+                      PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu, int nbits = 8);
+MNN_PUBLIC VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim);
 MNN_PUBLIC VARP _FloatToInt8(VARP x, VARP scale, char minValue, char maxValue);
 MNN_PUBLIC VARP _Int8ToFloat(VARP x, VARP scale);
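
Editor's note: the hunk above only changes declarations, so here is a minimal, hedged sketch of how the updated `_Relu6` and `_Reshape` defaults read at a call site. It relies only on `_Const` from the hunk header plus the signatures shown above; `readMap<T>()` is assumed from the existing Express `Variable` API and is not part of this diff.

// Illustrative only: exercises the signatures shown above.
#include <vector>
#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

int main() {
    std::vector<float> data = {-3.0f, 0.5f, 4.0f, 9.0f};
    VARP x        = _Const(data.data(), {4}, NCHW);   // _Const(ptr, shape, format) from the hunk header
    VARP clamped  = _Relu6(x, 0.0f, 6.0f);            // new explicit clamp range; defaults keep the old [0, 6] behaviour
    VARP twoByTwo = _Reshape(clamped, {2, 2});        // default original_format is now NCHW per this hunk
    const float* out = twoByTwo->readMap<float>();    // readMap<T>() assumed from the existing Express API
    return out == nullptr ? 1 : 0;
}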
@@ -0,0 +1,102 @@
+//
+//  RuntimeScope.hpp
+//  MNN
+//
+//  Created by MNN on 2020/10/26.
+//  Copyright © 2018, Alibaba Group Holding Limited
+//
+
+#ifndef MNN_EXPR_SCOPE_HPP_
+#define MNN_EXPR_SCOPE_HPP_
+
+#include <cstdio>
+#include <vector>
+#include <string>
+#include <mutex>
+
+#include <MNN/Interpreter.hpp>
+
+namespace MNN {
+namespace Express {
+
+template <typename T>
+class Scope {
+public:
+    Scope();
+    virtual ~Scope() = default;
+
+    struct ScopedContent {
+        std::string scope_name;
+        T content;
+    };
+    void EnterScope(const ScopedContent& current);
+    void EnterScope(const T& current);
+    void EnterScope(const std::string& scope_name, const T& current);
+
+    void ExitScope();
+
+    const ScopedContent& Current() const;
+
+    int ScopedLevel() const { return scoped_level_; }
+
+private:
+    std::string MakeScopeName(const std::string& prefix, int level) const;
+
+    mutable std::mutex mutex_;
+    int scoped_level_ = 0;
+    std::vector<ScopedContent> scoped_contents_;
+};
+
+template <typename T>
+Scope<T>::Scope() : scoped_level_(0) {
+}
+
+template <typename T>
+void Scope<T>::EnterScope(const ScopedContent& current) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    ++scoped_level_;
+    scoped_contents_.push_back(current);
+}
+
+template <typename T>
+void Scope<T>::EnterScope(const T& current) {
+    EnterScope("scope", current);
+}
+
+template <typename T>
+void Scope<T>::EnterScope(const std::string& scope_name,
+                          const T& current) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    int scoped_level = ScopedLevel();
+    std::string name = MakeScopeName(scope_name, scoped_level++);
+    ScopedContent content{name, current};
+    ++scoped_level_;
+    scoped_contents_.push_back(content);
+}
+
+template <typename T>
+void Scope<T>::ExitScope() {
+    std::lock_guard<std::mutex> lock(mutex_);
+    --scoped_level_;
+    scoped_contents_.resize(scoped_level_);
+}
+
+template <typename T>
+const typename Scope<T>::ScopedContent& Scope<T>::Current() const {
+    std::lock_guard<std::mutex> lock(mutex_);
+    MNN_CHECK(scoped_contents_.size() > 0, "Scope level should not be 0.");
+    return scoped_contents_.back();
+}
+
+template <typename T>
+std::string Scope<T>::MakeScopeName(const std::string& prefix,
+                                    int level) const {
+    char s[16];
+    snprintf(s, 16, "%d", level);
+    return prefix + "/" + std::string(s);
+}
+
+} // namespace Express
+} // namespace MNN
+
+#endif  // MNN_EXPR_SCOPE_HPP_
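
Editor's note: a brief driver for the new Scope<T> helper follows; the int payload, the "session" name, and the include path are illustrative assumptions only, grounded in the header text above.

// Hypothetical usage of the Scope<T> template added above.
#include "RuntimeScope.hpp"   // header added above; install path assumed

int main() {
    MNN::Express::Scope<int> scope;
    scope.EnterScope("session", 42);          // pushes {"session/0", 42}, level becomes 1
    int payload = scope.Current().content;    // 42, the most recently entered content
    scope.ExitScope();                        // back to level 0
    return payload == 42 ? 0 : 1;
}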
@@ -1,12 +1,14 @@
-# MNN_Windows
-# |------- MNN_Windows_lib
-# |---------- Dynamic_Library
-# |---------- Static_Library
-# |------- MNN_Windows_tools
+# MNN
+# |-- Debug
+# | |--- MD
+# | |--- MT
+# |-- Release
+# |--- MD
+# |--- MT

 $erroractionpreference = "stop"

-Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN_Windows"
+Set-Variable -Name WINDOWS_PACKAGE_NAME -Value "MNN"

 #clear and create package directory
 powershell ./schema/generate.ps1
@@ -14,32 +16,50 @@ Set-Variable -Name WINDOWS_PACKAGE_PATH -Value "$(pwd)\$WINDOWS_PACKAGE_NAME"
 Remove-Item $WINDOWS_PACKAGE_PATH -Recurse -ErrorAction Ignore
 mkdir $WINDOWS_PACKAGE_PATH\
 cd $WINDOWS_PACKAGE_PATH
-mkdir -p MNN_Windows_lib\Dynamic_Library
-mkdir -p MNN_Windows_lib\Static_Library
-mkdir MNN_Windows_tools
+mkdir -p Debug\MD
+mkdir -p Debug\MT
+mkdir -p Release\MD
+mkdir -p Release\MT
 cd ..

 Remove-Item build -Recurse -ErrorAction Ignore
 mkdir build
-cd build
+pushd build
 # tools without dependency, static library without sep_build
-cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
+#cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_BUILD_CONVERTER=ON -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_TRAIN=ON -DMNN_BUILD_DEMO=ON -DMNN_BUILD_QUANTOOLS=ON -DMNN_EVALUATION=ON ..
+#ninja
+#pushd $WINDOWS_PACKAGE_PATH
+#cp ..\build\*.exe MNN_Windows_tools
+#cp ..\build\*.pdb MNN_Windows_tools
+#cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
+#popd
+
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
 ninja
-pushd $WINDOWS_PACKAGE_PATH
-cp ..\build\*.exe MNN_Windows_tools
-cp ..\build\*.pdb MNN_Windows_tools
-cp ..\build\MNN.lib MNN_Windows_lib\Static_Library
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MT
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MT
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MT
+
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Debug -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
+ninja
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Debug\MD
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Debug\MD
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Debug\MD
+
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=ON -DMNN_OPENCL=ON ..
+ninja
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MT
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MT
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MT
+
+Remove-Item CMakeCache.txt -ErrorAction Ignore
+cmake -G "Ninja" -DMNN_SEP_BUILD=OFF -DCMAKE_BUILD_TYPE=Release -DMNN_WIN_RUNTIME_MT=OFF -DMNN_OPENCL=ON ..
+ninja
+cp MNN.lib $WINDOWS_PACKAGE_PATH\Release\MD
+cp MNN.dll $WINDOWS_PACKAGE_PATH\Release\MD
+cp MNN.pdb $WINDOWS_PACKAGE_PATH\Release\MD

 popd

-#dynamic library without sep_build
-rm .\CMakeCache.txt
-cmake -G "Ninja" -DMNN_SEP_BUILD=OFF ..
-ninja
-cd $WINDOWS_PACKAGE_PATH
-cp ..\build\MNN.lib MNN_Windows_lib\Dynamic_Library
-cp ..\build\MNN.dll MNN_Windows_lib\Dynamic_Library
-cp ..\build\MNN.pdb MNN_Windows_lib\Dynamic_Library
-
-# Compress MNN_Windows_lib and MNN_Windows_tools
-Compress-Archive -Path MNN_Windows_lib -DestinationPath MNN_Windows_lib.zip -Update -CompressionLevel Optimal
-Compress-Archive -Path MNN_Windows_tools -DestinationPath MNN_Windows_tools.zip -Update -CompressionLevel Optimal
@@ -8,15 +8,14 @@ set_target_properties(
                        ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN.so
                        )

-add_library( MNN_Arm82 SHARED IMPORTED GLOBAL)
-set_target_properties(
-                       MNN_Arm82
-                       PROPERTIES IMPORTED_LOCATION
-                       ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Arm82.so
-                       )
-
 add_library( MNN_CL SHARED IMPORTED GLOBAL )
 set_target_properties( MNN_CL
                        PROPERTIES IMPORTED_LOCATION
                        ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_CL.so
                        )

+add_library( MNN_Express SHARED IMPORTED GLOBAL )
+set_target_properties( MNN_Express
+                       PROPERTIES IMPORTED_LOCATION
+                       ${CMAKE_CURRENT_LIST_DIR}/libs/${ANDROID_ABI}/libMNN_Express.so
+                       )
@@ -5,7 +5,6 @@ adb push ./libMNN_CL.so /data/local/tmp/MNN/libMNN_CL.so
 adb push ./libMNN_Vulkan.so /data/local/tmp/MNN/libMNN_Vulkan.so
 adb push ./libMNN_GL.so /data/local/tmp/MNN/libMNN_GL.so
 adb push ./libMNN_Express.so /data/local/tmp/MNN/libMNN_Express.so
-adb push ./libMNN_Arm82.so /data/local/tmp/MNN/libMNN_Arm82.so
 adb push ./MNNV2Basic.out /data/local/tmp/MNN/MNNV2Basic.out
 adb shell "cd /data/local/tmp/MNN && rm -r output"
 adb shell "cd /data/local/tmp/MNN && mkdir output"
@@ -18,3 +17,4 @@ adb push ./timeProfile.out /data/local/tmp/MNN/timeProfile.out
 adb push ./train.out /data/local/tmp/MNN/train.out
 adb push ./benchmark.out /data/local/tmp/MNN/benchmark.out
 adb push ./benchmarkExprModels.out /data/local/tmp/MNN/benchmarkExprModels.out
+adb push ./run_test.out /data/local/tmp/MNN/run_test.out

File diff suppressed because it is too large
@@ -4,6 +4,8 @@
 <dict>
 	<key>CFBundleDevelopmentRegion</key>
 	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundleName</key>
@@ -1,57 +0,0 @@
-#!bin/sh
-
-echo "Register Op Begin"
-
-function read_dir(){
-    str1=`grep -e $2 $1/*.$4|sed s/[[:space:]]//g`
-    array=(${str1//\;/ })
-    for var in ${array[@]}; do
-        `echo $var|awk -F $3 '{
-            a="___";
-            b="__();";
-            c="extern void ";
-            print(c""a""$3"__"$4""b) >> "extern";
-            print (a""$3"__"$4""b) >> "call"
-        }'`
-    done
-}
-
-start=$(date +%s)
-
-SEP='[:(,)]'
-FILE_EXTERN_CPP='cpp'
-FILE_EXTERN_MM='mm'
-
-SHELL_FOLDER=$(dirname $0)'/../../..'
-# handle CPU
-CPUFILE=$SHELL_FOLDER/source/backend/cpu/CPUOPRegister.cpp
-echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $CPUFILE
-echo "Start Register CPU"
-CPU=$SHELL_FOLDER/source/backend/cpu
-CPU_KEY='REGISTER_CPU_OP_CREATOR'
-read_dir $CPU $CPU_KEY $SEP $FILE_EXTERN_CPP
-cat extern >> $CPUFILE
-rm extern
-echo '\nvoid registerCPUOps() {' >> $CPUFILE
-cat call >> $CPUFILE
-echo '}\n#endif\n}' >> $CPUFILE
-rm call
-
-# handle Shape
-echo "Start Register Shape"
-SHAPEFILE=$SHELL_FOLDER/source/shape/ShapeRegister.cpp
-SHAPE=$SHELL_FOLDER/source/shape
-SHAPE_KEY="REGISTER_SHAPE"
-echo "// This file is generated by Shell for ops register\nnamespace MNN {\n#ifdef MNN_CODEGEN_REGISTER" > $SHAPEFILE
-read_dir $SHAPE $SHAPE_KEY $SEP $FILE_EXTERN_CPP
-cat extern >> $SHAPEFILE
-rm extern
-echo '\nvoid registerShapeOps() {' >> $SHAPEFILE
-cat call >> $SHAPEFILE
-echo '}\n#endif\n}' >> $SHAPEFILE
-rm call
-
-echo "Register Op End"
-
-dur=$(echo "$(date +%s) - $start" | bc)
-printf "Execution time: %.6f seconds" $dur
@@ -8,10 +8,14 @@

 #import "AppDelegate.h"
 #import "MNNTestSuite.h"
+#import <MNN/expr/Executor.hpp>

 @implementation AppDelegate

 - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
+    MNN::BackendConfig config;
+    // If want to test metal, change MNN_FORWARD_CPU to MNN_FORWARD_METAL
+    MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1);
     MNNTestSuite::runAll();
     return YES;
 }
@@ -8,6 +8,9 @@ import cv2
 def inference():
     """ inference mobilenet_v1 using a specific picture """
     interpreter = MNN.Interpreter("mobilenet_v1.mnn")
+    interpreter.setCacheFile('.tempcache')
+    config = {}
+    config['precision'] = 'low'
     session = interpreter.createSession()
     input_tensor = interpreter.getSessionInput(session)
     image = cv2.imread('ILSVRC2012_val_00049999.JPEG')
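
Editor's note: a hedged C++ counterpart of the Python change above, for readers using the native API: it enables the session cache and requests low-precision compute. Interpreter::setCacheFile and the exact BackendConfig fields are assumed from the public MNN headers of this release, not shown in this diff.

// Sketch only: mirrors interpreter.setCacheFile('.tempcache') and config['precision'] = 'low'.
#include <memory>
#include <MNN/Interpreter.hpp>

int main() {
    std::shared_ptr<MNN::Interpreter> net(
        MNN::Interpreter::createFromFile("mobilenet_v1.mnn"));  // model path as in the Python demo
    if (!net) {
        return 1;
    }
    net->setCacheFile(".tempcache");   // assumed C++ equivalent of the Python call above
    MNN::ScheduleConfig config;
    MNN::BackendConfig backendConfig;
    backendConfig.precision = MNN::BackendConfig::Precision_Low;  // 'low' precision
    config.backendConfig = &backendConfig;
    MNN::Session* session = net->createSession(config);
    return session != nullptr ? 0 : 1;
}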
@@ -96,8 +96,7 @@ def demo():
     train_dataloader = MNN.data.DataLoader(train_dataset, batch_size = 64, shuffle = True)
     test_dataloader = MNN.data.DataLoader(test_dataset, batch_size = 100, shuffle = False)

-    opt = MNN.optim.SGD(0.01, 0.9, 0.0005)
-    opt.append(model.parameters)
+    opt = MNN.optim.SGD(model, 0.01, 0.9, 0.0005)

     F.set_thread_number(4)
@@ -125,8 +125,7 @@ def demo():

     net = Net(feature_extractor, num_classes)

-    opt = MNN.optim.SGD(1e-3, 0.9, 0.00004)
-    opt.append(net.parameters)
+    opt = MNN.optim.SGD(net, 1e-3, 0.9, 0.00004)

     for epoch in range(10):
         train_func(net, train_dataloader, opt, num_classes)
@@ -0,0 +1,15 @@
+import numpy as np
+import MNN
+nn = MNN.nn
+F = MNN.expr
+
+v0 = F.const([0.3,0.1, -0.3,0.4], [4])
+v2 = F.const([0.3,0.1, -0.3,0.4], [4])
+v1 = v0 * v0
+
+outputDiff = F.const([0.05, 0.03, 0.02, 0.01], [4])
+
+v0Grad = nn.grad(v1, [v0, v2], [outputDiff], "")
+print(v0Grad)
+print(v0Grad[0].read())
+F.save(v0Grad, "temp.grad")
@@ -0,0 +1,36 @@
+import numpy as np
+import MNN
+nn = MNN.nn
+F = MNN.expr
+
+class Net(nn.Module):
+    """construct a lenet 5 model"""
+    def __init__(self):
+        super(Net, self).__init__()
+        self.conv1 = nn.conv(1, 20, [5, 5])
+        self.conv2 = nn.conv(20, 50, [5, 5])
+        self.fc1 = nn.linear(800, 500)
+        self.fc2 = nn.linear(500, 10)
+        self.step = F.const([10], [], F.NCHW, F.int)
+        self.lr = F.const([0.0004],[], F.NCHW, F.float)
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        x = F.max_pool(x, [2, 2], [2, 2])
+        x = F.relu(self.conv2(x))
+        x = F.max_pool(x, [2, 2], [2, 2])
+        x = F.reshape(x, [0, -1])
+        x = F.relu(self.fc1(x))
+        x = self.fc2(x)
+        x = F.softmax(x, 1)
+        return x
+
+
+model = Net()
+F.save(model.parameters, 'mnist.snapshot')
+
+
+model2 = Net()
+model2.load_parameters(F.load_as_list('mnist.snapshot'))
+
+print(model2.lr.read())
+print(model2.step.read())
Some files were not shown because too many files have changed in this diff